View Javadoc
1 /* 2 * $Header: /home/cvs/jakarta-commons/httpclient/src/java/org/apache/commons/httpclient/URI.java,v 1.36 2003/07/01 01:12:29 mbecke Exp $ 3 * $Revision: 1.36 $ 4 * $Date: 2003/07/01 01:12:29 $ 5 * 6 * ==================================================================== 7 * 8 * The Apache Software License, Version 1.1 9 * 10 * Copyright (c) 2002-2003 The Apache Software Foundation. All rights 11 * reserved. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 20 * 2. Redistributions in binary form must reproduce the above copyright 21 * notice, this list of conditions and the following disclaimer in 22 * the documentation and/or other materials provided with the 23 * distribution. 24 * 25 * 3. The end-user documentation included with the redistribution, if 26 * any, must include the following acknowlegement: 27 * "This product includes software developed by the 28 * Apache Software Foundation (http://www.apache.org/)." 29 * Alternately, this acknowlegement may appear in the software itself, 30 * if and wherever such third-party acknowlegements normally appear. 31 * 32 * 4. The names "The Jakarta Project", "Commons", and "Apache Software 33 * Foundation" must not be used to endorse or promote products derived 34 * from this software without prior written permission. For written 35 * permission, please contact apache@apache.org. 36 * 37 * 5. Products derived from this software may not be called "Apache" 38 * nor may "Apache" appear in their names without prior written 39 * permission of the Apache Group. 
40 * 41 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 42 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 43 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 44 * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR 45 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 48 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 49 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 50 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 51 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 52 * SUCH DAMAGE. 53 * ==================================================================== 54 * 55 * This software consists of voluntary contributions made by many 56 * individuals on behalf of the Apache Software Foundation. For more 57 * information on the Apache Software Foundation, please see 58 * <http://www.apache.org/>;. 59 * 60 * [Additional notices, if required by prior licensing conditions] 61 * 62 */ 63 64 package org.apache.commons.httpclient; 65 66 import java.io.IOException; 67 import java.io.ObjectInputStream; 68 import java.io.ObjectOutputStream; 69 import java.io.Serializable; 70 import java.io.UnsupportedEncodingException; 71 import java.util.Locale; 72 import java.util.BitSet; 73 import java.util.Hashtable; 74 import java.net.URL; 75 import java.security.AccessController; 76 import sun.security.action.GetPropertyAction; 77 78 /*** 79 * The interface for the URI(Uniform Resource Identifiers) version of RFC 2396. 80 * This class has the purpose of supportting of parsing a URI reference to 81 * extend any specific protocols, the character encoding of the protocol to 82 * be transported and the charset of the document. 
83 * <p> 84 * A URI is always in an "escaped" form, since escaping or unescaping a 85 * completed URI might change its semantics. 86 * <p> 87 * Implementers should be careful not to escape or unescape the same string 88 * more than once, since unescaping an already unescaped string might lead to 89 * misinterpreting a percent data character as another escaped character, 90 * or vice versa in the case of escaping an already escaped string. 91 * <p> 92 * In order to avoid these problems, data types used as follows: 93 * <p><blockquote><pre> 94 * URI character sequence: char 95 * octet sequence: byte 96 * original character sequence: String 97 * </pre></blockquote><p> 98 * 99 * So, a URI is a sequence of characters as an array of a char type, which 100 * is not always represented as a sequence of octets as an array of byte. 101 * <p> 102 * 103 * URI Syntactic Components 104 * <p><blockquote><pre> 105 * - In general, written as follows: 106 * Absolute URI = <scheme>:<scheme-specific-part> 107 * Generic URI = <scheme>://<authority><path>?<query> 108 * 109 * - Syntax 110 * absoluteURI = scheme ":" ( hier_part | opaque_part ) 111 * hier_part = ( net_path | abs_path ) [ "?" query ] 112 * net_path = "//" authority [ abs_path ] 113 * abs_path = "/" path_segments 114 * </pre></blockquote><p> 115 * 116 * The following examples illustrate URI that are in common use. 
* <pre>
 * ftp://ftp.is.co.za/rfc/rfc1808.txt
 *    -- ftp scheme for File Transfer Protocol services
 * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles
 *    -- gopher scheme for Gopher and Gopher+ Protocol services
 * http://www.math.uio.no/faq/compression-faq/part1.html
 *    -- http scheme for Hypertext Transfer Protocol services
 * mailto:mduerst@ifi.unizh.ch
 *    -- mailto scheme for electronic mail addresses
 * news:comp.infosystems.www.servers.unix
 *    -- news scheme for USENET news groups and articles
 * telnet://melvyl.ucop.edu/
 *    -- telnet scheme for interactive services via the TELNET Protocol
 * </pre>
 * Please note that there are many modifications from URL (RFC 1738) and
 * relative URL (RFC 1808).
 * <p>
 * <b>The expressions for a URI</b>
 * <p><pre>
 * For escaped URI forms
 *  - URI(char[]) // constructor
 *  - char[] getRawXxx() // method
 *  - String getEscapedXxx() // method
 *  - String toString() // method
 * <p>
 * For unescaped URI forms
 *  - URI(String) // constructor
 *  - String getXXX() // method
 * </pre><p>
 *
 * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
 * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
 * @version $Revision: 1.36 $ $Date: 2002/03/14 15:14:01 
 */
public class URI implements Cloneable, Comparable, Serializable {


    // ----------------------------------------------------------- Constructors

    /**
     * Create an empty instance for internal use.
     * The body is empty: no parsing is performed and every component keeps
     * its field-initializer value. Being <code>protected</code>, it is only
     * reachable from subclasses and from code in the same package.
     */
    protected URI() {
    }


    /**
     * Construct a URI as an escaped form of a character array with the given
     * charset.
     *
     * @param escaped the URI character sequence
     * @param charset the charset string to do escape encoding
     * @throws URIException If the URI cannot be created.
168 * @throws NullPointerException if <code>escaped</code> is <code>null</code> 169 * @see #getProtocolCharset 170 */ 171 public URI(char[] escaped, String charset) 172 throws URIException, NullPointerException { 173 protocolCharset = charset; 174 parseUriReference(new String(escaped), true); 175 } 176 177 178 /*** 179 * Construct a URI as an escaped form of a character array. 180 * An URI can be placed within double-quotes or angle brackets like 181 * "http://test.com/" and <http://test.com/> 182 * 183 * @param escaped the URI character sequence 184 * @throws URIException If the URI cannot be created. 185 * @throws NullPointerException if <code>escaped</code> is <code>null</code> 186 * @see #getDefaultProtocolCharset 187 */ 188 public URI(char[] escaped) 189 throws URIException, NullPointerException { 190 parseUriReference(new String(escaped), true); 191 } 192 193 194 /*** 195 * Construct a URI from the given string with the given charset. 196 * 197 * @param original the string to be represented to URI character sequence 198 * It is one of absoluteURI and relativeURI. 199 * @param charset the charset string to do escape encoding 200 * @throws URIException If the URI cannot be created. 201 * @see #getProtocolCharset 202 */ 203 public URI(String original, String charset) throws URIException { 204 protocolCharset = charset; 205 parseUriReference(original, false); 206 } 207 208 209 /*** 210 * Construct a URI from the given string. 211 * <p><blockquote><pre> 212 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 213 * </pre></blockquote><p> 214 * An URI can be placed within double-quotes or angle brackets like 215 * "http://test.com/" and <http://test.com/> 216 * 217 * @param original the string to be represented to URI character sequence 218 * It is one of absoluteURI and relativeURI. 219 * @throws URIException If the URI cannot be created. 
220 * @see #getDefaultProtocolCharset 221 */ 222 public URI(String original) throws URIException { 223 parseUriReference(original, false); 224 } 225 226 227 /*** 228 * Construct a URI from a URL. 229 * 230 * @param url a valid URL. 231 * @throws URIException If the URI cannot be created. 232 * @since 2.0 233 * @deprecated currently somewhat wrong and diffrent with java.net.URL usage 234 */ 235 public URI(URL url) throws URIException { 236 this(url.toString()); 237 } 238 239 240 /*** 241 * Construct a general URI from the given components. 242 * <p><blockquote><pre> 243 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 244 * absoluteURI = scheme ":" ( hier_part | opaque_part ) 245 * opaque_part = uric_no_slash *uric 246 * </pre></blockquote><p> 247 * It's for absolute URI = <scheme>:<scheme-specific-part># 248 * <fragment>. 249 * 250 * @param scheme the scheme string 251 * @param schemeSpecificPart scheme_specific_part 252 * @param fragment the fragment string 253 * @throws URIException If the URI cannot be created. 254 * @see #getDefaultProtocolCharset 255 */ 256 public URI(String scheme, String schemeSpecificPart, String fragment) 257 throws URIException { 258 259 // validate and contruct the URI character sequence 260 if (scheme == null) { 261 throw new URIException(URIException.PARSING, "scheme required"); 262 } 263 char[] s = scheme.toLowerCase().toCharArray(); 264 if (validate(s, URI.scheme)) { 265 _scheme = s; // is_absoluteURI 266 } else { 267 throw new URIException(URIException.PARSING, "incorrect scheme"); 268 } 269 _opaque = encode(schemeSpecificPart, allowed_opaque_part, 270 getProtocolCharset()); 271 // Set flag 272 _is_opaque_part = true; 273 _fragment = fragment.toCharArray(); 274 275 setURI(); 276 } 277 278 279 /*** 280 * Construct a general URI from the given components. 
281 * <p><blockquote><pre> 282 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 283 * absoluteURI = scheme ":" ( hier_part | opaque_part ) 284 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] 285 * hier_part = ( net_path | abs_path ) [ "?" query ] 286 * </pre></blockquote><p> 287 * It's for absolute URI = <scheme>:<path>?<query>#< 288 * fragment> and relative URI = <path>?<query>#<fragment 289 * >. 290 * 291 * @param scheme the scheme string 292 * @param authority the authority string 293 * @param path the path string 294 * @param query the query string 295 * @param fragment the fragment string 296 * @throws URIException If the new URI cannot be created. 297 * @see #getDefaultProtocolCharset 298 */ 299 public URI(String scheme, String authority, String path, String query, 300 String fragment) throws URIException { 301 302 // validate and contruct the URI character sequence 303 StringBuffer buff = new StringBuffer(); 304 if (scheme != null) { 305 buff.append(scheme); 306 buff.append(':'); 307 } 308 if (authority != null) { 309 buff.append("//"); 310 buff.append(authority); 311 } 312 if (path != null) { // accept empty path 313 if ((scheme != null || authority != null) 314 && !path.startsWith("/")) { 315 throw new URIException(URIException.PARSING, 316 "abs_path requested"); 317 } 318 buff.append(path); 319 } 320 if (query != null) { 321 buff.append('?'); 322 buff.append(query); 323 } 324 if (fragment != null) { 325 buff.append('#'); 326 buff.append(fragment); 327 } 328 parseUriReference(buff.toString(), false); 329 } 330 331 332 /*** 333 * Construct a general URI from the given components. 334 * 335 * @param scheme the scheme string 336 * @param userinfo the userinfo string 337 * @param host the host string 338 * @param port the port number 339 * @throws URIException If the new URI cannot be created. 
* @see #getDefaultProtocolCharset
     */
    public URI(String scheme, String userinfo, String host, int port)
        throws URIException {

        // path, query and fragment are all absent
        this(scheme, userinfo, host, port, null, null, null);
    }


    /**
     * Construct a general URI from the given components.
     * Delegates to the seven-argument constructor with no query and no
     * fragment.
     *
     * @param scheme the scheme string
     * @param userinfo the userinfo string
     * @param host the host string
     * @param port the port number
     * @param path the path string
     * @throws URIException If the new URI cannot be created.
     * @see #getDefaultProtocolCharset
     */
    public URI(String scheme, String userinfo, String host, int port,
            String path) throws URIException {

        this(scheme, userinfo, host, port, path, null, null);
    }


    /**
     * Construct a general URI from the given components.
     * Delegates to the seven-argument constructor with no fragment.
     *
     * @param scheme the scheme string
     * @param userinfo the userinfo string
     * @param host the host string
     * @param port the port number
     * @param path the path string
     * @param query the query string
     * @throws URIException If the new URI cannot be created.
     * @see #getDefaultProtocolCharset
     */
    public URI(String scheme, String userinfo, String host, int port,
            String path, String query) throws URIException {

        this(scheme, userinfo, host, port, path, query, null);
    }


    /**
     * Construct a general URI from the given components.
     * <p>
     * The authority handed on is assembled here as
     * <code>[userinfo "@"] host [":" port]</code>. When <code>host</code> is
     * <code>null</code> the authority is <code>null</code> and both
     * <code>userinfo</code> and <code>port</code> are ignored. A
     * <code>port</code> of <code>-1</code> means "no port".
     *
     * @param scheme the scheme string
     * @param userinfo the userinfo string, or <code>null</code> for none
     * @param host the host string, or <code>null</code> for no authority
     * @param port the port number, or <code>-1</code> for none
     * @param path the path string
     * @param query the query string
     * @param fragment the fragment string
     * @throws URIException If the new URI cannot be created.
     * @see #getDefaultProtocolCharset
     */
    public URI(String scheme, String userinfo, String host, int port,
            String path, String query, String fragment) throws URIException {

        this(scheme, (host == null) ? null 
            : ((userinfo != null) ? userinfo + '@' : "") + host 
                + ((port != -1) ? ":" + port : ""), path, query, fragment);
    }


    /**
     * Construct a general URI from the given components.
     * <code>host</code> is passed on in the authority position of the
     * five-argument constructor, with no query.
     *
     * @param scheme the scheme string
     * @param host the host string
     * @param path the path string
     * @param fragment the fragment string
     * @throws URIException If the new URI cannot be created.
     * @see #getDefaultProtocolCharset
     */
    public URI(String scheme, String host, String path, String fragment)
        throws URIException {

        this(scheme, host, path, null, fragment);
    }


    /**
     * Construct a general URI with the given relative URI string.
     * The string is first parsed into a URI of its own (as an unescaped
     * reference) and then resolved against <code>base</code>.
     *
     * @param base the base URI
     * @param relative the relative URI string
     * @throws URIException If the new URI cannot be created.
     */
    public URI(URI base, String relative) throws URIException {
        this(base, new URI(relative));
    }


    /**
     * Construct a general URI with the given relative URI.
     * <p><blockquote><pre>
     *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
     *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
     * </pre></blockquote><p>
     * Resolving Relative References to Absolute Form.
444 * 445 * <strong>Examples of Resolving Relative URI References</strong> 446 * 447 * Within an object with a well-defined base URI of 448 * <p><blockquote><pre> 449 * http://a/b/c/d;p?q 450 * </pre></blockquote><p> 451 * the relative URI would be resolved as follows: 452 * 453 * Normal Examples 454 * 455 * <p><blockquote><pre> 456 * g:h = g:h 457 * g = http://a/b/c/g 458 * ./g = http://a/b/c/g 459 * g/ = http://a/b/c/g/ 460 * /g = http://a/g 461 * //g = http://g 462 * ?y = http://a/b/c/?y 463 * g?y = http://a/b/c/g?y 464 * #s = (current document)#s 465 * g#s = http://a/b/c/g#s 466 * g?y#s = http://a/b/c/g?y#s 467 * ;x = http://a/b/c/;x 468 * g;x = http://a/b/c/g;x 469 * g;x?y#s = http://a/b/c/g;x?y#s 470 * . = http://a/b/c/ 471 * ./ = http://a/b/c/ 472 * .. = http://a/b/ 473 * ../ = http://a/b/ 474 * ../g = http://a/b/g 475 * ../.. = http://a/ 476 * ../../ = http://a/ 477 * ../../g = http://a/g 478 * </pre></blockquote><p> 479 * 480 * Some URI schemes do not allow a hierarchical syntax matching the 481 * <hier_part> syntax, and thus cannot use relative references. 482 * 483 * @param base the base URI 484 * @param relative the relative URI 485 * @throws URIException If the new URI cannot be created. 
*/
    public URI(URI base, URI relative) throws URIException {

        // The base must carry a scheme; otherwise resolution is refused.
        if (base._scheme == null) {
            throw new URIException(URIException.PARSING, "base URI required");
        }
        // Always true at this point (the null case threw above): start from
        // the base's scheme and authority.
        if (base._scheme != null) {
            this._scheme = base._scheme;
            this._authority = base._authority;
        }
        // Opaque URIs have no hierarchy to resolve against: take the opaque
        // part and fragment from the relative URI and finish early.
        // NOTE(review): the scheme stays the base's even when the relative
        // URI is itself opaque with its own scheme; the documented example
        // "g:h = g:h" suggests the relative scheme should win - confirm
        // against how parseUriReference classifies such input.
        if (base._is_opaque_part || relative._is_opaque_part) {
            this._scheme = base._scheme;
            this._is_opaque_part = base._is_opaque_part 
                || relative._is_opaque_part;
            this._opaque = relative._opaque;
            this._fragment = relative._fragment;
            this.setURI();
            return;
        }
        if (relative._scheme != null) {
            // The reference has its own scheme: its scheme, authority and
            // path replace what was inherited from the base.
            this._scheme = relative._scheme;
            this._is_net_path = relative._is_net_path;
            this._authority = relative._authority;
            if (relative._is_server) {
                this._is_server = relative._is_server;
                this._userinfo = relative._userinfo;
                this._host = relative._host;
                this._port = relative._port;
            } else if (relative._is_reg_name) {
                this._is_reg_name = relative._is_reg_name;
            }
            this._is_abs_path = relative._is_abs_path;
            this._is_rel_path = relative._is_rel_path;
            this._path = relative._path;
        } else if (base._authority != null && relative._scheme == null) {
            // (the second condition is implied by the else branch)
            // No scheme on the reference: inherit the base's authority
            // details.
            this._is_net_path = base._is_net_path;
            this._authority = base._authority;
            if (base._is_server) {
                this._is_server = base._is_server;
                this._userinfo = base._userinfo;
                this._host = base._host;
                this._port = base._port;
            } else if (base._is_reg_name) {
                this._is_reg_name = base._is_reg_name;
            }
        }
        // A reference that names an authority overrides whatever authority
        // and path were chosen above.
        if (relative._authority != null) {
            this._is_net_path = relative._is_net_path;
            this._authority = relative._authority;
            if (relative._is_server) {
                this._is_server = relative._is_server;
                this._userinfo = relative._userinfo;
                this._host = relative._host;
                this._port = relative._port;
            } else if (relative._is_reg_name) {
                this._is_reg_name = relative._is_reg_name;
            }
            this._is_abs_path = relative._is_abs_path;
            this._is_rel_path = relative._is_rel_path;
            this._path = relative._path;
        }
        // resolve the path and query if necessary
        if (relative._scheme == null && relative._authority == null) {
            if ((relative._path == null || relative._path.length == 0)
                && relative._query == null) {
                // handle a reference to the current document, see RFC 2396 
                // section 5.2 step 2
                this._path = base._path;
                this._query = base._query;
            } else {
                this._path = resolvePath(base._path, relative._path);
            }
        }
        // base._query removed
        // (a query on the reference always wins over the one copied above)
        if (relative._query != null) {
            this._query = relative._query;
        }
        // base._fragment removed
        // (the base's fragment is never carried over)
        if (relative._fragment != null) {
            this._fragment = relative._fragment;
        }
        // Rebuild the cached URI from the components just assigned.
        // NOTE(review): presumed from the name; setURI is defined elsewhere.
        this.setURI();
    }

    // --------------------------------------------------- Instance Variables

    /** Version ID for serialization */
    static final long serialVersionUID = 604752400577948726L;


    /**
     * Cache the hash code for this URI.
     */
    protected int hash = 0;


    /**
     * This Uniform Resource Identifier (URI).
     * The URI is always in an "escaped" form, since escaping or unescaping
     * a completed URI might change its semantics.  
     */
    protected char[] _uri = null;


    /**
     * The charset of the protocol used by this URI instance.
     * Stays <code>null</code> unless a constructor taking a charset sets it.
     */
    protected String protocolCharset = null;


    /**
     * The default charset of the protocol.  RFC 2277, 2396
     */
    protected static String defaultProtocolCharset = "UTF-8";


    /**
     * The default charset of the document.  RFC 2277, 2396
     * The platform's charset is used for the document by default.
605 */ 606 protected static String defaultDocumentCharset = null; 607 protected static String defaultDocumentCharsetByLocale = null; 608 protected static String defaultDocumentCharsetByPlatform = null; 609 // Static initializer for defaultDocumentCharset 610 static { 611 Locale locale = Locale.getDefault(); 612 // in order to support backward compatiblity 613 if (locale != null) { 614 defaultDocumentCharsetByLocale = 615 LocaleToCharsetMap.getCharset(locale); 616 // set the default document charset 617 defaultDocumentCharset = defaultDocumentCharsetByLocale; 618 } 619 // in order to support platform encoding 620 defaultDocumentCharsetByPlatform = 621 (String) AccessController.doPrivileged( 622 new GetPropertyAction("file.encoding")); 623 if (defaultDocumentCharset == null) { 624 // set the default document charset 625 defaultDocumentCharset = defaultDocumentCharsetByPlatform; 626 } 627 } 628 629 630 /*** 631 * The scheme. 632 */ 633 protected char[] _scheme = null; 634 635 636 /*** 637 * The opaque. 638 */ 639 protected char[] _opaque = null; 640 641 642 /*** 643 * The authority. 644 */ 645 protected char[] _authority = null; 646 647 648 /*** 649 * The userinfo. 650 */ 651 protected char[] _userinfo = null; 652 653 654 /*** 655 * The host. 656 */ 657 protected char[] _host = null; 658 659 660 /*** 661 * The port. 662 */ 663 protected int _port = -1; 664 665 666 /*** 667 * The path. 668 */ 669 protected char[] _path = null; 670 671 672 /*** 673 * The query. 674 */ 675 protected char[] _query = null; 676 677 678 /*** 679 * The fragment. 680 */ 681 protected char[] _fragment = null; 682 683 684 /*** 685 * The root path. 686 */ 687 protected static char[] rootPath = { '/' }; 688 689 // ---------------------- Generous characters for each component validation 690 691 /*** 692 * The percent "%" character always has the reserved purpose of being the 693 * escape indicator, it must be escaped as "%25" in order to be used as 694 * data within a URI. 
695 */ 696 protected static final BitSet percent = new BitSet(256); 697 // Static initializer for percent 698 static { 699 percent.set('%'); 700 } 701 702 703 /*** 704 * BitSet for digit. 705 * <p><blockquote><pre> 706 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | 707 * "8" | "9" 708 * </pre></blockquote><p> 709 */ 710 protected static final BitSet digit = new BitSet(256); 711 // Static initializer for digit 712 static { 713 for (int i = '0'; i <= '9'; i++) { 714 digit.set(i); 715 } 716 } 717 718 719 /*** 720 * BitSet for alpha. 721 * <p><blockquote><pre> 722 * alpha = lowalpha | upalpha 723 * </pre></blockquote><p> 724 */ 725 protected static final BitSet alpha = new BitSet(256); 726 // Static initializer for alpha 727 static { 728 for (int i = 'a'; i <= 'z'; i++) { 729 alpha.set(i); 730 } 731 for (int i = 'A'; i <= 'Z'; i++) { 732 alpha.set(i); 733 } 734 } 735 736 737 /*** 738 * BitSet for alphanum (join of alpha & digit). 739 * <p><blockquote><pre> 740 * alphanum = alpha | digit 741 * </pre></blockquote><p> 742 */ 743 protected static final BitSet alphanum = new BitSet(256); 744 // Static initializer for alphanum 745 static { 746 alphanum.or(alpha); 747 alphanum.or(digit); 748 } 749 750 751 /*** 752 * BitSet for hex. 753 * <p><blockquote><pre> 754 * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" | 755 * "a" | "b" | "c" | "d" | "e" | "f" 756 * </pre></blockquote><p> 757 */ 758 protected static final BitSet hex = new BitSet(256); 759 // Static initializer for hex 760 static { 761 hex.or(digit); 762 for (int i = 'a'; i <= 'f'; i++) { 763 hex.set(i); 764 } 765 for (int i = 'A'; i <= 'F'; i++) { 766 hex.set(i); 767 } 768 } 769 770 771 /*** 772 * BitSet for escaped. 773 * <p><blockquote><pre> 774 * escaped = "%" hex hex 775 * </pre></blockquote><p> 776 */ 777 protected static final BitSet escaped = new BitSet(256); 778 // Static initializer for escaped 779 static { 780 escaped.or(percent); 781 escaped.or(hex); 782 } 783 784 785 /*** 786 * BitSet for mark. 
787 * <p><blockquote><pre> 788 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | 789 * "(" | ")" 790 * </pre></blockquote><p> 791 */ 792 protected static final BitSet mark = new BitSet(256); 793 // Static initializer for mark 794 static { 795 mark.set('-'); 796 mark.set('_'); 797 mark.set('.'); 798 mark.set('!'); 799 mark.set('~'); 800 mark.set('*'); 801 mark.set('\''); 802 mark.set('('); 803 mark.set(')'); 804 } 805 806 807 /*** 808 * Data characters that are allowed in a URI but do not have a reserved 809 * purpose are called unreserved. 810 * <p><blockquote><pre> 811 * unreserved = alphanum | mark 812 * </pre></blockquote><p> 813 */ 814 protected static final BitSet unreserved = new BitSet(256); 815 // Static initializer for unreserved 816 static { 817 unreserved.or(alphanum); 818 unreserved.or(mark); 819 } 820 821 822 /*** 823 * BitSet for reserved. 824 * <p><blockquote><pre> 825 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | 826 * "$" | "," 827 * </pre></blockquote><p> 828 */ 829 protected static final BitSet reserved = new BitSet(256); 830 // Static initializer for reserved 831 static { 832 reserved.set(';'); 833 reserved.set('/'); 834 reserved.set('?'); 835 reserved.set(':'); 836 reserved.set('@'); 837 reserved.set('&'); 838 reserved.set('='); 839 reserved.set('+'); 840 reserved.set('$'); 841 reserved.set(','); 842 } 843 844 845 /*** 846 * BitSet for uric. 847 * <p><blockquote><pre> 848 * uric = reserved | unreserved | escaped 849 * </pre></blockquote><p> 850 */ 851 protected static final BitSet uric = new BitSet(256); 852 // Static initializer for uric 853 static { 854 uric.or(reserved); 855 uric.or(unreserved); 856 uric.or(escaped); 857 } 858 859 860 /*** 861 * BitSet for fragment (alias for uric). 862 * <p><blockquote><pre> 863 * fragment = *uric 864 * </pre></blockquote><p> 865 */ 866 protected static final BitSet fragment = uric; 867 868 869 /*** 870 * BitSet for query (alias for uric). 
871 * <p><blockquote><pre> 872 * query = *uric 873 * </pre></blockquote><p> 874 */ 875 protected static final BitSet query = uric; 876 877 878 /*** 879 * BitSet for pchar. 880 * <p><blockquote><pre> 881 * pchar = unreserved | escaped | 882 * ":" | "@" | "&" | "=" | "+" | "$" | "," 883 * </pre></blockquote><p> 884 */ 885 protected static final BitSet pchar = new BitSet(256); 886 // Static initializer for pchar 887 static { 888 pchar.or(unreserved); 889 pchar.or(escaped); 890 pchar.set(':'); 891 pchar.set('@'); 892 pchar.set('&'); 893 pchar.set('='); 894 pchar.set('+'); 895 pchar.set('$'); 896 pchar.set(','); 897 } 898 899 900 /*** 901 * BitSet for param (alias for pchar). 902 * <p><blockquote><pre> 903 * param = *pchar 904 * </pre></blockquote><p> 905 */ 906 protected static final BitSet param = pchar; 907 908 909 /*** 910 * BitSet for segment. 911 * <p><blockquote><pre> 912 * segment = *pchar *( ";" param ) 913 * </pre></blockquote><p> 914 */ 915 protected static final BitSet segment = new BitSet(256); 916 // Static initializer for segment 917 static { 918 segment.or(pchar); 919 segment.set(';'); 920 segment.or(param); 921 } 922 923 924 /*** 925 * BitSet for path segments. 926 * <p><blockquote><pre> 927 * path_segments = segment *( "/" segment ) 928 * </pre></blockquote><p> 929 */ 930 protected static final BitSet path_segments = new BitSet(256); 931 // Static initializer for path_segments 932 static { 933 path_segments.set('/'); 934 path_segments.or(segment); 935 } 936 937 938 /*** 939 * URI absolute path. 940 * <p><blockquote><pre> 941 * abs_path = "/" path_segments 942 * </pre></blockquote><p> 943 */ 944 protected static final BitSet abs_path = new BitSet(256); 945 // Static initializer for abs_path 946 static { 947 abs_path.set('/'); 948 abs_path.or(path_segments); 949 } 950 951 952 /*** 953 * URI bitset for encoding typical non-slash characters. 954 * <p><blockquote><pre> 955 * uric_no_slash = unreserved | escaped | ";" | "?" 
| ":" | "@" | 956 * "&" | "=" | "+" | "$" | "," 957 * </pre></blockquote><p> 958 */ 959 protected static final BitSet uric_no_slash = new BitSet(256); 960 // Static initializer for uric_no_slash 961 static { 962 uric_no_slash.or(unreserved); 963 uric_no_slash.or(escaped); 964 uric_no_slash.set(';'); 965 uric_no_slash.set('?'); 966 uric_no_slash.set(';'); 967 uric_no_slash.set('@'); 968 uric_no_slash.set('&'); 969 uric_no_slash.set('='); 970 uric_no_slash.set('+'); 971 uric_no_slash.set('$'); 972 uric_no_slash.set(','); 973 } 974 975 976 /*** 977 * URI bitset that combines uric_no_slash and uric. 978 * <p><blockquote><pre> 979 * opaque_part = uric_no_slash *uric 980 * </pre></blockquote><p> 981 */ 982 protected static final BitSet opaque_part = new BitSet(256); 983 // Static initializer for opaque_part 984 static { 985 // it's generous. because first character must not include a slash 986 opaque_part.or(uric_no_slash); 987 opaque_part.or(uric); 988 } 989 990 991 /*** 992 * URI bitset that combines absolute path and opaque part. 993 * <p><blockquote><pre> 994 * path = [ abs_path | opaque_part ] 995 * </pre></blockquote><p> 996 */ 997 protected static final BitSet path = new BitSet(256); 998 // Static initializer for path 999 static { 1000 path.or(abs_path); 1001 path.or(opaque_part); 1002 } 1003 1004 1005 /*** 1006 * Port, a logical alias for digit. 1007 */ 1008 protected static final BitSet port = digit; 1009 1010 1011 /*** 1012 * Bitset that combines digit and dot fo IPv$address. 1013 * <p><blockquote><pre> 1014 * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit 1015 * </pre></blockquote><p> 1016 */ 1017 protected static final BitSet IPv4address = new BitSet(256); 1018 // Static initializer for IPv4address 1019 static { 1020 IPv4address.or(digit); 1021 IPv4address.set('.'); 1022 } 1023 1024 1025 /*** 1026 * RFC 2373. 
1027 * <p><blockquote><pre> 1028 * IPv6address = hexpart [ ":" IPv4address ] 1029 * </pre></blockquote><p> 1030 */ 1031 protected static final BitSet IPv6address = new BitSet(256); 1032 // Static initializer for IPv6address reference 1033 static { 1034 IPv6address.or(hex); // hexpart 1035 IPv6address.set(':'); 1036 IPv6address.or(IPv4address); 1037 } 1038 1039 1040 /*** 1041 * RFC 2732, 2373. 1042 * <p><blockquote><pre> 1043 * IPv6reference = "[" IPv6address "]" 1044 * </pre></blockquote><p> 1045 */ 1046 protected static final BitSet IPv6reference = new BitSet(256); 1047 // Static initializer for IPv6reference 1048 static { 1049 IPv6reference.set('['); 1050 IPv6reference.or(IPv6address); 1051 IPv6reference.set(']'); 1052 } 1053 1054 1055 /*** 1056 * BitSet for toplabel. 1057 * <p><blockquote><pre> 1058 * toplabel = alpha | alpha *( alphanum | "-" ) alphanum 1059 * </pre></blockquote><p> 1060 */ 1061 protected static final BitSet toplabel = new BitSet(256); 1062 // Static initializer for toplabel 1063 static { 1064 toplabel.or(alphanum); 1065 toplabel.set('-'); 1066 } 1067 1068 1069 /*** 1070 * BitSet for domainlabel. 1071 * <p><blockquote><pre> 1072 * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum 1073 * </pre></blockquote><p> 1074 */ 1075 protected static final BitSet domainlabel = toplabel; 1076 1077 1078 /*** 1079 * BitSet for hostname. 1080 * <p><blockquote><pre> 1081 * hostname = *( domainlabel "." ) toplabel [ "." ] 1082 * </pre></blockquote><p> 1083 */ 1084 protected static final BitSet hostname = new BitSet(256); 1085 // Static initializer for hostname 1086 static { 1087 hostname.or(toplabel); 1088 // hostname.or(domainlabel); 1089 hostname.set('.'); 1090 } 1091 1092 1093 /*** 1094 * BitSet for host. 
1095 * <p><blockquote><pre> 1096 * host = hostname | IPv4address | IPv6reference 1097 * </pre></blockquote><p> 1098 */ 1099 protected static final BitSet host = new BitSet(256); 1100 // Static initializer for host 1101 static { 1102 host.or(hostname); 1103 // host.or(IPv4address); 1104 host.or(IPv6reference); // IPv4address 1105 } 1106 1107 1108 /*** 1109 * BitSet for hostport. 1110 * <p><blockquote><pre> 1111 * hostport = host [ ":" port ] 1112 * </pre></blockquote><p> 1113 */ 1114 protected static final BitSet hostport = new BitSet(256); 1115 // Static initializer for hostport 1116 static { 1117 hostport.or(host); 1118 hostport.set(':'); 1119 hostport.or(port); 1120 } 1121 1122 1123 /*** 1124 * Bitset for userinfo. 1125 * <p><blockquote><pre> 1126 * userinfo = *( unreserved | escaped | 1127 * ";" | ":" | "&" | "=" | "+" | "$" | "," ) 1128 * </pre></blockquote><p> 1129 */ 1130 protected static final BitSet userinfo = new BitSet(256); 1131 // Static initializer for userinfo 1132 static { 1133 userinfo.or(unreserved); 1134 userinfo.or(escaped); 1135 userinfo.set(';'); 1136 userinfo.set(':'); 1137 userinfo.set('&'); 1138 userinfo.set('='); 1139 userinfo.set('+'); 1140 userinfo.set('$'); 1141 userinfo.set(','); 1142 } 1143 1144 1145 /*** 1146 * BitSet for within the userinfo component like user and password. 1147 */ 1148 public static final BitSet within_userinfo = new BitSet(256); 1149 // Static initializer for within_userinfo 1150 static { 1151 within_userinfo.or(userinfo); 1152 within_userinfo.clear(';'); // reserved within authority 1153 within_userinfo.clear(':'); 1154 within_userinfo.clear('@'); 1155 within_userinfo.clear('?'); 1156 within_userinfo.clear('/'); 1157 } 1158 1159 1160 /*** 1161 * Bitset for server. 
1162 * <p><blockquote><pre> 1163 * server = [ [ userinfo "@" ] hostport ] 1164 * </pre></blockquote><p> 1165 */ 1166 protected static final BitSet server = new BitSet(256); 1167 // Static initializer for server 1168 static { 1169 server.or(userinfo); 1170 server.set('@'); 1171 server.or(hostport); 1172 } 1173 1174 1175 /*** 1176 * BitSet for reg_name. 1177 * <p><blockquote><pre> 1178 * reg_name = 1*( unreserved | escaped | "$" | "," | 1179 * ";" | ":" | "@" | "&" | "=" | "+" ) 1180 * </pre></blockquote><p> 1181 */ 1182 protected static final BitSet reg_name = new BitSet(256); 1183 // Static initializer for reg_name 1184 static { 1185 reg_name.or(unreserved); 1186 reg_name.or(escaped); 1187 reg_name.set('$'); 1188 reg_name.set(','); 1189 reg_name.set(';'); 1190 reg_name.set(':'); 1191 reg_name.set('@'); 1192 reg_name.set('&'); 1193 reg_name.set('='); 1194 reg_name.set('+'); 1195 } 1196 1197 1198 /*** 1199 * BitSet for authority. 1200 * <p><blockquote><pre> 1201 * authority = server | reg_name 1202 * </pre></blockquote><p> 1203 */ 1204 protected static final BitSet authority = new BitSet(256); 1205 // Static initializer for authority 1206 static { 1207 authority.or(server); 1208 authority.or(reg_name); 1209 } 1210 1211 1212 /*** 1213 * BitSet for scheme. 1214 * <p><blockquote><pre> 1215 * scheme = alpha *( alpha | digit | "+" | "-" | "." ) 1216 * </pre></blockquote><p> 1217 */ 1218 protected static final BitSet scheme = new BitSet(256); 1219 // Static initializer for scheme 1220 static { 1221 scheme.or(alpha); 1222 scheme.or(digit); 1223 scheme.set('+'); 1224 scheme.set('-'); 1225 scheme.set('.'); 1226 } 1227 1228 1229 /*** 1230 * BitSet for rel_segment. 
1231 * <p><blockquote><pre> 1232 * rel_segment = 1*( unreserved | escaped | 1233 * ";" | "@" | "&" | "=" | "+" | "$" | "," ) 1234 * </pre></blockquote><p> 1235 */ 1236 protected static final BitSet rel_segment = new BitSet(256); 1237 // Static initializer for rel_segment 1238 static { 1239 rel_segment.or(unreserved); 1240 rel_segment.or(escaped); 1241 rel_segment.set(';'); 1242 rel_segment.set('@'); 1243 rel_segment.set('&'); 1244 rel_segment.set('='); 1245 rel_segment.set('+'); 1246 rel_segment.set('$'); 1247 rel_segment.set(','); 1248 } 1249 1250 1251 /*** 1252 * BitSet for rel_path. 1253 * <p><blockquote><pre> 1254 * rel_path = rel_segment [ abs_path ] 1255 * </pre></blockquote><p> 1256 */ 1257 protected static final BitSet rel_path = new BitSet(256); 1258 // Static initializer for rel_path 1259 static { 1260 rel_path.or(rel_segment); 1261 rel_path.or(abs_path); 1262 } 1263 1264 1265 /*** 1266 * BitSet for net_path. 1267 * <p><blockquote><pre> 1268 * net_path = "//" authority [ abs_path ] 1269 * </pre></blockquote><p> 1270 */ 1271 protected static final BitSet net_path = new BitSet(256); 1272 // Static initializer for net_path 1273 static { 1274 net_path.set('/'); 1275 net_path.or(authority); 1276 net_path.or(abs_path); 1277 } 1278 1279 1280 /*** 1281 * BitSet for hier_part. 1282 * <p><blockquote><pre> 1283 * hier_part = ( net_path | abs_path ) [ "?" query ] 1284 * </pre></blockquote><p> 1285 */ 1286 protected static final BitSet hier_part = new BitSet(256); 1287 // Static initializer for hier_part 1288 static { 1289 hier_part.or(net_path); 1290 hier_part.or(abs_path); 1291 // hier_part.set('?'); aleady included 1292 hier_part.or(query); 1293 } 1294 1295 1296 /*** 1297 * BitSet for relativeURI. 1298 * <p><blockquote><pre> 1299 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" 
query ] 1300 * </pre></blockquote><p> 1301 */ 1302 protected static final BitSet relativeURI = new BitSet(256); 1303 // Static initializer for relativeURI 1304 static { 1305 relativeURI.or(net_path); 1306 relativeURI.or(abs_path); 1307 relativeURI.or(rel_path); 1308 // relativeURI.set('?'); aleady included 1309 relativeURI.or(query); 1310 } 1311 1312 1313 /*** 1314 * BitSet for absoluteURI. 1315 * <p><blockquote><pre> 1316 * absoluteURI = scheme ":" ( hier_part | opaque_part ) 1317 * </pre></blockquote><p> 1318 */ 1319 protected static final BitSet absoluteURI = new BitSet(256); 1320 // Static initializer for absoluteURI 1321 static { 1322 absoluteURI.or(scheme); 1323 absoluteURI.set(':'); 1324 absoluteURI.or(hier_part); 1325 absoluteURI.or(opaque_part); 1326 } 1327 1328 1329 /*** 1330 * BitSet for URI-reference. 1331 * <p><blockquote><pre> 1332 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 1333 * </pre></blockquote><p> 1334 */ 1335 protected static final BitSet URI_reference = new BitSet(256); 1336 // Static initializer for URI_reference 1337 static { 1338 URI_reference.or(absoluteURI); 1339 URI_reference.or(relativeURI); 1340 URI_reference.set('#'); 1341 URI_reference.or(fragment); 1342 } 1343 1344 // ---------------------------- Characters disallowed within the URI syntax 1345 // Excluded US-ASCII Characters are like control, space, delims and unwise 1346 1347 /*** 1348 * BitSet for control. 1349 */ 1350 public static final BitSet control = new BitSet(256); 1351 // Static initializer for control 1352 static { 1353 for (int i = 0; i <= 0x1F; i++) { 1354 control.set(i); 1355 } 1356 control.set(0x7F); 1357 } 1358 1359 /*** 1360 * BitSet for space. 1361 */ 1362 public static final BitSet space = new BitSet(256); 1363 // Static initializer for space 1364 static { 1365 space.set(0x20); 1366 } 1367 1368 1369 /*** 1370 * BitSet for delims. 
1371 */ 1372 public static final BitSet delims = new BitSet(256); 1373 // Static initializer for delims 1374 static { 1375 delims.set('<'); 1376 delims.set('>'); 1377 delims.set('#'); 1378 delims.set('%'); 1379 delims.set('"'); 1380 } 1381 1382 1383 /*** 1384 * BitSet for unwise. 1385 */ 1386 public static final BitSet unwise = new BitSet(256); 1387 // Static initializer for unwise 1388 static { 1389 unwise.set('{'); 1390 unwise.set('}'); 1391 unwise.set('|'); 1392 unwise.set('//'); 1393 unwise.set('^'); 1394 unwise.set('['); 1395 unwise.set(']'); 1396 unwise.set('`'); 1397 } 1398 1399 1400 /*** 1401 * Disallowed rel_path before escaping. 1402 */ 1403 public static final BitSet disallowed_rel_path = new BitSet(256); 1404 // Static initializer for disallowed_rel_path 1405 static { 1406 disallowed_rel_path.or(uric); 1407 disallowed_rel_path.andNot(rel_path); 1408 } 1409 1410 1411 /*** 1412 * Disallowed opaque_part before escaping. 1413 */ 1414 public static final BitSet disallowed_opaque_part = new BitSet(256); 1415 // Static initializer for disallowed_opaque_part 1416 static { 1417 disallowed_opaque_part.or(uric); 1418 disallowed_opaque_part.andNot(opaque_part); 1419 } 1420 1421 // ----------------------- Characters allowed within and for each component 1422 1423 /*** 1424 * Those characters that are allowed for the authority component. 1425 */ 1426 public static final BitSet allowed_authority = new BitSet(256); 1427 // Static initializer for allowed_authority 1428 static { 1429 allowed_authority.or(authority); 1430 allowed_authority.clear('%'); 1431 } 1432 1433 1434 /*** 1435 * Those characters that are allowed for the opaque_part. 1436 */ 1437 public static final BitSet allowed_opaque_part = new BitSet(256); 1438 // Static initializer for allowed_opaque_part 1439 static { 1440 allowed_opaque_part.or(opaque_part); 1441 allowed_opaque_part.clear('%'); 1442 } 1443 1444 1445 /*** 1446 * Those characters that are allowed for the reg_name. 
1447 */ 1448 public static final BitSet allowed_reg_name = new BitSet(256); 1449 // Static initializer for allowed_reg_name 1450 static { 1451 allowed_reg_name.or(reg_name); 1452 // allowed_reg_name.andNot(percent); 1453 allowed_reg_name.clear('%'); 1454 } 1455 1456 1457 /*** 1458 * Those characters that are allowed for the userinfo component. 1459 */ 1460 public static final BitSet allowed_userinfo = new BitSet(256); 1461 // Static initializer for allowed_userinfo 1462 static { 1463 allowed_userinfo.or(userinfo); 1464 // allowed_userinfo.andNot(percent); 1465 allowed_userinfo.clear('%'); 1466 } 1467 1468 1469 /*** 1470 * Those characters that are allowed for within the userinfo component. 1471 */ 1472 public static final BitSet allowed_within_userinfo = new BitSet(256); 1473 // Static initializer for allowed_within_userinfo 1474 static { 1475 allowed_within_userinfo.or(within_userinfo); 1476 allowed_within_userinfo.clear('%'); 1477 } 1478 1479 1480 /*** 1481 * Those characters that are allowed for the IPv6reference component. 1482 * The characters '[', ']' in IPv6reference should be excluded. 1483 */ 1484 public static final BitSet allowed_IPv6reference = new BitSet(256); 1485 // Static initializer for allowed_IPv6reference 1486 static { 1487 allowed_IPv6reference.or(IPv6reference); 1488 // allowed_IPv6reference.andNot(unwise); 1489 allowed_IPv6reference.clear('['); 1490 allowed_IPv6reference.clear(']'); 1491 } 1492 1493 1494 /*** 1495 * Those characters that are allowed for the host component. 1496 * The characters '[', ']' in IPv6reference should be excluded. 1497 */ 1498 public static final BitSet allowed_host = new BitSet(256); 1499 // Static initializer for allowed_host 1500 static { 1501 allowed_host.or(hostname); 1502 allowed_host.or(allowed_IPv6reference); 1503 } 1504 1505 1506 /*** 1507 * Those characters that are allowed for the authority component. 
1508 */ 1509 public static final BitSet allowed_within_authority = new BitSet(256); 1510 // Static initializer for allowed_within_authority 1511 static { 1512 allowed_within_authority.or(server); 1513 allowed_within_authority.or(reg_name); 1514 allowed_within_authority.clear(';'); 1515 allowed_within_authority.clear(':'); 1516 allowed_within_authority.clear('@'); 1517 allowed_within_authority.clear('?'); 1518 allowed_within_authority.clear('/'); 1519 } 1520 1521 1522 /*** 1523 * Those characters that are allowed for the abs_path. 1524 */ 1525 public static final BitSet allowed_abs_path = new BitSet(256); 1526 // Static initializer for allowed_abs_path 1527 static { 1528 allowed_abs_path.or(abs_path); 1529 // allowed_abs_path.set('/'); // aleady included 1530 allowed_abs_path.andNot(percent); 1531 } 1532 1533 1534 /*** 1535 * Those characters that are allowed for the rel_path. 1536 */ 1537 public static final BitSet allowed_rel_path = new BitSet(256); 1538 // Static initializer for allowed_rel_path 1539 static { 1540 allowed_rel_path.or(rel_path); 1541 allowed_rel_path.clear('%'); 1542 } 1543 1544 1545 /*** 1546 * Those characters that are allowed within the path. 1547 */ 1548 public static final BitSet allowed_within_path = new BitSet(256); 1549 // Static initializer for allowed_within_path 1550 static { 1551 allowed_within_path.or(abs_path); 1552 allowed_within_path.clear('/'); 1553 allowed_within_path.clear(';'); 1554 allowed_within_path.clear('='); 1555 allowed_within_path.clear('?'); 1556 } 1557 1558 1559 /*** 1560 * Those characters that are allowed for the query component. 1561 */ 1562 public static final BitSet allowed_query = new BitSet(256); 1563 // Static initializer for allowed_query 1564 static { 1565 allowed_query.or(uric); 1566 allowed_query.clear('%'); 1567 } 1568 1569 1570 /*** 1571 * Those characters that are allowed within the query component. 
1572 */ 1573 public static final BitSet allowed_within_query = new BitSet(256); 1574 // Static initializer for allowed_within_query 1575 static { 1576 allowed_within_query.or(allowed_query); 1577 allowed_within_query.andNot(reserved); // excluded 'reserved' 1578 } 1579 1580 1581 /*** 1582 * Those characters that are allowed for the fragment component. 1583 */ 1584 public static final BitSet allowed_fragment = new BitSet(256); 1585 // Static initializer for allowed_fragment 1586 static { 1587 allowed_fragment.or(uric); 1588 allowed_fragment.clear('%'); 1589 } 1590 1591 // ------------------------------------------- Flags for this URI-reference 1592 1593 // TODO: Figure out what all these variables are for and provide javadoc 1594 1595 // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] 1596 // absoluteURI = scheme ":" ( hier_part | opaque_part ) 1597 protected boolean _is_hier_part; 1598 protected boolean _is_opaque_part; 1599 // relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] 1600 // hier_part = ( net_path | abs_path ) [ "?" query ] 1601 protected boolean _is_net_path; 1602 protected boolean _is_abs_path; 1603 protected boolean _is_rel_path; 1604 // net_path = "//" authority [ abs_path ] 1605 // authority = server | reg_name 1606 protected boolean _is_reg_name; 1607 protected boolean _is_server; // = _has_server 1608 // server = [ [ userinfo "@" ] hostport ] 1609 // host = hostname | IPv4address | IPv6reference 1610 protected boolean _is_hostname; 1611 protected boolean _is_IPv4address; 1612 protected boolean _is_IPv6reference; 1613 1614 // ------------------------------------------ Character and escape encoding 1615 1616 /*** 1617 * Encodes URI string. 
1618 * 1619 * This is a two mapping, one from original characters to octets, and 1620 * subsequently a second from octets to URI characters: 1621 * <p><blockquote><pre> 1622 * original character sequence->octet sequence->URI character sequence 1623 * </pre></blockquote><p> 1624 * 1625 * An escaped octet is encoded as a character triplet, consisting of the 1626 * percent character "%" followed by the two hexadecimal digits 1627 * representing the octet code. For example, "%20" is the escaped 1628 * encoding for the US-ASCII space character. 1629 * <p> 1630 * Conversion from the local filesystem character set to UTF-8 will 1631 * normally involve a two step process. First convert the local character 1632 * set to the UCS; then convert the UCS to UTF-8. 1633 * The first step in the process can be performed by maintaining a mapping 1634 * table that includes the local character set code and the corresponding 1635 * UCS code. 1636 * The next step is to convert the UCS character code to the UTF-8 encoding. 1637 * <p> 1638 * Mapping between vendor codepages can be done in a very similar manner 1639 * as described above. 1640 * <p> 1641 * The only time escape encodings can allowedly be made is when a URI is 1642 * being created from its component parts. The escape and validate methods 1643 * are internally performed within this method. 1644 * 1645 * @param original the original character sequence 1646 * @param allowed those characters that are allowed within a component 1647 * @param charset the protocol charset 1648 * @return URI character sequence 1649 * @throws URIException null component or unsupported character encoding 1650 */ 1651 protected static char[] encode(String original, BitSet allowed, 1652 String charset) throws URIException { 1653 1654 // encode original to uri characters. 1655 if (original == null) { 1656 throw new URIException(URIException.PARSING, "null"); 1657 } 1658 // escape octet to uri characters. 
1659 if (allowed == null) { 1660 throw new URIException(URIException.PARSING, 1661 "null allowed characters"); 1662 } 1663 byte[] octets; 1664 try { 1665 octets = original.getBytes(charset); 1666 } catch (UnsupportedEncodingException error) { 1667 throw new URIException(URIException.UNSUPPORTED_ENCODING, charset); 1668 } 1669 StringBuffer buf = new StringBuffer(octets.length); 1670 for (int i = 0; i < octets.length; i++) { 1671 char c = (char) octets[i]; 1672 if (allowed.get(c)) { 1673 buf.append(c); 1674 } else { 1675 buf.append('%'); 1676 byte b = octets[i]; // use the original byte value 1677 char hexadecimal = Character.forDigit((b >> 4) & 0xF, 16); 1678 buf.append(Character.toUpperCase(hexadecimal)); // high 1679 hexadecimal = Character.forDigit(b & 0xF, 16); 1680 buf.append(Character.toUpperCase(hexadecimal)); // low 1681 } 1682 } 1683 1684 return buf.toString().toCharArray(); 1685 } 1686 1687 1688 /*** 1689 * Decodes URI encoded string. 1690 * 1691 * This is a two mapping, one from URI characters to octets, and 1692 * subsequently a second from octets to original characters: 1693 * <p><blockquote><pre> 1694 * URI character sequence->octet sequence->original character sequence 1695 * </pre></blockquote><p> 1696 * 1697 * A URI must be separated into its components before the escaped 1698 * characters within those components can be allowedly decoded. 1699 * <p> 1700 * Notice that there is a chance that URI characters that are non UTF-8 1701 * may be parsed as valid UTF-8. A recent non-scientific analysis found 1702 * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a 1703 * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0% 1704 * false reading. 1705 * <p> 1706 * The percent "%" character always has the reserved purpose of being 1707 * the escape indicator, it must be escaped as "%25" in order to be used 1708 * as data within a URI. 1709 * <p> 1710 * The unescape method is internally performed within this method. 
1711 * 1712 * @param component the URI character sequence 1713 * @param charset the protocol charset 1714 * @return original character sequence 1715 * @throws URIException incomplete trailing escape pattern or unsupported 1716 * character encoding 1717 */ 1718 protected static String decode(char[] component, String charset) 1719 throws URIException { 1720 1721 // unescape uri characters to octets 1722 if (component == null) { 1723 return null; 1724 } 1725 1726 byte[] octets; 1727 try { 1728 octets = new String(component).getBytes(charset); 1729 } catch (UnsupportedEncodingException error) { 1730 throw new URIException(URIException.UNSUPPORTED_ENCODING, 1731 "not supported " + charset + " encoding"); 1732 } 1733 int length = octets.length; 1734 int oi = 0; // output index 1735 for (int ii = 0; ii < length; oi++) { 1736 byte aByte = (byte) octets[ii++]; 1737 if (aByte == '%' && ii + 2 <= length) { 1738 byte high = (byte) Character.digit((char) octets[ii++], 16); 1739 byte low = (byte) Character.digit((char) octets[ii++], 16); 1740 if (high == -1 || low == -1) { 1741 throw new URIException(URIException.ESCAPING, 1742 "incomplete trailing escape pattern"); 1743 1744 } 1745 aByte = (byte) ((high << 4) + low); 1746 } 1747 octets[oi] = (byte) aByte; 1748 } 1749 1750 String result; 1751 try { 1752 result = new String(octets, 0, oi, charset); 1753 } catch (UnsupportedEncodingException error) { 1754 throw new URIException(URIException.UNSUPPORTED_ENCODING, 1755 "not supported " + charset + " encoding"); 1756 } 1757 1758 return result; 1759 } 1760 1761 1762 /*** 1763 * Pre-validate the unescaped URI string within a specific component. 
1764 * 1765 * @param component the component string within the component 1766 * @param disallowed those characters disallowed within the component 1767 * @return if true, it doesn't have the disallowed characters 1768 * if false, the component is undefined or an incorrect one 1769 */ 1770 protected boolean prevalidate(String component, BitSet disallowed) { 1771 // prevalidate the given component by disallowed characters 1772 if (component == null) { 1773 return false; // undefined 1774 } 1775 char[] target = component.toCharArray(); 1776 for (int i = 0; i < target.length; i++) { 1777 if (disallowed.get(target[i])) { 1778 return false; 1779 } 1780 } 1781 return true; 1782 } 1783 1784 1785 /*** 1786 * Validate the URI characters within a specific component. 1787 * The component must be performed after escape encoding. Or it doesn't 1788 * include escaped characters. 1789 * 1790 * @param component the characters sequence within the component 1791 * @param generous those characters that are allowed within a component 1792 * @return if true, it's the correct URI character sequence 1793 */ 1794 protected boolean validate(char[] component, BitSet generous) { 1795 // validate each component by generous characters 1796 return validate(component, 0, -1, generous); 1797 } 1798 1799 1800 /*** 1801 * Validate the URI characters within a specific component. 1802 * The component must be performed after escape encoding. Or it doesn't 1803 * include escaped characters. 1804 * <p> 1805 * It's not that much strict, generous. The strict validation might be 1806 * performed before being called this method. 
1807 * 1808 * @param component the characters sequence within the component 1809 * @param soffset the starting offset of the given component 1810 * @param eoffset the ending offset of the given component 1811 * if -1, it means the length of the component 1812 * @param generous those characters that are allowed within a component 1813 * @return if true, it's the correct URI character sequence 1814 */ 1815 protected boolean validate(char[] component, int soffset, int eoffset, 1816 BitSet generous) { 1817 // validate each component by generous characters 1818 if (eoffset == -1) { 1819 eoffset = component.length - 1; 1820 } 1821 for (int i = soffset; i <= eoffset; i++) { 1822 if (!generous.get(component[i])) { 1823 return false; 1824 } 1825 } 1826 return true; 1827 } 1828 1829 1830 /*** 1831 * In order to avoid any possilbity of conflict with non-ASCII characters, 1832 * Parse a URI reference as a <code>String</code> with the character 1833 * encoding of the local system or the document. 1834 * <p> 1835 * The following line is the regular expression for breaking-down a URI 1836 * reference into its components. 1837 * <p><blockquote><pre> 1838 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? 1839 * 12 3 4 5 6 7 8 9 1840 * </pre></blockquote><p> 1841 * For example, matching the above expression to 1842 * http://jakarta.apache.org/ietf/uri/#Related 1843 * results in the following subexpression matches: 1844 * <p><blockquote><pre> 1845 * $1 = http: 1846 * scheme = $2 = http 1847 * $3 = //jakarta.apache.org 1848 * authority = $4 = jakarta.apache.org 1849 * path = $5 = /ietf/uri/ 1850 * $6 = <undefined> 1851 * query = $7 = <undefined> 1852 * $8 = #Related 1853 * fragment = $9 = Related 1854 * </pre></blockquote><p> 1855 * 1856 * @param original the original character sequence 1857 * @param escaped <code>true</code> if <code>original</code> is escaped 1858 * @throws URIException If an error occurs. 
1859 */ 1860 protected void parseUriReference(String original, boolean escaped) 1861 throws URIException { 1862 1863 // validate and contruct the URI character sequence 1864 if (original == null) { 1865 throw new URIException("URI-Reference required"); 1866 } 1867 1868 /* @ 1869 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? 1870 */ 1871 String tmp = original.trim(); 1872 1873 /* 1874 * The length of the string sequence of characters. 1875 * It may not be equal to the length of the byte array. 1876 */ 1877 int length = tmp.length(); 1878 1879 /* 1880 * Remove the delimiters like angle brackets around an URI. 1881 */ 1882 if (length > 0) { 1883 char[] firstDelimiter = { tmp.charAt(0) }; 1884 if (validate(firstDelimiter, delims)) { 1885 if (length >= 2) { 1886 char[] lastDelimiter = { tmp.charAt(length - 1) }; 1887 if (validate(lastDelimiter, delims)) { 1888 tmp = tmp.substring(1, length - 1); 1889 length = length - 2; 1890 } 1891 } 1892 } 1893 } 1894 1895 /* 1896 * The starting index 1897 */ 1898 int from = 0; 1899 1900 /* 1901 * The test flag whether the URI is started from the path component. 1902 */ 1903 boolean isStartedFromPath = false; 1904 int atColon = tmp.indexOf(':'); 1905 int atSlash = tmp.indexOf('/'); 1906 if (atColon < 0 || (atSlash >= 0 && atSlash < atColon)) { 1907 isStartedFromPath = true; 1908 } 1909 1910 /* 1911 * <p><blockquote><pre> 1912 * @@@@@@@@ 1913 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? 1914 * </pre></blockquote><p> 1915 */ 1916 int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from); 1917 if (at == -1) { 1918 at = 0; 1919 } 1920 1921 /* 1922 * Parse the scheme. 1923 * <p><blockquote><pre> 1924 * scheme = $2 = http 1925 * @ 1926 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? 
1927 * </pre></blockquote><p> 1928 */ 1929 if (at < length && tmp.charAt(at) == ':') { 1930 char[] target = tmp.substring(0, at).toLowerCase().toCharArray(); 1931 if (validate(target, scheme)) { 1932 _scheme = target; 1933 } else { 1934 throw new URIException("incorrect scheme"); 1935 } 1936 from = ++at; 1937 } 1938 1939 /* 1940 * Parse the authority component. 1941 * <p><blockquote><pre> 1942 * authority = $4 = jakarta.apache.org 1943 * @@ 1944 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? 1945 * </pre></blockquote><p> 1946 */ 1947 // Reset flags 1948 _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false; 1949 if (0 <= at && at < length && tmp.charAt(at) == '/') { 1950 // Set flag 1951 _is_hier_part = true; 1952 if (at + 2 < length && tmp.charAt(at + 1) == '/') { 1953 // the temporary index to start the search from 1954 int next = indexFirstOf(tmp, "/?#", at + 2); 1955 if (next == -1) { 1956 next = (tmp.substring(at + 2).length() == 0) ? at + 2 1957 : tmp.length(); 1958 } 1959 parseAuthority(tmp.substring(at + 2, next), escaped); 1960 from = at = next; 1961 // Set flag 1962 _is_net_path = true; 1963 } 1964 if (from == at) { 1965 // Set flag 1966 _is_abs_path = true; 1967 } 1968 } 1969 1970 /* 1971 * Parse the path component. 1972 * <p><blockquote><pre> 1973 * path = $5 = /ietf/uri/ 1974 * @@@@@@ 1975 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? 
1976 * </pre></blockquote><p> 1977 */ 1978 if (from < length) { 1979 // rel_path = rel_segment [ abs_path ] 1980 int next = indexFirstOf(tmp, "?#", from); 1981 if (next == -1) { 1982 next = tmp.length(); 1983 } 1984 if (!_is_abs_path) { 1985 if (!escaped 1986 && prevalidate(tmp.substring(from, next), disallowed_rel_path) 1987 || escaped 1988 && validate(tmp.substring(from, next).toCharArray(), rel_path)) { 1989 // Set flag 1990 _is_rel_path = true; 1991 } else if (!escaped 1992 && prevalidate(tmp.substring(from, next), disallowed_opaque_part) 1993 || escaped 1994 && validate(tmp.substring(from, next).toCharArray(), opaque_part)) { 1995 // Set flag 1996 _is_opaque_part = true; 1997 } else { 1998 // the path component may be empty 1999 _path = null; 2000 } 2001 } 2002 if (escaped) { 2003 setRawPath(tmp.substring(from, next).toCharArray()); 2004 } else { 2005 setPath(tmp.substring(from, next)); 2006 } 2007 at = next; 2008 } 2009 2010 // set the charset to do escape encoding 2011 String charset = getProtocolCharset(); 2012 2013 /* 2014 * Parse the query component. 2015 * <p><blockquote><pre> 2016 * query = $7 = <undefined> 2017 * @@@@@@@@@ 2018 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? 2019 * </pre></blockquote><p> 2020 */ 2021 if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') { 2022 int next = tmp.indexOf('#', at + 1); 2023 if (next == -1) { 2024 next = tmp.length(); 2025 } 2026 _query = (escaped) ? tmp.substring(at + 1, next).toCharArray() 2027 : encode(tmp.substring(at + 1, next), allowed_query, charset); 2028 at = next; 2029 } 2030 2031 /* 2032 * Parse the fragment component. 2033 * <p><blockquote><pre> 2034 * fragment = $9 = Related 2035 * @@@@@@@@ 2036 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? 2037 * </pre></blockquote><p> 2038 */ 2039 if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') { 2040 if (at + 1 == length) { // empty fragment 2041 _fragment = "".toCharArray(); 2042 } else { 2043 _fragment = (escaped) ? 
tmp.substring(at + 1).toCharArray() 2044 : encode(tmp.substring(at + 1), allowed_fragment, charset); 2045 } 2046 } 2047 2048 // set this URI. 2049 setURI(); 2050 } 2051 2052 2053 /*** 2054 * Get the earlier index that to be searched for the first occurrance in 2055 * one of any of the given string. 2056 * 2057 * @param s the string to be indexed 2058 * @param delims the delimiters used to index 2059 * @return the earlier index if there are delimiters 2060 */ 2061 protected int indexFirstOf(String s, String delims) { 2062 return indexFirstOf(s, delims, -1); 2063 } 2064 2065 2066 /*** 2067 * Get the earlier index that to be searched for the first occurrance in 2068 * one of any of the given string. 2069 * 2070 * @param s the string to be indexed 2071 * @param delims the delimiters used to index 2072 * @param offset the from index 2073 * @return the earlier index if there are delimiters 2074 */ 2075 protected int indexFirstOf(String s, String delims, int offset) { 2076 if (s == null || s.length() == 0) { 2077 return -1; 2078 } 2079 if (delims == null || delims.length() == 0) { 2080 return -1; 2081 } 2082 // check boundaries 2083 if (offset < 0) { 2084 offset = 0; 2085 } else if (offset > s.length()) { 2086 return -1; 2087 } 2088 // s is never null 2089 int min = s.length(); 2090 char[] delim = delims.toCharArray(); 2091 for (int i = 0; i < delim.length; i++) { 2092 int at = s.indexOf(delim[i], offset); 2093 if (at >= 0 && at < min) { 2094 min = at; 2095 } 2096 } 2097 return (min == s.length()) ? -1 : min; 2098 } 2099 2100 2101 /*** 2102 * Get the earlier index that to be searched for the first occurrance in 2103 * one of any of the given array. 
*
     * @param s the character array to be searched
     * @param delim the delimiter to look for
     * @return the index of the first occurrence of the delimiter, or -1 when
     * the array is null or empty or does not contain the delimiter
     */
    protected int indexFirstOf(char[] s, char delim) {
        return indexFirstOf(s, delim, 0);
    }


    /**
     * Get the index of the first occurrence of the given delimiter in the
     * given array, starting the search at the given offset.
     *
     * @param s the character array to be searched
     * @param delim the delimiter to look for
     * @param offset the index to start from; a negative value is treated as 0
     * @return the index of the first occurrence at or after the offset, or -1
     * when the array is null or empty, the offset is greater than the array
     * length, or the delimiter is not found
     */
    protected int indexFirstOf(char[] s, char delim, int offset) {
        if (s == null || s.length == 0) {
            return -1;
        }
        // check boundaries
        if (offset < 0) {
            offset = 0;
        } else if (offset > s.length) {
            return -1;
        }
        // plain linear scan; offset == s.length simply skips the loop
        for (int i = offset; i < s.length; i++) {
            if (s[i] == delim) {
                return i;
            }
        }
        return -1;
    }


    /**
     * Parse the authority component.
     * <p>
     * The text before the first '@' (if any) becomes the userinfo. The host
     * is either a bracketed IPv6 reference or the text up to the next ':'.
     * A host that validates neither as IPv4address nor as hostname makes the
     * whole authority a registry-based name (reg_name); otherwise an optional
     * port is parsed and the authority is rebuilt as a server-based one.
     * <p>
     * NOTE(review): this method clears the classification flags, but it does
     * not clear <code>_userinfo</code> or <code>_port</code>; they are only
     * assigned when the corresponding part is present in
     * <code>original</code>. Confirm that callers reset them when the same
     * instance is re-parsed.
     *
     * @param original the original character sequence of authority component
     * @param escaped <code>true</code> if <code>original</code> is escaped
     * @throws URIException if an IPv6 reference is not closed with ']' or the
     * port is not a valid integer
     */
    protected void parseAuthority(String original, boolean escaped)
        throws URIException {

        // Reset flags
        _is_reg_name = _is_server =
        _is_hostname = _is_IPv4address = _is_IPv6reference = false;

        // set the charset to do escape encoding
        String charset = getProtocolCharset();

        boolean hasPort = true;
        int from = 0;
        int next = original.indexOf('@');
        if (next != -1) { // '@' found (an '@' at index 0 yields empty userinfo)
            // each protocol extented from URI supports the specific userinfo
            _userinfo = (escaped)
                ? original.substring(0, next).toCharArray()
                : encode(original.substring(0, next), allowed_userinfo,
                        charset);
            from = next + 1;
        }
        next = original.indexOf('[', from);
        if (next >= from) {
            next = original.indexOf(']', from);
            if (next == -1) {
                throw new URIException(URIException.PARSING, "IPv6reference");
            } else {
                // step past the closing bracket
                next++;
            }
            // NOTE(review): the substring below runs from 'from' through the
            // closing ']' inclusive, so the brackets are kept in _host
            _host = (escaped)
                ? original.substring(from, next).toCharArray()
                : encode(original.substring(from, next), allowed_IPv6reference,
                        charset);
            // Set flag
            _is_IPv6reference = true;
        } else { // only for !_is_IPv6reference
            next = original.indexOf(':', from);
            if (next == -1) {
                // no port separator: the host runs to the end of the input
                next = original.length();
                hasPort = false;
            }
            // REMINDME: it doesn't need the pre-validation
            _host = original.substring(from, next).toCharArray();
            if (validate(_host, IPv4address)) {
                // Set flag
                _is_IPv4address = true;
            } else if (validate(_host, hostname)) {
                // Set flag
                _is_hostname = true;
            } else {
                // Set flag
                _is_reg_name = true;
            }
        }
        if (_is_reg_name) {
            // Reset flags for a server-based naming authority
            _is_server = _is_hostname = _is_IPv4address =
            _is_IPv6reference = false;
            // set a registry-based naming authority (the whole input is used)
            _authority = (escaped)
                ? original.toString().toCharArray()
                : encode(original.toString(), allowed_reg_name, charset);
        } else {
            // a port is parsed only when ':' sits at 'next' and at least one
            // character follows it
            if (original.length() - 1 > next && hasPort
                && original.charAt(next) == ':') { // not empty
                from = next + 1;
                try {
                    _port = Integer.parseInt(original.substring(from));
                } catch (NumberFormatException error) {
                    throw new URIException(URIException.PARSING,
                            "invalid port number");
                }
            }
            // set a server-based naming authority
            StringBuffer buf = new StringBuffer();
            if (_userinfo != null) { // has_userinfo
                buf.append(_userinfo);
                buf.append('@');
            }
            if (_host != null) {
                buf.append(_host);
                if (_port != -1) {
                    buf.append(':');
                    buf.append(_port);
                }
            }
            _authority = buf.toString().toCharArray();
            // Set flag
            _is_server = true;
        }
    }


    /**
     * Once it's parsed successfully, set this URI.
     * <p>
     * Rebuilds <code>_uri</code> from the scheme, authority, path (or opaque
     * part) and query, and resets the cached <code>hash</code> to 0. The
     * fragment is not included, and when userinfo is present only host and
     * port are written for the authority.
     *
     * @see #getRawURI
     */
    protected void setURI() {
        // set _uri
        StringBuffer buf = new StringBuffer();
        // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
        if (_scheme != null) {
            buf.append(_scheme);
            buf.append(':');
        }
        if (_is_net_path) {
            buf.append("//");
            if (_authority != null) { // has_authority
                if (_userinfo != null) { // by default, remove userinfo part
                    if (_host != null) {
                        buf.append(_host);
                        if (_port != -1) {
                            buf.append(':');
                            buf.append(_port);
                        }
                    }
                } else {
                    buf.append(_authority);
                }
            }
        }
        if (_opaque != null && _is_opaque_part) {
            buf.append(_opaque);
        } else if (_path != null) {
            // _is_hier_part or _is_relativeURI
            if (_path.length != 0) {
                buf.append(_path);
            }
        }
        if (_query != null) { // has_query
            buf.append('?');
            buf.append(_query);
        }
        // ignore the fragment identifier
        _uri = buf.toString().toCharArray();
        // invalidate the cached hash value
        hash = 0;
    }

    // ----------------------------------------------------------- Test methods


    /**
     * Tell whether or not this URI is absolute.
     *
     * @return true iff this URI is absoluteURI, i.e. it has a scheme
     */
    public boolean isAbsoluteURI() {
        return (_scheme != null);
    }


    /**
     * Tell whether or not this URI is relative.
     *
     * @return true iff this URI is relativeURI, i.e. it has no scheme
     */
    public boolean isRelativeURI() {
        return (_scheme == null);
    }


    /**
     * Tell whether or not the absoluteURI of this URI is hier_part.
     *
     * @return true iff the absoluteURI is hier_part
     */
    public boolean isHierPart() {
        return _is_hier_part;
    }


    /**
     * Tell whether or not the absoluteURI of this URI is opaque_part.
     *
     * @return true iff the absoluteURI is opaque_part
     */
    public boolean isOpaquePart() {
        return _is_opaque_part;
    }


    /**
     * Tell whether or not the relativeURI or hier_part of this URI is net_path.
     * It's the same function as the has_authority() method.
2332 * 2333 * @return true iif the relativeURI or heir_part is net_path 2334 * @see #hasAuthority 2335 */ 2336 public boolean isNetPath() { 2337 return _is_net_path || (_authority != null); 2338 } 2339 2340 2341 /*** 2342 * Tell whether or not the relativeURI or hier_part of this URI is abs_path. 2343 * 2344 * @return true iif the relativeURI or hier_part is abs_path 2345 */ 2346 public boolean isAbsPath() { 2347 return _is_abs_path; 2348 } 2349 2350 2351 /*** 2352 * Tell whether or not the relativeURI of this URI is rel_path. 2353 * 2354 * @return true iif the relativeURI is rel_path 2355 */ 2356 public boolean isRelPath() { 2357 return _is_rel_path; 2358 } 2359 2360 2361 /*** 2362 * Tell whether or not this URI has authority. 2363 * It's the same function as the is_net_path() method. 2364 * 2365 * @return true iif this URI has authority 2366 * @see #isNetPath 2367 */ 2368 public boolean hasAuthority() { 2369 return (_authority != null) || _is_net_path; 2370 } 2371 2372 /*** 2373 * Tell whether or not the authority component of this URI is reg_name. 2374 * 2375 * @return true iif the authority component is reg_name 2376 */ 2377 public boolean isRegName() { 2378 return _is_reg_name; 2379 } 2380 2381 2382 /*** 2383 * Tell whether or not the authority component of this URI is server. 2384 * 2385 * @return true iif the authority component is server 2386 */ 2387 public boolean isServer() { 2388 return _is_server; 2389 } 2390 2391 2392 /*** 2393 * Tell whether or not this URI has userinfo. 2394 * 2395 * @return true iif this URI has userinfo 2396 */ 2397 public boolean hasUserinfo() { 2398 return (_userinfo != null); 2399 } 2400 2401 2402 /*** 2403 * Tell whether or not the host part of this URI is hostname. 2404 * 2405 * @return true iif the host part is hostname 2406 */ 2407 public boolean isHostname() { 2408 return _is_hostname; 2409 } 2410 2411 2412 /*** 2413 * Tell whether or not the host part of this URI is IPv4address. 
2414 * 2415 * @return true iif the host part is IPv4address 2416 */ 2417 public boolean isIPv4address() { 2418 return _is_IPv4address; 2419 } 2420 2421 2422 /*** 2423 * Tell whether or not the host part of this URI is IPv6reference. 2424 * 2425 * @return true iif the host part is IPv6reference 2426 */ 2427 public boolean isIPv6reference() { 2428 return _is_IPv6reference; 2429 } 2430 2431 2432 /*** 2433 * Tell whether or not this URI has query. 2434 * 2435 * @return true iif this URI has query 2436 */ 2437 public boolean hasQuery() { 2438 return (_query != null); 2439 } 2440 2441 2442 /*** 2443 * Tell whether or not this URI has fragment. 2444 * 2445 * @return true iif this URI has fragment 2446 */ 2447 public boolean hasFragment() { 2448 return (_fragment != null); 2449 } 2450 2451 2452 // ---------------------------------------------------------------- Charset 2453 2454 2455 /*** 2456 * Set the default charset of the protocol. 2457 * <p> 2458 * The character set used to store files SHALL remain a local decision and 2459 * MAY depend on the capability of local operating systems. Prior to the 2460 * exchange of URIs they SHOULD be converted into a ISO/IEC 10646 format 2461 * and UTF-8 encoded. This approach, while allowing international exchange 2462 * of URIs, will still allow backward compatibility with older systems 2463 * because the code set positions for ASCII characters are identical to the 2464 * one byte sequence in UTF-8. 2465 * <p> 2466 * An individual URI scheme may require a single charset, define a default 2467 * charset, or provide a way to indicate the charset used. 2468 * 2469 * <p> 2470 * Always all the time, the setter method is always succeeded and throws 2471 * <code>DefaultCharsetChanged</code> exception. 2472 * 2473 * So API programmer must follow the following way: 2474 * <code><pre> 2475 * import org.apache.util.URI$DefaultCharsetChanged; 2476 * . 2477 * . 2478 * . 
2479 * try { 2480 * URI.setDefaultProtocolCharset("UTF-8"); 2481 * } catch (DefaultCharsetChanged cc) { 2482 * // CASE 1: the exception could be ignored, when it is set by user 2483 * if (cc.getReasonCode() == DefaultCharsetChanged.PROTOCOL_CHARSET) { 2484 * // CASE 2: let user know the default protocol charset changed 2485 * } else { 2486 * // CASE 2: let user know the default document charset changed 2487 * } 2488 * } 2489 * </pre></code> 2490 * 2491 * The API programmer is responsible to set the correct charset. 2492 * And each application should remember its own charset to support. 2493 * 2494 * @param charset the default charset for each protocol 2495 * @throws DefaultCharsetChanged default charset changed 2496 */ 2497 public static void setDefaultProtocolCharset(String charset) 2498 throws DefaultCharsetChanged { 2499 2500 defaultProtocolCharset = charset; 2501 throw new DefaultCharsetChanged(DefaultCharsetChanged.PROTOCOL_CHARSET, 2502 "the default protocol charset changed"); 2503 } 2504 2505 2506 /*** 2507 * Get the default charset of the protocol. 2508 * <p> 2509 * An individual URI scheme may require a single charset, define a default 2510 * charset, or provide a way to indicate the charset used. 2511 * <p> 2512 * To work globally either requires support of a number of character sets 2513 * and to be able to convert between them, or the use of a single preferred 2514 * character set. 2515 * For support of global compatibility it is STRONGLY RECOMMENDED that 2516 * clients and servers use UTF-8 encoding when exchanging URIs. 2517 * 2518 * @return the default charset string 2519 */ 2520 public static String getDefaultProtocolCharset() { 2521 return defaultProtocolCharset; 2522 } 2523 2524 2525 /*** 2526 * Get the protocol charset used by this current URI instance. 2527 * It was set by the constructor for this instance. If it was not set by 2528 * contructor, it will return the default protocol charset. 
2529 * 2530 * @return the protocol charset string 2531 * @see #getDefaultProtocolCharset 2532 */ 2533 public String getProtocolCharset() { 2534 return (protocolCharset != null) 2535 ? protocolCharset 2536 : defaultProtocolCharset; 2537 } 2538 2539 2540 /*** 2541 * Set the default charset of the document. 2542 * <p> 2543 * Notice that it will be possible to contain mixed characters (e.g. 2544 * ftp://host/KoreanNamespace/ChineseResource). To handle the Bi-directional 2545 * display of these character sets, the protocol charset could be simply 2546 * used again. Because it's not yet implemented that the insertion of BIDI 2547 * control characters at different points during composition is extracted. 2548 * <p> 2549 * 2550 * Always all the time, the setter method is always succeeded and throws 2551 * <code>DefaultCharsetChanged</code> exception. 2552 * 2553 * So API programmer must follow the following way: 2554 * <code><pre> 2555 * import org.apache.util.URI$DefaultCharsetChanged; 2556 * . 2557 * . 2558 * . 2559 * try { 2560 * URI.setDefaultDocumentCharset("EUC-KR"); 2561 * } catch (DefaultCharsetChanged cc) { 2562 * // CASE 1: the exception could be ignored, when it is set by user 2563 * if (cc.getReasonCode() == DefaultCharsetChanged.DOCUMENT_CHARSET) { 2564 * // CASE 2: let user know the default document charset changed 2565 * } else { 2566 * // CASE 2: let user know the default protocol charset changed 2567 * } 2568 * } 2569 * </pre></code> 2570 * 2571 * The API programmer is responsible to set the correct charset. 2572 * And each application should remember its own charset to support. 
2573 * 2574 * @param charset the default charset for the document 2575 * @throws DefaultCharsetChanged default charset changed 2576 */ 2577 public static void setDefaultDocumentCharset(String charset) 2578 throws DefaultCharsetChanged { 2579 2580 defaultDocumentCharset = charset; 2581 throw new DefaultCharsetChanged(DefaultCharsetChanged.DOCUMENT_CHARSET, 2582 "the default document charset changed"); 2583 } 2584 2585 2586 /*** 2587 * Get the recommended default charset of the document. 2588 * 2589 * @return the default charset string 2590 */ 2591 public static String getDefaultDocumentCharset() { 2592 return defaultDocumentCharset; 2593 } 2594 2595 2596 /*** 2597 * Get the default charset of the document by locale. 2598 * 2599 * @return the default charset string by locale 2600 */ 2601 public static String getDefaultDocumentCharsetByLocale() { 2602 return defaultDocumentCharsetByLocale; 2603 } 2604 2605 2606 /*** 2607 * Get the default charset of the document by platform. 2608 * 2609 * @return the default charset string by platform 2610 */ 2611 public static String getDefaultDocumentCharsetByPlatform() { 2612 return defaultDocumentCharsetByPlatform; 2613 } 2614 2615 // ------------------------------------------------------------- The scheme 2616 2617 /*** 2618 * Get the scheme. 2619 * 2620 * @return the scheme 2621 */ 2622 public char[] getRawScheme() { 2623 return _scheme; 2624 } 2625 2626 2627 /*** 2628 * Get the scheme. 2629 * 2630 * @return the scheme 2631 * null if undefined scheme 2632 */ 2633 public String getScheme() { 2634 return (_scheme == null) ? null : new String(_scheme); 2635 } 2636 2637 // ---------------------------------------------------------- The authority 2638 2639 /*** 2640 * Set the authority. It can be one type of server, hostport, hostname, 2641 * IPv4address, IPv6reference and reg_name. 
2642 * <p><blockquote><pre> 2643 * authority = server | reg_name 2644 * </pre></blockquote><p> 2645 * 2646 * @param escapedAuthority the raw escaped authority 2647 * @throws URIException If {@link 2648 * #parseAuthority(java.lang.String,boolean)} fails 2649 * @throws NullPointerException null authority 2650 */ 2651 public void setRawAuthority(char[] escapedAuthority) 2652 throws URIException, NullPointerException { 2653 2654 parseAuthority(new String(escapedAuthority), true); 2655 setURI(); 2656 } 2657 2658 2659 /*** 2660 * Set the authority. It can be one type of server, hostport, hostname, 2661 * IPv4address, IPv6reference and reg_name. 2662 * Note that there is no setAuthority method by the escape encoding reason. 2663 * 2664 * @param escapedAuthority the escaped authority string 2665 * @throws URIException If {@link 2666 * #parseAuthority(java.lang.String,boolean)} fails 2667 */ 2668 public void setEscapedAuthority(String escapedAuthority) 2669 throws URIException { 2670 2671 parseAuthority(escapedAuthority, true); 2672 setURI(); 2673 } 2674 2675 2676 /*** 2677 * Get the raw-escaped authority. 2678 * 2679 * @return the raw-escaped authority 2680 */ 2681 public char[] getRawAuthority() { 2682 return _authority; 2683 } 2684 2685 2686 /*** 2687 * Get the escaped authority. 2688 * 2689 * @return the escaped authority 2690 */ 2691 public String getEscapedAuthority() { 2692 return (_authority == null) ? null : new String(_authority); 2693 } 2694 2695 2696 /*** 2697 * Get the authority. 2698 * 2699 * @return the authority 2700 * @throws URIException If {@link #decode} fails 2701 */ 2702 public String getAuthority() throws URIException { 2703 return (_authority == null) ? null : decode(_authority, 2704 getProtocolCharset()); 2705 } 2706 2707 // ----------------------------------------------------------- The userinfo 2708 2709 /*** 2710 * Get the raw-escaped userinfo. 
2711 * 2712 * @return the raw-escaped userinfo 2713 * @see #getAuthority 2714 */ 2715 public char[] getRawUserinfo() { 2716 return _userinfo; 2717 } 2718 2719 2720 /*** 2721 * Get the escaped userinfo. 2722 * 2723 * @return the escaped userinfo 2724 * @see #getAuthority 2725 */ 2726 public String getEscapedUserinfo() { 2727 return (_userinfo == null) ? null : new String(_userinfo); 2728 } 2729 2730 2731 /*** 2732 * Get the userinfo. 2733 * 2734 * @return the userinfo 2735 * @throws URIException If {@link #decode} fails 2736 * @see #getAuthority 2737 */ 2738 public String getUserinfo() throws URIException { 2739 return (_userinfo == null) ? null : decode(_userinfo, 2740 getProtocolCharset()); 2741 } 2742 2743 // --------------------------------------------------------------- The host 2744 2745 /*** 2746 * Get the host. 2747 * <p><blockquote><pre> 2748 * host = hostname | IPv4address | IPv6reference 2749 * </pre></blockquote><p> 2750 * 2751 * @return the host 2752 * @see #getAuthority 2753 */ 2754 public char[] getRawHost() { 2755 return _host; 2756 } 2757 2758 2759 /*** 2760 * Get the host. 2761 * <p><blockquote><pre> 2762 * host = hostname | IPv4address | IPv6reference 2763 * </pre></blockquote><p> 2764 * 2765 * @return the host 2766 * @throws URIException If {@link #decode} fails 2767 * @see #getAuthority 2768 */ 2769 public String getHost() throws URIException { 2770 return decode(_host, getProtocolCharset()); 2771 } 2772 2773 // --------------------------------------------------------------- The port 2774 2775 /*** 2776 * Get the port. In order to get the specfic default port, the specific 2777 * protocol-supported class extended from the URI class should be used. 2778 * It has the server-based naming authority. 2779 * 2780 * @return the port 2781 * if -1, it has the default port for the scheme or the server-based 2782 * naming authority is not supported in the specific URI. 
2783 */ 2784 public int getPort() { 2785 return _port; 2786 } 2787 2788 // --------------------------------------------------------------- The path 2789 2790 /*** 2791 * Set the raw-escaped path. 2792 * 2793 * @param escapedPath the path character sequence 2794 * @throws URIException encoding error or not proper for initial instance 2795 * @see #encode 2796 */ 2797 public void setRawPath(char[] escapedPath) throws URIException { 2798 if (escapedPath == null || escapedPath.length == 0) { 2799 _path = _opaque = escapedPath; 2800 setURI(); 2801 return; 2802 } 2803 // remove the fragment identifier 2804 escapedPath = removeFragmentIdentifier(escapedPath); 2805 if (_is_net_path || _is_abs_path) { 2806 if (escapedPath[0] != '/') { 2807 throw new URIException(URIException.PARSING, 2808 "not absolute path"); 2809 } 2810 if (!validate(escapedPath, abs_path)) { 2811 throw new URIException(URIException.ESCAPING, 2812 "escaped absolute path not valid"); 2813 } 2814 _path = escapedPath; 2815 } else if (_is_rel_path) { 2816 int at = indexFirstOf(escapedPath, '/'); 2817 if (at == 0) { 2818 throw new URIException(URIException.PARSING, "incorrect path"); 2819 } 2820 if (at > 0 && !validate(escapedPath, 0, at - 1, rel_segment) 2821 && !validate(escapedPath, at, -1, abs_path) 2822 || at < 0 && !validate(escapedPath, 0, -1, rel_segment)) { 2823 2824 throw new URIException(URIException.ESCAPING, 2825 "escaped relative path not valid"); 2826 } 2827 _path = escapedPath; 2828 } else if (_is_opaque_part) { 2829 if (!uric_no_slash.get(escapedPath[0]) 2830 && !validate(escapedPath, 1, -1, uric)) { 2831 throw new URIException(URIException.ESCAPING, 2832 "escaped opaque part not valid"); 2833 } 2834 _opaque = escapedPath; 2835 } else { 2836 throw new URIException(URIException.PARSING, "incorrect path"); 2837 } 2838 setURI(); 2839 } 2840 2841 2842 /*** 2843 * Set the escaped path. 
2844 * 2845 * @param escapedPath the escaped path string 2846 * @throws URIException encoding error or not proper for initial instance 2847 * @see #encode 2848 */ 2849 public void setEscapedPath(String escapedPath) throws URIException { 2850 if (escapedPath == null) { 2851 _path = _opaque = null; 2852 setURI(); 2853 return; 2854 } 2855 setRawPath(escapedPath.toCharArray()); 2856 } 2857 2858 2859 /*** 2860 * Set the path. 2861 * 2862 * @param path the path string 2863 * @throws URIException set incorrectly or fragment only 2864 * @see #encode 2865 */ 2866 public void setPath(String path) throws URIException { 2867 2868 if (path == null || path.length() == 0) { 2869 _path = _opaque = (path == null) ? null : path.toCharArray(); 2870 setURI(); 2871 return; 2872 } 2873 // set the charset to do escape encoding 2874 String charset = getProtocolCharset(); 2875 2876 if (_is_net_path || _is_abs_path) { 2877 _path = encode(path, allowed_abs_path, charset); 2878 } else if (_is_rel_path) { 2879 StringBuffer buff = new StringBuffer(path.length()); 2880 int at = path.indexOf('/'); 2881 if (at == 0) { // never 0 2882 throw new URIException(URIException.PARSING, 2883 "incorrect relative path"); 2884 } 2885 if (at > 0) { 2886 buff.append(encode(path.substring(0, at), allowed_rel_path, 2887 charset)); 2888 buff.append(encode(path.substring(at), allowed_abs_path, 2889 charset)); 2890 } else { 2891 buff.append(encode(path, allowed_rel_path, charset)); 2892 } 2893 _path = buff.toString().toCharArray(); 2894 } else if (_is_opaque_part) { 2895 StringBuffer buf = new StringBuffer(); 2896 buf.insert(0, encode(path.substring(0, 1), uric_no_slash, charset)); 2897 buf.insert(1, encode(path.substring(1), uric, charset)); 2898 _opaque = buf.toString().toCharArray(); 2899 } else { 2900 throw new URIException(URIException.PARSING, "incorrect path"); 2901 } 2902 setURI(); 2903 } 2904 2905 2906 /*** 2907 * Resolve the base and relative path. 
2908 * 2909 * @param basePath a character array of the basePath 2910 * @param relPath a character array of the relPath 2911 * @return the resolved path 2912 * @throws URIException no more higher path level to be resolved 2913 */ 2914 protected char[] resolvePath(char[] basePath, char[] relPath) 2915 throws URIException { 2916 2917 // REMINDME: paths are never null 2918 String base = (basePath == null) ? "" : new String(basePath); 2919 int at = base.lastIndexOf('/'); 2920 if (at != -1) { 2921 basePath = base.substring(0, at + 1).toCharArray(); 2922 } 2923 // _path could be empty 2924 if (relPath == null || relPath.length == 0) { 2925 return normalize(basePath); 2926 } else if (relPath[0] == '/') { 2927 return normalize(relPath); 2928 } else { 2929 StringBuffer buff = new StringBuffer(base.length() 2930 + relPath.length); 2931 buff.append((at != -1) ? base.substring(0, at + 1) : "/"); 2932 buff.append(relPath); 2933 return normalize(buff.toString().toCharArray()); 2934 } 2935 } 2936 2937 2938 /*** 2939 * Get the raw-escaped current hierarchy level in the given path. 2940 * If the last namespace is a collection, the slash mark ('/') should be 2941 * ended with at the last character of the path string. 
2942 * 2943 * @param path the path 2944 * @return the current hierarchy level 2945 * @throws URIException no hierarchy level 2946 */ 2947 protected char[] getRawCurrentHierPath(char[] path) throws URIException { 2948 2949 if (_is_opaque_part) { 2950 throw new URIException(URIException.PARSING, "no hierarchy level"); 2951 } 2952 if (path == null) { 2953 throw new URIException(URIException.PARSING, "empty path"); 2954 } 2955 String buff = new String(path); 2956 int first = buff.indexOf('/'); 2957 int last = buff.lastIndexOf('/'); 2958 if (last == 0) { 2959 return rootPath; 2960 } else if (first != last && last != -1) { 2961 return buff.substring(0, last).toCharArray(); 2962 } 2963 // FIXME: it could be a document on the server side 2964 return path; 2965 } 2966 2967 2968 /*** 2969 * Get the raw-escaped current hierarchy level. 2970 * 2971 * @return the raw-escaped current hierarchy level 2972 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. 2973 */ 2974 public char[] getRawCurrentHierPath() throws URIException { 2975 return (_path == null) ? null : getRawCurrentHierPath(_path); 2976 } 2977 2978 2979 /*** 2980 * Get the escaped current hierarchy level. 2981 * 2982 * @return the escaped current hierarchy level 2983 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. 2984 */ 2985 public String getEscapedCurrentHierPath() throws URIException { 2986 char[] path = getRawCurrentHierPath(); 2987 return (path == null) ? null : new String(path); 2988 } 2989 2990 2991 /*** 2992 * Get the current hierarchy level. 2993 * 2994 * @return the current hierarchy level 2995 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. 2996 * @see #decode 2997 */ 2998 public String getCurrentHierPath() throws URIException { 2999 char[] path = getRawCurrentHierPath(); 3000 return (path == null) ? null : decode(path, getProtocolCharset()); 3001 } 3002 3003 3004 /*** 3005 * Get the level above the this hierarchy level. 
3006 * 3007 * @return the raw above hierarchy level 3008 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. 3009 */ 3010 public char[] getRawAboveHierPath() throws URIException { 3011 char[] path = getRawCurrentHierPath(); 3012 return (path == null) ? null : getRawCurrentHierPath(path); 3013 } 3014 3015 3016 /*** 3017 * Get the level above the this hierarchy level. 3018 * 3019 * @return the raw above hierarchy level 3020 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. 3021 */ 3022 public String getEscapedAboveHierPath() throws URIException { 3023 char[] path = getRawAboveHierPath(); 3024 return (path == null) ? null : new String(path); 3025 } 3026 3027 3028 /*** 3029 * Get the level above the this hierarchy level. 3030 * 3031 * @return the above hierarchy level 3032 * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails. 3033 * @see #decode 3034 */ 3035 public String getAboveHierPath() throws URIException { 3036 char[] path = getRawAboveHierPath(); 3037 return (path == null) ? null : decode(path, getProtocolCharset()); 3038 } 3039 3040 3041 /*** 3042 * Get the raw-escaped path. 3043 * <p><blockquote><pre> 3044 * path = [ abs_path | opaque_part ] 3045 * </pre></blockquote><p> 3046 * 3047 * @return the raw-escaped path 3048 */ 3049 public char[] getRawPath() { 3050 return _is_opaque_part ? _opaque : _path; 3051 } 3052 3053 3054 /*** 3055 * Get the escaped path. 3056 * <p><blockquote><pre> 3057 * path = [ abs_path | opaque_part ] 3058 * abs_path = "/" path_segments 3059 * opaque_part = uric_no_slash *uric 3060 * </pre></blockquote><p> 3061 * 3062 * @return the escaped path string 3063 */ 3064 public String getEscapedPath() { 3065 char[] path = getRawPath(); 3066 return (path == null) ? null : new String(path); 3067 } 3068 3069 3070 /*** 3071 * Get the path. 
3072 * <p><blockquote><pre> 3073 * path = [ abs_path | opaque_part ] 3074 * </pre></blockquote><p> 3075 * @return the path string 3076 * @throws URIException If {@link #decode} fails. 3077 * @see #decode 3078 */ 3079 public String getPath() throws URIException { 3080 char[] path = getRawPath(); 3081 return (path == null) ? null : decode(path, getProtocolCharset()); 3082 } 3083 3084 3085 /*** 3086 * Get the raw-escaped basename of the path. 3087 * 3088 * @return the raw-escaped basename 3089 */ 3090 public char[] getRawName() { 3091 if (_path == null) { 3092 return null; 3093 } 3094 3095 int at = 0; 3096 for (int i = _path.length - 1; i >= 0; i--) { 3097 if (_path[i] == '/') { 3098 at = i + 1; 3099 break; 3100 } 3101 } 3102 int len = _path.length - at; 3103 char[] basename = new char[len]; 3104 System.arraycopy(_path, at, basename, 0, len); 3105 return basename; 3106 } 3107 3108 3109 /*** 3110 * Get the escaped basename of the path. 3111 * 3112 * @return the escaped basename string 3113 */ 3114 public String getEscapedName() { 3115 char[] basename = getRawName(); 3116 return (basename == null) ? null : new String(basename); 3117 } 3118 3119 3120 /*** 3121 * Get the basename of the path. 3122 * 3123 * @return the basename string 3124 * @throws URIException incomplete trailing escape pattern or unsupported 3125 * character encoding 3126 * @see #decode 3127 */ 3128 public String getName() throws URIException { 3129 char[] basename = getRawName(); 3130 return (basename == null) ? null : decode(getRawName(), 3131 getProtocolCharset()); 3132 } 3133 3134 // ----------------------------------------------------- The path and query 3135 3136 /*** 3137 * Get the raw-escaped path and query. 
3138 * 3139 * @return the raw-escaped path and query 3140 */ 3141 public char[] getRawPathQuery() { 3142 3143 if (_path == null && _query == null) { 3144 return null; 3145 } 3146 StringBuffer buff = new StringBuffer(); 3147 if (_path != null) { 3148 buff.append(_path); 3149 } 3150 if (_query != null) { 3151 buff.append('?'); 3152 buff.append(_query); 3153 } 3154 return buff.toString().toCharArray(); 3155 } 3156 3157 3158 /*** 3159 * Get the escaped query. 3160 * 3161 * @return the escaped path and query string 3162 */ 3163 public String getEscapedPathQuery() { 3164 char[] rawPathQuery = getRawPathQuery(); 3165 return (rawPathQuery == null) ? null : new String(rawPathQuery); 3166 } 3167 3168 3169 /*** 3170 * Get the path and query. 3171 * 3172 * @return the path and query string. 3173 * @throws URIException incomplete trailing escape pattern or unsupported 3174 * character encoding 3175 * @see #decode 3176 */ 3177 public String getPathQuery() throws URIException { 3178 char[] rawPathQuery = getRawPathQuery(); 3179 return (rawPathQuery == null) ? null : decode(rawPathQuery, 3180 getProtocolCharset()); 3181 } 3182 3183 // -------------------------------------------------------------- The query 3184 3185 /*** 3186 * Set the raw-escaped query. 3187 * 3188 * @param escapedQuery the raw-escaped query 3189 * @throws URIException escaped query not valid 3190 */ 3191 public void setRawQuery(char[] escapedQuery) throws URIException { 3192 if (escapedQuery == null || escapedQuery.length == 0) { 3193 _query = escapedQuery; 3194 setURI(); 3195 return; 3196 } 3197 // remove the fragment identifier 3198 escapedQuery = removeFragmentIdentifier(escapedQuery); 3199 if (!validate(escapedQuery, query)) { 3200 throw new URIException(URIException.ESCAPING, 3201 "escaped query not valid"); 3202 } 3203 _query = escapedQuery; 3204 setURI(); 3205 } 3206 3207 3208 /*** 3209 * Set the escaped query string. 
3210 * 3211 * @param escapedQuery the escaped query string 3212 * @throws URIException escaped query not valid 3213 */ 3214 public void setEscapedQuery(String escapedQuery) throws URIException { 3215 if (escapedQuery == null) { 3216 _query = null; 3217 setURI(); 3218 return; 3219 } 3220 setRawQuery(escapedQuery.toCharArray()); 3221 } 3222 3223 3224 /*** 3225 * Set the query. 3226 * <p> 3227 * When a query string is not misunderstood the reserved special characters 3228 * ("&", "=", "+", ",", and "$") within a query component, it is 3229 * recommended to use in encoding the whole query with this method. 3230 * <p> 3231 * The additional APIs for the special purpose using by the reserved 3232 * special characters used in each protocol are implemented in each protocol 3233 * classes inherited from <code>URI</code>. So refer to the same-named APIs 3234 * implemented in each specific protocol instance. 3235 * 3236 * @param query the query string. 3237 * @throws URIException incomplete trailing escape pattern or unsupported 3238 * character encoding 3239 * @see #encode 3240 */ 3241 public void setQuery(String query) throws URIException { 3242 if (query == null || query.length() == 0) { 3243 _query = (query == null) ? null : query.toCharArray(); 3244 setURI(); 3245 return; 3246 } 3247 setRawQuery(encode(query, allowed_query, getProtocolCharset())); 3248 } 3249 3250 3251 /*** 3252 * Get the raw-escaped query. 3253 * 3254 * @return the raw-escaped query 3255 */ 3256 public char[] getRawQuery() { 3257 return _query; 3258 } 3259 3260 3261 /*** 3262 * Get the escaped query. 3263 * 3264 * @return the escaped query string 3265 */ 3266 public String getEscapedQuery() { 3267 return (_query == null) ? null : new String(_query); 3268 } 3269 3270 3271 /*** 3272 * Get the query. 3273 * 3274 * @return the query string. 
3275 * @throws URIException incomplete trailing escape pattern or unsupported 3276 * character encoding 3277 * @see #decode 3278 */ 3279 public String getQuery() throws URIException { 3280 return (_query == null) ? null : decode(_query, getProtocolCharset()); 3281 } 3282 3283 // ----------------------------------------------------------- The fragment 3284 3285 /*** 3286 * Set the raw-escaped fragment. 3287 * 3288 * @param escapedFragment the raw-escaped fragment 3289 * @throws URIException escaped fragment not valid 3290 */ 3291 public void setRawFragment(char[] escapedFragment) throws URIException { 3292 if (escapedFragment == null || escapedFragment.length == 0) { 3293 _fragment = escapedFragment; 3294 hash = 0; 3295 return; 3296 } 3297 if (!validate(escapedFragment, fragment)) { 3298 throw new URIException(URIException.ESCAPING, 3299 "escaped fragment not valid"); 3300 } 3301 _fragment = escapedFragment; 3302 hash = 0; 3303 } 3304 3305 3306 /*** 3307 * Set the escaped fragment string. 3308 * 3309 * @param escapedFragment the escaped fragment string 3310 * @throws URIException escaped fragment not valid 3311 */ 3312 public void setEscapedFragment(String escapedFragment) throws URIException { 3313 if (escapedFragment == null) { 3314 _fragment = null; 3315 hash = 0; 3316 return; 3317 } 3318 setRawFragment(escapedFragment.toCharArray()); 3319 } 3320 3321 3322 /*** 3323 * Set the fragment. 3324 * 3325 * @param fragment the fragment string. 3326 * @throws URIException If an error occurs. 3327 */ 3328 public void setFragment(String fragment) throws URIException { 3329 if (fragment == null || fragment.length() == 0) { 3330 _fragment = (fragment == null) ? null : fragment.toCharArray(); 3331 hash = 0; 3332 return; 3333 } 3334 _fragment = encode(fragment, allowed_fragment, getProtocolCharset()); 3335 hash = 0; 3336 } 3337 3338 3339 /*** 3340 * Get the raw-escaped fragment. 
3341 * <p> 3342 * The optional fragment identifier is not part of a URI, but is often used 3343 * in conjunction with a URI. 3344 * <p> 3345 * The format and interpretation of fragment identifiers is dependent on 3346 * the media type [RFC2046] of the retrieval result. 3347 * <p> 3348 * A fragment identifier is only meaningful when a URI reference is 3349 * intended for retrieval and the result of that retrieval is a document 3350 * for which the identified fragment is consistently defined. 3351 * 3352 * @return the raw-escaped fragment 3353 */ 3354 public char[] getRawFragment() { 3355 return _fragment; 3356 } 3357 3358 3359 /*** 3360 * Get the escaped fragment. 3361 * 3362 * @return the escaped fragment string 3363 */ 3364 public String getEscapedFragment() { 3365 return (_fragment == null) ? null : new String(_fragment); 3366 } 3367 3368 3369 /*** 3370 * Get the fragment. 3371 * 3372 * @return the fragment string 3373 * @throws URIException incomplete trailing escape pattern or unsupported 3374 * character encoding 3375 * @see #decode 3376 */ 3377 public String getFragment() throws URIException { 3378 return (_fragment == null) ? null : decode(_fragment, 3379 getProtocolCharset()); 3380 } 3381 3382 // ------------------------------------------------------------- Utilities 3383 3384 /*** 3385 * Remove the fragment identifier of the given component. 3386 * 3387 * @param component the component that a fragment may be included 3388 * @return the component that the fragment identifier is removed 3389 */ 3390 protected char[] removeFragmentIdentifier(char[] component) { 3391 if (component == null) { 3392 return null; 3393 } 3394 int lastIndex = new String(component).indexOf('#'); 3395 if (lastIndex != -1) { 3396 component = new String(component).substring(0, 3397 lastIndex).toCharArray(); 3398 } 3399 return component; 3400 } 3401 3402 3403 /*** 3404 * Normalize the given hier path part. 
3405 * 3406 * <p>Algorithm taken from URI reference parser at 3407 * http://www.apache.org/~fielding/uri/rev-2002/issues.html. 3408 * 3409 * @param path the path to normalize 3410 * @return the normalized path 3411 * @throws URIException no more higher path level to be normalized 3412 */ 3413 protected char[] normalize(char[] path) throws URIException { 3414 3415 if (path == null) { 3416 return null; 3417 } 3418 3419 String normalized = new String(path); 3420 3421 // If the buffer begins with "./" or "../", the "." or ".." is removed. 3422 if (normalized.startsWith("./")) { 3423 normalized = normalized.substring(1); 3424 } else if (normalized.startsWith("../")) { 3425 normalized = normalized.substring(2); 3426 } else if (normalized.startsWith("..")) { 3427 normalized = normalized.substring(2); 3428 } 3429 3430 // All occurrences of "/./" in the buffer are replaced with "/" 3431 int index = -1; 3432 while ((index = normalized.indexOf("/./")) != -1) { 3433 normalized = normalized.substring(0, index) + normalized.substring(index + 2); 3434 } 3435 3436 // If the buffer ends with "/.", the "." is removed. 3437 if (normalized.endsWith("/.")) { 3438 normalized = normalized.substring(0, normalized.length() - 1); 3439 } 3440 3441 int startIndex = 0; 3442 3443 // All occurrences of "/<segment>/../" in the buffer, where ".." 3444 // and <segment> are complete path segments, are iteratively replaced 3445 // with "/" in order from left to right until no matching pattern remains. 3446 // If the buffer ends with "/<segment>/..", that is also replaced 3447 // with "/". Note that <segment> may be empty. 
3448 while ((index = normalized.indexOf("/../", startIndex)) != -1) { 3449 int slashIndex = normalized.lastIndexOf('/', index - 1); 3450 if (slashIndex >= 0) { 3451 normalized = normalized.substring(0, slashIndex) + normalized.substring(index + 3); 3452 } else { 3453 startIndex = index + 3; 3454 } 3455 } 3456 if (normalized.endsWith("/..")) { 3457 int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4); 3458 if (slashIndex >= 0) { 3459 normalized = normalized.substring(0, slashIndex + 1); 3460 } 3461 } 3462 3463 // All prefixes of "<segment>/../" in the buffer, where ".." 3464 // and <segment> are complete path segments, are iteratively replaced 3465 // with "/" in order from left to right until no matching pattern remains. 3466 // If the buffer ends with "<segment>/..", that is also replaced 3467 // with "/". Note that <segment> may be empty. 3468 while ((index = normalized.indexOf("/../")) != -1) { 3469 int slashIndex = normalized.lastIndexOf('/', index - 1); 3470 if (slashIndex >= 0) { 3471 break; 3472 } else { 3473 normalized = normalized.substring(index + 3); 3474 } 3475 } 3476 if (normalized.endsWith("/..")) { 3477 int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4); 3478 if (slashIndex < 0) { 3479 normalized = "/"; 3480 } 3481 } 3482 3483 return normalized.toCharArray(); 3484 } 3485 3486 3487 /*** 3488 * Normalize the path part of this URI. 3489 * 3490 * @throws URIException no more higher path level to be normalized 3491 */ 3492 public void normalize() throws URIException { 3493 _path = normalize(_path); 3494 setURI(); 3495 } 3496 3497 3498 /*** 3499 * Test if the first array is equal to the second array. 
3500 * 3501 * @param first the first character array 3502 * @param second the second character array 3503 * @return true if they're equal 3504 */ 3505 protected boolean equals(char[] first, char[] second) { 3506 3507 if (first == null && second == null) { 3508 return true; 3509 } 3510 if (first == null || second == null) { 3511 return false; 3512 } 3513 if (first.length != second.length) { 3514 return false; 3515 } 3516 for (int i = 0; i < first.length; i++) { 3517 if (first[i] != second[i]) { 3518 return false; 3519 } 3520 } 3521 return true; 3522 } 3523 3524 3525 /*** 3526 * Test an object if this URI is equal to another. 3527 * 3528 * @param obj an object to compare 3529 * @return true if two URI objects are equal 3530 */ 3531 public boolean equals(Object obj) { 3532 3533 // normalize and test each components 3534 if (obj == this) { 3535 return true; 3536 } 3537 if (!(obj instanceof URI)) { 3538 return false; 3539 } 3540 URI another = (URI) obj; 3541 // scheme 3542 if (!equals(_scheme, another._scheme)) { 3543 return false; 3544 } 3545 // is_opaque_part or is_hier_part? and opaque 3546 if (!equals(_opaque, another._opaque)) { 3547 return false; 3548 } 3549 // is_hier_part 3550 // has_authority 3551 if (!equals(_authority, another._authority)) { 3552 return false; 3553 } 3554 // path 3555 if (!equals(_path, another._path)) { 3556 return false; 3557 } 3558 // has_query 3559 if (!equals(_query, another._query)) { 3560 return false; 3561 } 3562 // has_fragment? should be careful of the only fragment case. 3563 if (!equals(_fragment, another._fragment)) { 3564 return false; 3565 } 3566 return true; 3567 } 3568 3569 // ---------------------------------------------------------- Serialization 3570 3571 /*** 3572 * Write the content of this URI. 3573 * 3574 * @param oos the object-output stream 3575 * @throws IOException If an IO problem occurs. 
3576 */ 3577 protected void writeObject(ObjectOutputStream oos) 3578 throws IOException { 3579 3580 oos.defaultWriteObject(); 3581 } 3582 3583 3584 /*** 3585 * Read a URI. 3586 * 3587 * @param ois the object-input stream 3588 * @throws ClassNotFoundException If one of the classes specified in the 3589 * input stream cannot be found. 3590 * @throws IOException If an IO problem occurs. 3591 */ 3592 protected void readObject(ObjectInputStream ois) 3593 throws ClassNotFoundException, IOException { 3594 3595 ois.defaultReadObject(); 3596 } 3597 3598 // -------------------------------------------------------------- Hash code 3599 3600 /*** 3601 * Return a hash code for this URI. 3602 * 3603 * @return a has code value for this URI 3604 */ 3605 public int hashCode() { 3606 if (hash == 0) { 3607 char[] c = _uri; 3608 if (c != null) { 3609 for (int i = 0, len = c.length; i < len; i++) { 3610 hash = 31 * hash + c[i]; 3611 } 3612 } 3613 c = _fragment; 3614 if (c != null) { 3615 for (int i = 0, len = c.length; i < len; i++) { 3616 hash = 31 * hash + c[i]; 3617 } 3618 } 3619 } 3620 return hash; 3621 } 3622 3623 // ------------------------------------------------------------- Comparison 3624 3625 /*** 3626 * Compare this URI to another object. 3627 * 3628 * @param obj the object to be compared. 3629 * @return 0, if it's same, 3630 * -1, if failed, first being compared with in the authority component 3631 * @throws ClassCastException not URI argument 3632 */ 3633 public int compareTo(Object obj) throws ClassCastException { 3634 3635 URI another = (URI) obj; 3636 if (!equals(_authority, another.getRawAuthority())) { 3637 return -1; 3638 } 3639 return toString().compareTo(another.toString()); 3640 } 3641 3642 // ------------------------------------------------------------------ Clone 3643 3644 /*** 3645 * Create and return a copy of this object, the URI-reference containing 3646 * the userinfo component. 
Notice that the whole URI-reference including 3647 * the userinfo component counld not be gotten as a <code>String</code>. 3648 * <p> 3649 * To copy the identical <code>URI</code> object including the userinfo 3650 * component, it should be used. 3651 * 3652 * @return a clone of this instance 3653 */ 3654 public synchronized Object clone() { 3655 3656 URI instance = new URI(); 3657 3658 instance._uri = _uri; 3659 instance._scheme = _scheme; 3660 instance._opaque = _opaque; 3661 instance._authority = _authority; 3662 instance._userinfo = _userinfo; 3663 instance._host = _host; 3664 instance._port = _port; 3665 instance._path = _path; 3666 instance._query = _query; 3667 instance._fragment = _fragment; 3668 // the charset to do escape encoding for this instance 3669 instance.protocolCharset = protocolCharset; 3670 // flags 3671 instance._is_hier_part = _is_hier_part; 3672 instance._is_opaque_part = _is_opaque_part; 3673 instance._is_net_path = _is_net_path; 3674 instance._is_abs_path = _is_abs_path; 3675 instance._is_rel_path = _is_rel_path; 3676 instance._is_reg_name = _is_reg_name; 3677 instance._is_server = _is_server; 3678 instance._is_hostname = _is_hostname; 3679 instance._is_IPv4address = _is_IPv4address; 3680 instance._is_IPv6reference = _is_IPv6reference; 3681 3682 return instance; 3683 } 3684 3685 // ------------------------------------------------------------ Get the URI 3686 3687 /*** 3688 * It can be gotten the URI character sequence. It's raw-escaped. 3689 * For the purpose of the protocol to be transported, it will be useful. 3690 * <p> 3691 * It is clearly unwise to use a URL that contains a password which is 3692 * intended to be secret. In particular, the use of a password within 3693 * the 'userinfo' component of a URL is strongly disrecommended except 3694 * in those rare cases where the 'password' parameter is intended to be 3695 * public. 
3696 * <p> 3697 * When you want to get each part of the userinfo, you need to use the 3698 * specific methods in the specific URL. It depends on the specific URL. 3699 * 3700 * @return the URI character sequence 3701 */ 3702 public char[] getRawURI() { 3703 return _uri; 3704 } 3705 3706 3707 /*** 3708 * It can be gotten the URI character sequence. It's escaped. 3709 * For the purpose of the protocol to be transported, it will be useful. 3710 * 3711 * @return the escaped URI string 3712 */ 3713 public String getEscapedURI() { 3714 return (_uri == null) ? null : new String(_uri); 3715 } 3716 3717 3718 /*** 3719 * It can be gotten the URI character sequence. 3720 * 3721 * @return the original URI string 3722 * @throws URIException incomplete trailing escape pattern or unsupported 3723 * character encoding 3724 * @see #decode 3725 */ 3726 public String getURI() throws URIException { 3727 return (_uri == null) ? null : decode(_uri, getProtocolCharset()); 3728 } 3729 3730 3731 /*** 3732 * Get the URI reference character sequence. 3733 * 3734 * @return the URI reference character sequence 3735 */ 3736 public char[] getRawURIReference() { 3737 if (_fragment == null) { 3738 return _uri; 3739 } 3740 if (_uri == null) { 3741 return _fragment; 3742 } 3743 // if _uri != null && _fragment != null 3744 String uriReference = new String(_uri) + "#" + new String(_fragment); 3745 return uriReference.toCharArray(); 3746 } 3747 3748 3749 /*** 3750 * Get the escaped URI reference string. 3751 * 3752 * @return the escaped URI reference string 3753 */ 3754 public String getEscapedURIReference() { 3755 char[] uriReference = getRawURIReference(); 3756 return (uriReference == null) ? null : new String(uriReference); 3757 } 3758 3759 3760 /*** 3761 * Get the original URI reference string. 3762 * 3763 * @return the original URI reference string 3764 * @throws URIException If {@link #decode} fails. 
3765 */ 3766 public String getURIReference() throws URIException { 3767 char[] uriReference = getRawURIReference(); 3768 return (uriReference == null) ? null : decode(uriReference, 3769 getProtocolCharset()); 3770 } 3771 3772 3773 /*** 3774 * Get the escaped URI string. 3775 * <p> 3776 * On the document, the URI-reference form is only used without the userinfo 3777 * component like http://jakarta.apache.org/ by the security reason. 3778 * But the URI-reference form with the userinfo component could be parsed. 3779 * <p> 3780 * In other words, this URI and any its subclasses must not expose the 3781 * URI-reference expression with the userinfo component like 3782 * http://user:password@hostport/restricted_zone.<;br> 3783 * It means that the API client programmer should extract each user and 3784 * password to access manually. Probably it will be supported in the each 3785 * subclass, however, not a whole URI-reference expression. 3786 * 3787 * @return the escaped URI string 3788 * @see #clone() 3789 */ 3790 public String toString() { 3791 return getEscapedURI(); 3792 } 3793 3794 3795 // ------------------------------------------------------------ Inner class 3796 3797 /*** 3798 * The charset-changed normal operation to represent to be required to 3799 * alert to user the fact the default charset is changed. 3800 */ 3801 public static class DefaultCharsetChanged extends RuntimeException { 3802 3803 // ------------------------------------------------------- constructors 3804 3805 /*** 3806 * The constructor with a reason string and its code arguments. 3807 * 3808 * @param reasonCode the reason code 3809 * @param reason the reason 3810 */ 3811 public DefaultCharsetChanged(int reasonCode, String reason) { 3812 super(reason); 3813 this.reason = reason; 3814 this.reasonCode = reasonCode; 3815 } 3816 3817 // ---------------------------------------------------------- constants 3818 3819 /*** No specified reason code. 
*/ 3820 public static final int UNKNOWN = 0; 3821 3822 /*** Protocol charset changed. */ 3823 public static final int PROTOCOL_CHARSET = 1; 3824 3825 /*** Document charset changed. */ 3826 public static final int DOCUMENT_CHARSET = 2; 3827 3828 // ------------------------------------------------- instance variables 3829 3830 /*** The reason code. */ 3831 private int reasonCode; 3832 3833 /*** The reason message. */ 3834 private String reason; 3835 3836 // ------------------------------------------------------------ methods 3837 3838 /*** 3839 * Get the reason code. 3840 * 3841 * @return the reason code 3842 */ 3843 public int getReasonCode() { 3844 return reasonCode; 3845 } 3846 3847 /*** 3848 * Get the reason message. 3849 * 3850 * @return the reason message 3851 */ 3852 public String getReason() { 3853 return reason; 3854 } 3855 3856 } 3857 3858 3859 /*** 3860 * A mapping to determine the (somewhat arbitrarily) preferred charset for a 3861 * given locale. Supports all locales recognized in JDK 1.1. 3862 * <p> 3863 * The distribution of this class is Servlets.com. It was originally 3864 * written by Jason Hunter [jhunter at acm.org] and used by with permission. 
*/
public static class LocaleToCharsetMap {

    /** A mapping of language code to charset */
    private static final Hashtable LOCALE_TO_CHARSET_MAP;
    static {
        // { locale key, charset name } pairs; keys are language codes,
        // except "zh_TW" which also carries a country.
        String[][] entries = {
            {"ar", "ISO-8859-6"},
            {"be", "ISO-8859-5"},
            {"bg", "ISO-8859-5"},
            {"ca", "ISO-8859-1"},
            {"cs", "ISO-8859-2"},
            {"da", "ISO-8859-1"},
            {"de", "ISO-8859-1"},
            {"el", "ISO-8859-7"},
            {"en", "ISO-8859-1"},
            {"es", "ISO-8859-1"},
            {"et", "ISO-8859-1"},
            {"fi", "ISO-8859-1"},
            {"fr", "ISO-8859-1"},
            {"hr", "ISO-8859-2"},
            {"hu", "ISO-8859-2"},
            {"is", "ISO-8859-1"},
            {"it", "ISO-8859-1"},
            {"iw", "ISO-8859-8"},
            {"ja", "Shift_JIS"},
            {"ko", "EUC-KR"},
            {"lt", "ISO-8859-2"},
            {"lv", "ISO-8859-2"},
            {"mk", "ISO-8859-5"},
            {"nl", "ISO-8859-1"},
            {"no", "ISO-8859-1"},
            {"pl", "ISO-8859-2"},
            {"pt", "ISO-8859-1"},
            {"ro", "ISO-8859-2"},
            {"ru", "ISO-8859-5"},
            {"sh", "ISO-8859-5"},
            {"sk", "ISO-8859-2"},
            {"sl", "ISO-8859-2"},
            {"sq", "ISO-8859-2"},
            {"sr", "ISO-8859-5"},
            {"sv", "ISO-8859-1"},
            {"tr", "ISO-8859-9"},
            {"uk", "ISO-8859-5"},
            {"zh", "GB2312"},
            {"zh_TW", "Big5"},
        };
        LOCALE_TO_CHARSET_MAP = new Hashtable();
        for (int i = 0; i < entries.length; i++) {
            LOCALE_TO_CHARSET_MAP.put(entries[i][0], entries[i][1]);
        }
    }

    /**
     * Get the preferred charset for the given locale.
     * <p>
     * The full locale name (<code>locale.toString()</code>, which may
     * include a country) is looked up first; if that is not found, the
     * language code alone is tried.
     *
     * @param locale the locale
     * @return the preferred charset or null if the locale is not
     * recognized.
     */
    public static String getCharset(Locale locale) {
        Object match = LOCALE_TO_CHARSET_MAP.get(locale.toString());
        if (match == null) {
            // no full-name match: fall back to the language only
            match = LOCALE_TO_CHARSET_MAP.get(locale.getLanguage());
        }
        return (String) match; // may be null
    }

}

}

This page was automatically generated by Maven