Coverage Report - net.wotonomy.web.URI
 
Classes in this File Line Coverage Branch Coverage Complexity
URI
0% 
0% 
2.893
 
 1  
 /*
 2  
  * $Header$
 3  
  * $Revision: 905 $
 4  
  * $Date: 2006-02-19 01:44:03 +0000 (Sun, 19 Feb 2006) $
 5  
  *
 6  
  * ====================================================================
 7  
  *
 8  
  * The Apache Software License, Version 1.1
 9  
  *
 10  
  * Copyright (c) 2002 the Apache Software Foundation.  All rights 
 11  
  * reserved.
 12  
  *
 13  
  * Redistribution and use in source and binary forms, with or without
 14  
  * modification, are permitted provided that the following conditions
 15  
  * are met:
 16  
  *
 17  
  * 1. Redistributions of source code must retain the above copyright
 18  
  *    notice, this list of conditions and the following disclaimer. 
 19  
  *
 20  
  * 2. Redistributions in binary form must reproduce the above copyright
 21  
  *    notice, this list of conditions and the following disclaimer in
 22  
  *    the documentation and/or other materials provided with the
 23  
  *    distribution.
 24  
  *
 25  
  * 3. The end-user documentation included with the redistribution, if
 26  
  *    any, must include the following acknowlegement:  
 27  
  *       "This product includes software developed by the 
 28  
  *        Apache Software Foundation (http://www.apache.org/)."
 29  
  *    Alternately, this acknowlegement may appear in the software itself,
 30  
  *    if and wherever such third-party acknowlegements normally appear.
 31  
  *
 32  
  * 4. The names "The Jakarta Project", "HttpClient", and "Apache Software
 33  
  *    Foundation" must not be used to endorse or promote products derived
 34  
  *    from this software without prior written permission. For written 
 35  
  *    permission, please contact apache@apache.org.
 36  
  *
 37  
  * 5. Products derived from this software may not be called "Apache"
 38  
  *    nor may "Apache" appear in their names without prior written
 39  
  *    permission of the Apache Group.
 40  
  *
 41  
  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 42  
  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 43  
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 44  
  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 45  
  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 46  
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 47  
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 48  
  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 49  
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 50  
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 51  
  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 52  
  * SUCH DAMAGE.
 53  
  * ====================================================================
 54  
  *
 55  
  * This software consists of voluntary contributions made by many
 56  
  * individuals on behalf of the Apache Software Foundation.  For more
 57  
  * information on the Apache Software Foundation, please see
 58  
  * <http://www.apache.org/>.
 59  
  *
 60  
  * [Additional notices, if required by prior licensing conditions]
 61  
  *
 62  
  */ 
 63  
 
 64  
 // excellent class borrowed from Apache Commons project:
 65  
 //package org.apache.commons.httpclient;
 66  
 
 67  
 package net.wotonomy.web;
 68  
 
 69  
 import java.io.IOException;
 70  
 import java.io.Serializable;
 71  
 import java.io.UnsupportedEncodingException;
 72  
 import java.net.URL;
 73  
 import java.security.AccessController;
 74  
 import java.util.BitSet;
 75  
 import java.util.Hashtable;
 76  
 import java.util.Locale;
 77  
 
 78  
 import sun.security.action.GetPropertyAction;
 79  
 
 80  
 /**
 81  
  * The interface for the URI(Uniform Resource Identifiers) version of RFC 2396.
 82  
  * This class has the purpose of supporting the parsing of a URI reference to
 83  
  * extend any specific protocols, the character encoding of the protocol to 
 84  
  * be transported and the charset of the document.
 85  
  * <p>
 86  
  * A URI is always in an "escaped" form, since escaping or unescaping a
 87  
  * completed URI might change its semantics.  
 88  
  * <p>
 89  
  * Implementers should be careful not to escape or unescape the same string
 90  
  * more than once, since unescaping an already unescaped string might lead to
 91  
  * misinterpreting a percent data character as another escaped character,
 92  
  * or vice versa in the case of escaping an already escaped string.
 93  
  * <p>
 94  
  * In order to avoid these problems, data types used as follows:
 95  
  * <p><blockquote><pre>
 96  
  *   URI character sequence: char
 97  
  *   octet sequence: byte
 98  
  *   original character sequence: String
 99  
  * </pre></blockquote><p>
 100  
  *
 101  
  * So, a URI is a sequence of characters as an array of a char type, which
 102  
  * is not always represented as a sequence of octets as an array of byte.
 103  
  * <p>
 104  
  * 
 105  
  * URI Syntactic Components
 106  
  * <p><blockquote><pre>
 107  
  * - In general, written as follows:
 108  
  *   Absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;
 109  
  *   Generic URI = &lt;scheme&gt;://&lt;authority&gt;&lt;path&gt;?&lt;query&gt;
 110  
  *
 111  
  * - Syntax
 112  
  *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
 113  
  *   hier_part     = ( net_path | abs_path ) [ "?" query ]
 114  
  *   net_path      = "//" authority [ abs_path ]
 115  
  *   abs_path      = "/"  path_segments
 116  
  * </pre></blockquote><p>
 117  
  *
 118  
  * The following examples illustrate URI that are in common use.
 119  
  * <pre>
 120  
  * ftp://ftp.is.co.za/rfc/rfc1808.txt
 121  
  *    -- ftp scheme for File Transfer Protocol services
 122  
  * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles
 123  
  *    -- gopher scheme for Gopher and Gopher+ Protocol services
 124  
  * http://www.math.uio.no/faq/compression-faq/part1.html
 125  
  *    -- http scheme for Hypertext Transfer Protocol services
 126  
  * mailto:mduerst@ifi.unizh.ch
 127  
  *    -- mailto scheme for electronic mail addresses
 128  
  * news:comp.infosystems.www.servers.unix
 129  
  *    -- news scheme for USENET news groups and articles
 130  
  * telnet://melvyl.ucop.edu/
 131  
  *    -- telnet scheme for interactive services via the TELNET Protocol
 132  
  * </pre>
 133  
  * Please, notice that there are many modifications from URL(RFC 1738) and
 134  
  * relative URL(RFC 1808).
 135  
  * <p>
 136  
  * <b>The expressions for a URI</b>
 137  
  * <p><pre>
 138  
  * For escaped URI forms
 139  
  *  - URI(char[]) // constructor
 140  
  *  - char[] getRawXxx() // method
 141  
  *  - String getEscapedXxx() // method
 142  
  *  - String toString() // method
 143  
  * <p>
 144  
  * For unescaped URI forms
 145  
  *  - URI(String) // constructor
 146  
  *  - String getXXX() // method
 147  
  * </pre><p>
 148  
  *
 149  
  * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
 150  
  * @version $Revision: 905 $ $Date: 2002/03/14 15:14:01 
 151  
  */
 152  0
 class URI implements Cloneable, Comparable, Serializable {
 153  
 
 154  
 
 155  
     // ----------------------------------------------------------- Constructors
 156  
 
 157  0
     protected URI() {
 158  0
     }
 159  
 
 160  
     /**
 161  
      * Construct a URI as an escaped form of a character array.
 162  
      * An URI can be placed within double-quotes or angle brackets like 
 163  
      * "http://test.com/" and &lt;http://test.com/&gt;
 164  
      * 
 165  
      * @param escaped the URI character sequence
 166  
      * @exception IOException
 167  
      * @throws NullPointerException if <code>escaped</code> is <code>null</code>
 168  
      */
 169  0
     public URI(char[] escaped) throws IOException {
 170  0
         parseUriReference(new String(escaped), true);
 171  0
     }
 172  
 
 173  
 
 174  
     /**
 175  
      * Construct a URI from the given string.
 176  
      * <p><blockquote><pre>
 177  
      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 178  
      * </pre></blockquote><p>
 179  
      * An URI can be placed within double-quotes or angle brackets like 
 180  
      * "http://test.com/" and &lt;http://test.com/&gt;
 181  
      *
 182  
      * @param original the string to be represented to URI character sequence
 183  
      * It is one of absoluteURI and relativeURI.
 184  
      * @exception IOException
 185  
      */
 186  0
     public URI(String original) throws IOException {
 187  0
         parseUriReference(original, false);
 188  0
     }
 189  
 
 190  
     /**
 191  
      * Construct a URI from a URL.
 192  
      *
 193  
      * @param url a valid URL.
 194  
      * @throws IOException
 195  
      * @since 2.0 
 196  
      */
 197  
     public URI(URL url) throws IOException {
 198  0
         this(url.toString());
 199  0
     }
 200  
 
 201  
 
 202  
     /**
 203  
      * Construct a general URI from the given components.
 204  
      * <p><blockquote><pre>
 205  
      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 206  
      *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
 207  
      *   opaque_part   = uric_no_slash *uric
 208  
      * </pre></blockquote><p>
 209  
      * It's for absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;#
 210  
      * &lt;fragment&gt;.
 211  
      *
 212  
      * @param scheme the scheme string
 213  
      * @param scheme_specific_part scheme_specific_part
 214  
      * @param fragment the fragment string
 215  
      * @exception IOException
 216  
      */
 217  0
     public URI(String scheme, String scheme_specific_part, String fragment)
 218  0
         throws IOException {
 219  
 
 220  
         // validate and contruct the URI character sequence
 221  0
         if (scheme == null) {
 222  0
            throw new IOException(/*IOException.PARSING,*/ "URI: scheme required");
 223  
         }
 224  0
         char[] s = scheme.toLowerCase().toCharArray();
 225  0
         if (validate(s, URI.scheme)) {
 226  0
             _scheme = s; // is_absoluteURI
 227  0
         } else {
 228  0
             throw new IOException(/*IOException.PARSING,*/ "URI: incorrect scheme");
 229  
         }
 230  0
         _opaque = encode(scheme_specific_part, allowed_opaque_part);
 231  
         // Set flag
 232  0
         _is_opaque_part = true;
 233  0
         setUriReference();
 234  0
     }
 235  
 
 236  
 
 237  
     /**
 238  
      * Construct a general URI from the given components.
 239  
      * <p><blockquote><pre>
 240  
      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 241  
      *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
 242  
      *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
 243  
      *   hier_part     = ( net_path | abs_path ) [ "?" query ]
 244  
      * </pre></blockquote><p>
 245  
      * It's for absolute URI = &lt;scheme&gt;:&lt;path&gt;?&lt;query&gt;#&lt;
 246  
      * fragment&gt; and relative URI = &lt;path&gt;?&lt;query&gt;#&lt;fragment
 247  
      * &gt;.
 248  
      *
 249  
      * @param scheme the scheme string
 250  
      * @param authority the authority string
 251  
      * @param path the path string
 252  
      * @param query the query string
 253  
      * @param fragment the fragment string
 254  
      * @exception IOException
 255  
      */
 256  0
     public URI(String scheme, String authority, String path, String query,
 257  0
                String fragment) throws IOException {
 258  
 
 259  
         // validate and contruct the URI character sequence
 260  0
         StringBuffer buff = new StringBuffer();
 261  0
         if (scheme != null) {
 262  0
             buff.append(scheme);
 263  0
             buff.append(':');
 264  
         }
 265  0
         if (authority != null) {
 266  0
             buff.append("//");
 267  0
             buff.append(authority);
 268  
         }
 269  0
         if (path != null) {  // accept empty path
 270  0
             if ((scheme != null || authority != null)
 271  0
                     && !path.startsWith("/")) {
 272  0
                 throw new IOException(/*IOException.PARSING*,*/
 273  0
                         "URI: abs_path requested");
 274  
             }
 275  0
             buff.append(path);
 276  
         }
 277  0
         if (query != null) {
 278  0
             buff.append('?');
 279  0
             buff.append(query);
 280  
         }
 281  0
         if (fragment != null) {
 282  0
             buff.append('#');
 283  0
             buff.append(fragment);
 284  
         }
 285  0
         parseUriReference(buff.toString(), false);
 286  0
     }
 287  
 
 288  
 
 289  
     /**
 290  
      * Construct a general URI from the given components.
 291  
      *
 292  
      * @param scheme the scheme string
 293  
      * @param userinfo the userinfo string
 294  
      * @param host the host string
 295  
      * @param port the port number
 296  
      * @exception IOException
 297  
      */
 298  
     public URI(String scheme, String userinfo, String host, int port)
 299  
         throws IOException {
 300  
 
 301  0
         this(scheme, userinfo, host, port, null, null, null);
 302  0
     }
 303  
 
 304  
 
 305  
     /**
 306  
      * Construct a general URI from the given components.
 307  
      *
 308  
      * @param scheme the scheme string
 309  
      * @param userinfo the userinfo string
 310  
      * @param host the host string
 311  
      * @param port the port number
 312  
      * @param path the path string
 313  
      * @exception IOException
 314  
      */
 315  
     public URI(String scheme, String userinfo, String host, int port,
 316  
             String path) throws IOException {
 317  
 
 318  0
         this(scheme, userinfo, host, port, path, null, null);
 319  0
     }
 320  
 
 321  
 
 322  
     /**
 323  
      * Construct a general URI from the given components.
 324  
      *
 325  
      * @param scheme the scheme string
 326  
      * @param userinfo the userinfo string
 327  
      * @param host the host string
 328  
      * @param port the port number
 329  
      * @param path the path string
 330  
      * @param query the query string
 331  
      * @exception IOException
 332  
      */
 333  
     public URI(String scheme, String userinfo, String host, int port,
 334  
             String path, String query) throws IOException {
 335  
 
 336  0
         this(scheme, userinfo, host, port, path, query, null);
 337  0
     }
 338  
 
 339  
 
 340  
     /**
 341  
      * Construct a general URI from the given components.
 342  
      *
 343  
      * @param scheme the scheme string
 344  
      * @param userinfo the userinfo string
 345  
      * @param host the host string
 346  
      * @param port the port number
 347  
      * @param path the path string
 348  
      * @param query the query string
 349  
      * @param fragment the fragment string
 350  
      * @exception IOException
 351  
      */
 352  
     public URI(String scheme, String userinfo, String host, int port,
 353  
             String path, String query, String fragment) throws IOException {
 354  
 
 355  0
         this(scheme, (host == null) ? null :
 356  0
                 ((userinfo != null) ? userinfo + '@' : "") + host +
 357  0
                 ((port != -1) ? ":" + port : ""), path, query, fragment);
 358  0
     }
 359  
 
 360  
 
 361  
     /**
 362  
      * Construct a general URI from the given components.
 363  
      *
 364  
      * @param scheme the scheme string
 365  
      * @param host the host string
 366  
      * @param path the path string
 367  
      * @param fragment the fragment string
 368  
      * @exception IOException
 369  
      */
 370  
     public URI(String scheme, String host, String path, String fragment)
 371  
         throws IOException {
 372  
 
 373  0
         this(scheme, host, path, null, fragment);
 374  0
     }
 375  
 
 376  
 
 377  
     /**
 378  
      * Construct a general URI with the given relative URI string.
 379  
      *
 380  
      * @param base the base URI
 381  
      * @param relative the relative URI string
 382  
      * @exception IOException
 383  
      */
 384  
     public URI(URI base, String relative) throws IOException {
 385  0
         this(base, new URI(relative));
 386  0
     }
 387  
 
 388  
 
 389  
     /**
 390  
      * Construct a general URI with the given relative URI.
 391  
      * <p><blockquote><pre>
 392  
      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 393  
      *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
 394  
      * </pre></blockquote><p>
 395  
      * Resolving Relative References to Absolute Form.
 396  
      *
 397  
      * <strong>Examples of Resolving Relative URI References</strong>
 398  
      *
 399  
      * Within an object with a well-defined base URI of
 400  
      * <p><blockquote><pre>
 401  
      *   http://a/b/c/d;p?q
 402  
      * </pre></blockquote><p>
 403  
      * the relative URI would be resolved as follows:
 404  
      *
 405  
      * Normal Examples
 406  
      *
 407  
      * <p><blockquote><pre>
 408  
      *   g:h           =  g:h
 409  
      *   g             =  http://a/b/c/g
 410  
      *   ./g           =  http://a/b/c/g
 411  
      *   g/            =  http://a/b/c/g/
 412  
      *   /g            =  http://a/g
 413  
      *   //g           =  http://g
 414  
      *   ?y            =  http://a/b/c/?y
 415  
      *   g?y           =  http://a/b/c/g?y
 416  
      *   #s            =  (current document)#s
 417  
      *   g#s           =  http://a/b/c/g#s
 418  
      *   g?y#s         =  http://a/b/c/g?y#s
 419  
      *   ;x            =  http://a/b/c/;x
 420  
      *   g;x           =  http://a/b/c/g;x
 421  
      *   g;x?y#s       =  http://a/b/c/g;x?y#s
 422  
      *   .             =  http://a/b/c/
 423  
      *   ./            =  http://a/b/c/
 424  
      *   ..            =  http://a/b/
 425  
      *   ../           =  http://a/b/
 426  
      *   ../g          =  http://a/b/g
 427  
      *   ../..         =  http://a/
 428  
      *   ../../        =  http://a/ 
 429  
      *   ../../g       =  http://a/g
 430  
      * </pre></blockquote><p>
 431  
      *
 432  
      * Some URI schemes do not allow a hierarchical syntax matching the
 433  
      * <hier_part> syntax, and thus cannot use relative references.
 434  
      *
 435  
      * @param base the base URI
 436  
      * @param relative the relative URI
 437  
      * @exception IOException
 438  
      */
 439  0
     public URI(URI base, URI relative) throws IOException {
 440  
 
 441  0
         if (base._scheme == null) {
 442  0
             throw new IOException(/* IOException.PARSING,*/ "URI: base URI required");
 443  
         }
 444  0
         if (base._scheme != null) {
 445  0
             this._scheme = base._scheme;
 446  0
             this._authority = base._authority;
 447  
         }
 448  0
         if (base._is_opaque_part || relative._is_opaque_part) {
 449  0
             this._scheme = base._scheme;
 450  0
             this._is_opaque_part = relative._is_opaque_part;
 451  0
             this._opaque = relative._opaque;
 452  0
             this._fragment = relative._fragment;
 453  0
             this.setUriReference();
 454  0
             return;
 455  
         }
 456  0
         if (relative._scheme != null) {
 457  0
             this._scheme = relative._scheme;
 458  0
             this._is_net_path = relative._is_net_path;
 459  0
             this._authority = relative._authority;
 460  0
             if (relative._is_server) {
 461  0
                 this._userinfo = relative._userinfo;
 462  0
                 this._host = relative._host;
 463  0
                 this._port = relative._port;
 464  0
             } else if (relative._is_reg_name) {
 465  0
                 this._is_reg_name = relative._is_reg_name;
 466  
             }
 467  0
             this._is_abs_path = relative._is_abs_path;
 468  0
             this._is_rel_path = relative._is_rel_path;
 469  0
             this._path = relative._path;
 470  0
         } else if (base._authority != null && relative._scheme == null) {
 471  0
             this._is_net_path = base._is_net_path;
 472  0
             this._authority = base._authority;
 473  0
             if (base._is_server) {
 474  0
                 this._userinfo = base._userinfo;
 475  0
                 this._host = base._host;
 476  0
                 this._port = base._port;
 477  0
             } else if (base._is_reg_name) {
 478  0
                 this._is_reg_name = base._is_reg_name;
 479  
             }
 480  
         }
 481  0
         if (relative._authority != null) {
 482  0
             this._is_net_path = relative._is_net_path;
 483  0
             this._authority = relative._authority;
 484  0
             if (relative._is_server) {
 485  0
                 this._is_server = relative._is_server;
 486  0
                 this._userinfo = relative._userinfo;
 487  0
                 this._host = relative._host;
 488  0
                 this._port = relative._port;
 489  0
             } else if (relative._is_reg_name) {
 490  0
                 this._is_reg_name = relative._is_reg_name;
 491  
             }
 492  0
             this._is_abs_path = relative._is_abs_path;
 493  0
             this._is_rel_path = relative._is_rel_path;
 494  0
             this._path = relative._path;
 495  
         }
 496  
         // resolve the path
 497  0
         if (relative._scheme == null && relative._authority == null || 
 498  0
                 equals(base._scheme, relative._scheme)) {
 499  0
             this._path = resolvePath(base._path, relative._path);
 500  
         }
 501  
         // base._query removed
 502  0
         if (relative._query != null) {
 503  0
             this._query = relative._query;
 504  
         }
 505  
         // base._fragment removed
 506  0
         if (relative._fragment != null) {
 507  0
             this._fragment = relative._fragment;
 508  
         }
 509  0
         this.setUriReference();
 510  0
     }
 511  
 
 512  
     // --------------------------------------------------- Instance Variables
 513  
 
 514  
     static final long serialVersionUID = 604752400577948726L;
 515  
 
 516  
 
 517  
     /**
 518  
      * This Uniform Resource Identifier (URI).
 519  
      * The URI is always in an "escaped" form, since escaping or unescaping
 520  
      * a completed URI might change its semantics.  
 521  
      */
 522  0
     protected char[] _uri = null;
 523  
 
 524  
 
 525  
     /**
 526  
      * The default charset of the protocol.  RFC 2277, 2396
 527  
      */
 528  0
     protected static String _protocolCharset = "UTF-8";
 529  
 
 530  
 
 531  
     /**
 532  
      * The default charset of the document.  RFC 2277, 2396
 533  
      * The platform's charset is used for the document by default.
 534  
      */
 535  0
     protected static String _documentCharset = null;
 536  
     // Static initializer for _documentCharset
 537  
     static {
 538  0
         Locale locale = Locale.getDefault();
 539  0
         if (locale != null) {
 540  
             // in order to support backward compatiblity
 541  0
             _documentCharset = LocaleToCharsetMap.getCharset(locale);
 542  0
         } else {
 543  0
             _documentCharset = (String)AccessController.doPrivileged(
 544  0
                     new GetPropertyAction("file.encoding"));
 545  
         }
 546  
     }
 547  
 
 548  
     /**
 549  
      * The scheme.
 550  
      */
 551  0
     protected char[] _scheme = null;
 552  
 
 553  
 
 554  
     /**
 555  
      * The opaque.
 556  
      */
 557  0
     protected char[] _opaque = null;
 558  
 
 559  
 
 560  
     /**
 561  
      * The authority.
 562  
      */
 563  0
     protected char[] _authority = null;
 564  
 
 565  
 
 566  
     /**
 567  
      * The userinfo.
 568  
      */
 569  0
     protected char[] _userinfo = null;
 570  
 
 571  
 
 572  
     /**
 573  
      * The host.
 574  
      */
 575  0
     protected char[] _host = null;
 576  
 
 577  
 
 578  
     /**
 579  
      * The port.
 580  
      */
 581  0
     protected int _port = -1;
 582  
 
 583  
 
 584  
     /**
 585  
      * The path.
 586  
      */
 587  0
     protected char[] _path = null;
 588  
 
 589  
 
 590  
     /**
 591  
      * The query.
 592  
      */
 593  0
     protected char[] _query = null;
 594  
 
 595  
 
 596  
     /**
 597  
      * The fragment.
 598  
      */
 599  0
     protected char[] _fragment = null;
 600  
 
 601  
 
 602  
     /**
 603  
      * The root path.
 604  
      */
 605  0
     protected static char[] rootPath = { '/' };
 606  
 
 607  
     // ---------------------- Generous characters for each component validation
 608  
 
 609  
     /**
 610  
      * The percent "%" character always has the reserved purpose of being the
 611  
      * escape indicator, it must be escaped as "%25" in order to be used as
 612  
      * data within a URI.
 613  
      */
 614  0
     protected static final BitSet percent = new BitSet(256);
 615  
     // Static initializer for percent
 616  
     static {
 617  0
         percent.set('%');
 618  
     }
 619  
 
 620  
 
 621  
     /**
 622  
      * BitSet for digit.
 623  
      * <p><blockquote><pre>
 624  
      * digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
 625  
      *            "8" | "9"
 626  
      * </pre></blockquote><p>
 627  
      */
 628  0
     protected static final BitSet digit = new BitSet(256);
 629  
     // Static initializer for digit
 630  
     static {
 631  0
         for(int i = '0'; i <= '9'; i++) {
 632  0
             digit.set(i);
 633  
         }
 634  
     }
 635  
 
 636  
 
 637  
     /**
 638  
      * BitSet for alpha.
 639  
      * <p><blockquote><pre>
 640  
      * alpha         = lowalpha | upalpha
 641  
      * </pre></blockquote><p>
 642  
      */
 643  0
     protected static final BitSet alpha = new BitSet(256);
 644  
     // Static initializer for alpha
 645  
     static {
 646  0
         for (int i = 'a'; i <= 'z'; i++) {
 647  0
             alpha.set(i);
 648  
         }
 649  0
         for (int i = 'A'; i <= 'Z'; i++) {
 650  0
             alpha.set(i);
 651  
         }
 652  
     }
 653  
 
 654  
 
 655  
     /**
 656  
      * BitSet for alphanum (join of alpha &amp; digit).
 657  
      * <p><blockquote><pre>
 658  
      *  alphanum      = alpha | digit
 659  
      * </pre></blockquote><p>
 660  
      */
 661  0
     protected static final BitSet alphanum = new BitSet(256);
 662  
     // Static initializer for alphanum
 663  
     static {
 664  0
         alphanum.or(alpha);
 665  0
         alphanum.or(digit);
 666  
     }
 667  
 
 668  
 
 669  
     /**
 670  
      * BitSet for hex.
 671  
      * <p><blockquote><pre>
 672  
      * hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 673  
      *                         "a" | "b" | "c" | "d" | "e" | "f"
 674  
      * </pre></blockquote><p>
 675  
      */
 676  0
     protected static final BitSet hex = new BitSet(256);
 677  
     // Static initializer for hex
 678  
     static {
 679  0
         hex.or(digit);
 680  0
         for(int i = 'a'; i <= 'f'; i++) {
 681  0
             hex.set(i);
 682  
         }
 683  0
         for(int i = 'A'; i <= 'F'; i++) {
 684  0
             hex.set(i);
 685  
         }
 686  
     }
 687  
 
 688  
 
 689  
     /**
 690  
      * BitSet for escaped.
 691  
      * <p><blockquote><pre>
 692  
      * escaped       = "%" hex hex
 693  
      * </pre></blockquote><p>
 694  
      */
 695  0
     protected static final BitSet escaped = new BitSet(256);
 696  
     // Static initializer for escaped
 697  
     static {
 698  0
         escaped.or(percent);
 699  0
         escaped.or(hex);
 700  
     }
 701  
 
 702  
 
 703  
     /**
 704  
      * BitSet for mark.
 705  
      * <p><blockquote><pre>
 706  
      * mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
 707  
      *                 "(" | ")"
 708  
      * </pre></blockquote><p>
 709  
      */
 710  0
     protected static final BitSet mark = new BitSet(256);
 711  
     // Static initializer for mark
 712  
     static {
 713  0
         mark.set('-');
 714  0
         mark.set('_');
 715  0
         mark.set('.');
 716  0
         mark.set('!');
 717  0
         mark.set('~');
 718  0
         mark.set('*');
 719  0
         mark.set('\'');
 720  0
         mark.set('(');
 721  0
         mark.set(')');
 722  
     }
 723  
 
 724  
 
 725  
     /**
 726  
      * Data characters that are allowed in a URI but do not have a reserved
 727  
      * purpose are called unreserved.
 728  
      * <p><blockquote><pre>
 729  
      * unreserved    = alphanum | mark
 730  
      * </pre></blockquote><p>
 731  
      */
 732  0
     protected static final BitSet unreserved = new BitSet(256);
 733  
     // Static initializer for unreserved
 734  
     static {
 735  0
         unreserved.or(alphanum);
 736  0
         unreserved.or(mark);
 737  
     }
 738  
 
 739  
 
 740  
     /**
 741  
      * BitSet for reserved.
 742  
      * <p><blockquote><pre>
 743  
      * reserved      = ";" | "/" | "?" | ":" | "@" | "&amp;" | "=" | "+" |
 744  
      *                 "$" | ","
 745  
      * </pre></blockquote><p>
 746  
      */
 747  0
     protected static final BitSet reserved = new BitSet(256);
 748  
     // Static initializer for reserved
 749  
     static {
 750  0
         reserved.set(';');
 751  0
         reserved.set('/');
 752  0
         reserved.set('?');
 753  0
         reserved.set(':');
 754  0
         reserved.set('@');
 755  0
         reserved.set('&');
 756  0
         reserved.set('=');
 757  0
         reserved.set('+');
 758  0
         reserved.set('$');
 759  0
         reserved.set(',');
 760  
     }
 761  
 
 762  
 
 763  
     /**
 764  
      * BitSet for uric.
 765  
      * <p><blockquote><pre>
 766  
      * uric          = reserved | unreserved | escaped
 767  
      * </pre></blockquote><p>
 768  
      */
 769  0
     protected static final BitSet uric = new BitSet(256);
 770  
     // Static initializer for uric
 771  
     static {
 772  0
         uric.or(reserved);
 773  0
         uric.or(unreserved);
 774  0
         uric.or(escaped);
 775  
     }
 776  
 
 777  
 
 778  
     /**
 779  
      * BitSet for fragment (alias for uric).
 780  
      * <p><blockquote><pre>
 781  
      * fragment      = *uric
 782  
      * </pre></blockquote><p>
 783  
      */
 784  0
     protected static final BitSet fragment = uric;
 785  
 
 786  
 
 787  
     /**
 788  
      * BitSet for query (alias for uric).
 789  
      * <p><blockquote><pre>
 790  
      * query         = *uric
 791  
      * </pre></blockquote><p>
 792  
      */
 793  0
     protected static final BitSet query = uric;
 794  
 
 795  
 
 796  
     /**
 797  
      * BitSet for pchar.
 798  
      * <p><blockquote><pre>
 799  
      * pchar         = unreserved | escaped |
 800  
      *                 ":" | "@" | "&amp;" | "=" | "+" | "$" | ","
 801  
      * </pre></blockquote><p>
 802  
      */
 803  0
     protected static final BitSet pchar = new BitSet(256);
 804  
     // Static initializer for pchar
 805  
     static {
 806  0
         pchar.or(unreserved);
 807  0
         pchar.or(escaped);
 808  0
         pchar.set(':');
 809  0
         pchar.set('@');
 810  0
         pchar.set('&');
 811  0
         pchar.set('=');
 812  0
         pchar.set('+');
 813  0
         pchar.set('$');
 814  0
         pchar.set(',');
 815  
     }
 816  
 
 817  
 
 818  
     /**
 819  
      * BitSet for param (alias for pchar).
 820  
      * <p><blockquote><pre>
 821  
      * param         = *pchar
 822  
      * </pre></blockquote><p>
 823  
      */
 824  0
     protected static final BitSet param = pchar;
 825  
 
 826  
 
 827  
     /**
 828  
      * BitSet for segment.
 829  
      * <p><blockquote><pre>
 830  
      * segment       = *pchar *( ";" param )
 831  
      * </pre></blockquote><p>
 832  
      */
 833  0
     protected static final BitSet segment = new BitSet(256);
 834  
     // Static initializer for segment
 835  
     static {
 836  0
         segment.or(pchar);
 837  0
         segment.set(';');
 838  0
         segment.or(param);
 839  
     }
 840  
 
 841  
 
 842  
     /**
 843  
      * BitSet for path segments.
 844  
      * <p><blockquote><pre>
 845  
      * path_segments = segment *( "/" segment )
 846  
      * </pre></blockquote><p>
 847  
      */
 848  0
     protected static final BitSet path_segments = new BitSet(256);
 849  
     // Static initializer for path_segments
 850  
     static {
 851  0
         path_segments.set('/');
 852  0
         path_segments.or(segment);
 853  
     }
 854  
 
 855  
 
 856  
     /**
 857  
      * URI absolute path.
 858  
      * <p><blockquote><pre>
 859  
      * abs_path      = "/"  path_segments
 860  
      * </pre></blockquote><p>
 861  
      */
 862  0
     protected static final BitSet abs_path = new BitSet(256);
 863  
     // Static initializer for abs_path
 864  
     static {
 865  0
         abs_path.set('/');
 866  0
         abs_path.or(path_segments);
 867  
     }
 868  
 
 869  
 
 870  
     /**
 871  
      * URI bitset for encoding typical non-slash characters.
 872  
      * <p><blockquote><pre>
 873  
      * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 874  
      *                 "&amp;" | "=" | "+" | "$" | ","
 875  
      * </pre></blockquote><p>
 876  
      */
 877  0
     protected static final BitSet uric_no_slash = new BitSet(256);
 878  
     // Static initializer for uric_no_slash
 879  
     static {
 880  0
         uric_no_slash.or(unreserved);
 881  0
         uric_no_slash.or(escaped);
 882  0
         uric_no_slash.set(';');
 883  0
         uric_no_slash.set('?');
 884  0
         uric_no_slash.set(';');
 885  0
         uric_no_slash.set('@');
 886  0
         uric_no_slash.set('&');
 887  0
         uric_no_slash.set('=');
 888  0
         uric_no_slash.set('+');
 889  0
         uric_no_slash.set('$');
 890  0
         uric_no_slash.set(',');
 891  
     }
 892  
     
 893  
 
 894  
     /**
 895  
      * URI bitset that combines uric_no_slash and uric.
 896  
      * <p><blockquote><pre>
 897  
      * opaque_part   = uric_no_slash *uric
 898  
      * </pre></blockquote><p>
 899  
      */
 900  0
     protected static final BitSet opaque_part = new BitSet(256);
 901  
     // Static initializer for opaque_part
 902  
     static {
 903  0
         opaque_part.or(uric_no_slash);
 904  0
         opaque_part.or(uric);
 905  
     }
 906  
     
 907  
 
 908  
     /**
 909  
      * URI bitset that combines absolute path and opaque part.
 910  
      * <p><blockquote><pre>
 911  
      * path          = [ abs_path | opaque_part ]
 912  
      * </pre></blockquote><p>
 913  
      */
 914  0
     protected static final BitSet path = new BitSet(256);
 915  
     // Static initializer for path
 916  
     static {
 917  0
         path.or(abs_path);
 918  0
         path.or(opaque_part);
 919  
     }
 920  
 
 921  
 
 922  
     /**
 923  
      * Port, a logical alias for digit.
 924  
      */
 925  0
     protected static final BitSet port = digit;
 926  
 
 927  
 
 928  
     /**
 929  
      * BitSet that combines digit and dot for IPv4address.
 930  
      * <p><blockquote><pre>
 931  
      * IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
 932  
      * </pre></blockquote><p>
 933  
      */
 934  0
     protected static final BitSet IPv4address = new BitSet(256);
 935  
     // Static initializer for IPv4address
 936  
     static {
 937  0
         IPv4address.or(digit);
 938  0
         IPv4address.set('.');
 939  
     }
 940  
 
 941  
 
 942  
     /**
 943  
      * RFC 2373.
 944  
      * <p><blockquote><pre>
 945  
      * IPv6address = hexpart [ ":" IPv4address ]
 946  
      * </pre></blockquote><p>
 947  
      */
 948  0
     protected static final BitSet IPv6address = new BitSet(256);
 949  
     // Static initializer for IPv6address reference
 950  
     static {
 951  0
         IPv6address.or(hex); // hexpart
 952  0
         IPv6address.set(':');
 953  0
         IPv6address.or(IPv4address);
 954  
     }
 955  
 
 956  
 
 957  
     /**
 958  
      * RFC 2732, 2373.
 959  
      * <p><blockquote><pre>
 960  
      * IPv6reference   = "[" IPv6address "]"
 961  
      * </pre></blockquote><p>
 962  
      */
 963  0
     protected static final BitSet IPv6reference = new BitSet(256);
 964  
     // Static initializer for IPv6reference
 965  
     static {
 966  0
         IPv6reference.set('[');
 967  0
         IPv6reference.or(IPv6address);
 968  0
         IPv6reference.set(']');
 969  
     }
 970  
 
 971  
 
 972  
     /**
 973  
      * BitSet for toplabel.
 974  
      * <p><blockquote><pre>
 975  
      * toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
 976  
      * </pre></blockquote><p>
 977  
      */
 978  0
     protected static final BitSet toplabel = new BitSet(256);
 979  
     // Static initializer for toplabel
 980  
     static {
 981  0
         toplabel.or(alphanum);
 982  0
         toplabel.set('-');
 983  
     }
 984  
 
 985  
 
 986  
     /**
 987  
      * BitSet for domainlabel.
 988  
      * <p><blockquote><pre>
 989  
      * domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
 990  
      * </pre></blockquote><p>
 991  
      */
 992  0
     protected static final BitSet domainlabel = toplabel;
 993  
 
 994  
 
 995  
     /**
 996  
      * BitSet for hostname.
 997  
      * <p><blockquote><pre>
 998  
      * hostname      = *( domainlabel "." ) toplabel [ "." ]
 999  
      * </pre></blockquote><p>
 1000  
      */
 1001  0
     protected static final BitSet hostname = new BitSet(256);
 1002  
     // Static initializer for hostname
 1003  
     static {
 1004  0
         hostname.or(toplabel);
 1005  
         // hostname.or(domainlabel);
 1006  0
         hostname.set('.');
 1007  
     }
 1008  
 
 1009  
 
 1010  
     /**
 1011  
      * BitSet for host.
 1012  
      * <p><blockquote><pre>
 1013  
      * host          = hostname | IPv4address | IPv6reference
 1014  
      * </pre></blockquote><p>
 1015  
      */
 1016  0
     protected static final BitSet host = new BitSet(256);
 1017  
     // Static initializer for host
 1018  
     static {
 1019  0
         host.or(hostname);
 1020  
         // host.or(IPv4address);
 1021  0
         host.or(IPv6reference); // IPv4address
 1022  
     }
 1023  
 
 1024  
 
 1025  
     /**
 1026  
      * BitSet for hostport.
 1027  
      * <p><blockquote><pre>
 1028  
      * hostport      = host [ ":" port ]
 1029  
      * </pre></blockquote><p>
 1030  
      */
 1031  0
     protected static final BitSet hostport = new BitSet(256);
 1032  
     // Static initializer for hostport
 1033  
     static {
 1034  0
         hostport.or(host);
 1035  0
         hostport.set(':');
 1036  0
         hostport.or(port);
 1037  
     }
 1038  
 
 1039  
 
 1040  
     /**
 1041  
      * Bitset for userinfo.
 1042  
      * <p><blockquote><pre>
 1043  
      * userinfo      = *( unreserved | escaped |
 1044  
      *                    ";" | ":" | "&amp;" | "=" | "+" | "$" | "," )
 1045  
      * </pre></blockquote><p>
 1046  
      */
 1047  0
     protected static final BitSet userinfo = new BitSet(256);
 1048  
     // Static initializer for userinfo
 1049  
     static {
 1050  0
         userinfo.or(unreserved);
 1051  0
         userinfo.or(escaped);
 1052  0
         userinfo.set(';');
 1053  0
         userinfo.set(':');
 1054  0
         userinfo.set('&');
 1055  0
         userinfo.set('=');
 1056  0
         userinfo.set('+');
 1057  0
         userinfo.set('$');
 1058  0
         userinfo.set(',');
 1059  
     }
 1060  
 
 1061  
 
 1062  
     /**
 1063  
      * BitSet for within the userinfo component like user and password.
 1064  
      */
 1065  0
     public static final BitSet within_userinfo = new BitSet(256);
 1066  
     // Static initializer for within_userinfo
 1067  
     static {
 1068  0
         within_userinfo.or(userinfo);
 1069  0
         within_userinfo.clear(';'); // reserved within authority
 1070  0
         within_userinfo.clear(':');
 1071  0
         within_userinfo.clear('@');
 1072  0
         within_userinfo.clear('?');
 1073  0
         within_userinfo.clear('/');
 1074  
     }
 1075  
 
 1076  
 
 1077  
     /**
 1078  
      * Bitset for server.
 1079  
      * <p><blockquote><pre>
 1080  
      * server        = [ [ userinfo "@" ] hostport ]
 1081  
      * </pre></blockquote><p>
 1082  
      */
 1083  0
     protected static final BitSet server = new BitSet(256);
 1084  
     // Static initializer for server
 1085  
     static {
 1086  0
         server.or(userinfo);
 1087  0
         server.set('@');
 1088  0
         server.or(hostport);
 1089  
     }
 1090  
 
 1091  
 
 1092  
     /**
 1093  
      * BitSet for reg_name.
 1094  
      * <p><blockquote><pre>
 1095  
      * reg_name      = 1*( unreserved | escaped | "$" | "," |
 1096  
      *                     ";" | ":" | "@" | "&amp;" | "=" | "+" )
 1097  
      * </pre></blockquote><p>
 1098  
      */
 1099  0
     protected static final BitSet reg_name = new BitSet(256);
 1100  
     // Static initializer for reg_name
 1101  
     static {
 1102  0
         reg_name.or(unreserved);
 1103  0
         reg_name.or(escaped);
 1104  0
         reg_name.set('$');
 1105  0
         reg_name.set(',');
 1106  0
         reg_name.set(';');
 1107  0
         reg_name.set(':');
 1108  0
         reg_name.set('@');
 1109  0
         reg_name.set('&');
 1110  0
         reg_name.set('=');
 1111  0
         reg_name.set('+');
 1112  
     }
 1113  
 
 1114  
 
 1115  
     /**
 1116  
      * BitSet for authority.
 1117  
      * <p><blockquote><pre>
 1118  
      * authority     = server | reg_name
 1119  
      * </pre></blockquote><p>
 1120  
      */
 1121  0
     protected static final BitSet authority = new BitSet(256);
 1122  
     // Static initializer for authority
 1123  
     static {
 1124  0
         authority.or(server);
 1125  0
         authority.or(reg_name);
 1126  
     }
 1127  
 
 1128  
 
 1129  
     /**
 1130  
      * BitSet for scheme.
 1131  
      * <p><blockquote><pre>
 1132  
      * scheme        = alpha *( alpha | digit | "+" | "-" | "." )
 1133  
      * </pre></blockquote><p>
 1134  
      */
 1135  0
     protected static final BitSet scheme = new BitSet(256);
 1136  
     // Static initializer for scheme
 1137  
     static {
 1138  0
         scheme.or(alpha);
 1139  0
         scheme.or(digit);
 1140  0
         scheme.set('+');
 1141  0
         scheme.set('-');
 1142  0
         scheme.set('.');
 1143  
     }
 1144  
 
 1145  
 
 1146  
     /**
 1147  
      * BitSet for rel_segment.
 1148  
      * <p><blockquote><pre>
 1149  
      * rel_segment   = 1*( unreserved | escaped |
 1150  
      *                     ";" | "@" | "&amp;" | "=" | "+" | "$" | "," )
 1151  
      * </pre></blockquote><p>
 1152  
      */
 1153  0
     protected static final BitSet rel_segment = new BitSet(256);
 1154  
     // Static initializer for rel_segment
 1155  
     static {
 1156  0
         rel_segment.or(unreserved);
 1157  0
         rel_segment.or(escaped);
 1158  0
         rel_segment.set(';');
 1159  0
         rel_segment.set('@');
 1160  0
         rel_segment.set('&');
 1161  0
         rel_segment.set('=');
 1162  0
         rel_segment.set('+');
 1163  0
         rel_segment.set('$');
 1164  0
         rel_segment.set(',');
 1165  
     }
 1166  
 
 1167  
 
 1168  
     /**
 1169  
      * BitSet for rel_path.
 1170  
      * <p><blockquote><pre>
 1171  
      * rel_path      = rel_segment [ abs_path ]
 1172  
      * </pre></blockquote><p>
 1173  
      */
 1174  0
     protected static final BitSet rel_path = new BitSet(256);
 1175  
     // Static initializer for rel_path
 1176  
     static {
 1177  0
         rel_path.or(rel_segment);
 1178  0
         rel_path.or(abs_path);
 1179  
     }
 1180  
 
 1181  
 
 1182  
     /**
 1183  
      * BitSet for net_path.
 1184  
      * <p><blockquote><pre>
 1185  
      * net_path      = "//" authority [ abs_path ]
 1186  
      * </pre></blockquote><p>
 1187  
      */
 1188  0
     protected static final BitSet net_path = new BitSet(256);
 1189  
     // Static initializer for net_path
 1190  
     static {
 1191  0
         net_path.set('/');
 1192  0
         net_path.or(authority);
 1193  0
         net_path.or(abs_path);
 1194  
     }
 1195  
     
 1196  
 
 1197  
     /**
 1198  
      * BitSet for hier_part.
 1199  
      * <p><blockquote><pre>
 1200  
      * hier_part     = ( net_path | abs_path ) [ "?" query ]
 1201  
      * </pre></blockquote><p>
 1202  
      */
 1203  0
     protected static final BitSet hier_part = new BitSet(256);
 1204  
     // Static initializer for hier_part
 1205  
     static {
 1206  0
         hier_part.or(net_path);
 1207  0
         hier_part.or(abs_path);
 1208  
         // hier_part.set('?'); already included
 1209  0
         hier_part.or(query);
 1210  
     }
 1211  
 
 1212  
 
 1213  
     /**
 1214  
      * BitSet for relativeURI.
 1215  
      * <p><blockquote><pre>
 1216  
      * relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
 1217  
      * </pre></blockquote><p>
 1218  
      */
 1219  0
     protected static final BitSet relativeURI = new BitSet(256);
 1220  
     // Static initializer for relativeURI
 1221  
     static {
 1222  0
         relativeURI.or(net_path);
 1223  0
         relativeURI.or(abs_path);
 1224  0
         relativeURI.or(rel_path);
 1225  
         // relativeURI.set('?'); already included
 1226  0
         relativeURI.or(query);
 1227  
     }
 1228  
 
 1229  
 
 1230  
     /**
 1231  
      * BitSet for absoluteURI.
 1232  
      * <p><blockquote><pre>
 1233  
      * absoluteURI   = scheme ":" ( hier_part | opaque_part )
 1234  
      * </pre></blockquote><p>
 1235  
      */
 1236  0
     protected static final BitSet absoluteURI = new BitSet(256);
 1237  
     // Static initializer for absoluteURI
 1238  
     static {
 1239  0
         absoluteURI.or(scheme);
 1240  0
         absoluteURI.set(':');
 1241  0
         absoluteURI.or(hier_part);
 1242  0
         absoluteURI.or(opaque_part);
 1243  
     }
 1244  
 
 1245  
 
 1246  
     /**
 1247  
      * BitSet for URI-reference.
 1248  
      * <p><blockquote><pre>
 1249  
      * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 1250  
      * </pre></blockquote><p>
 1251  
      */
 1252  0
     protected static final BitSet URI_reference = new BitSet(256);
 1253  
     // Static initializer for URI_reference
 1254  
     static {
 1255  0
         URI_reference.or(absoluteURI);
 1256  0
         URI_reference.or(relativeURI);
 1257  0
         URI_reference.set('#');
 1258  0
         URI_reference.or(fragment);
 1259  
     }
 1260  
 
 1261  
     // ---------------------------- Characters disallowed within the URI syntax
 1262  
     // Excluded US-ASCII Characters are like control, space, delims and unwise
 1263  
 
 1264  
     /**
 1265  
      * BitSet for control.
 1266  
      */
 1267  0
     public static final BitSet control = new BitSet(256);
 1268  
     // Static initializer for control
 1269  
     static {
 1270  0
         for (int i = 0; i <= 0x1F; i++) {
 1271  0
             control.set(i);
 1272  
         }
 1273  0
         control.set(0x7F);
 1274  
     }
 1275  
 
 1276  
     /**
 1277  
      * BitSet for space.
 1278  
      */
 1279  0
     public static final BitSet space = new BitSet(256);
 1280  
     // Static initializer for space
 1281  
     static {
 1282  0
         space.set(0x20);
 1283  
     }
 1284  
 
 1285  
 
 1286  
     /**
 1287  
      * BitSet for delims.
 1288  
      */
 1289  0
     public static final BitSet delims = new BitSet(256);
 1290  
     // Static initializer for delims
 1291  
     static {
 1292  0
         delims.set('<');
 1293  0
         delims.set('>');
 1294  0
         delims.set('#');
 1295  0
         delims.set('%');
 1296  0
         delims.set('"');
 1297  
     }
 1298  
 
 1299  
 
 1300  
     /**
 1301  
      * BitSet for unwise.
 1302  
      */
 1303  0
     public static final BitSet unwise = new BitSet(256);
 1304  
     // Static initializer for unwise
 1305  
     static {
 1306  0
         unwise.set('{');
 1307  0
         unwise.set('}');
 1308  0
         unwise.set('|');
 1309  0
         unwise.set('\\');
 1310  0
         unwise.set('^');
 1311  0
         unwise.set('[');
 1312  0
         unwise.set(']');
 1313  0
         unwise.set('`');
 1314  
     }
 1315  
 
 1316  
 
 1317  
     /**
 1318  
      * Disallowed rel_path before escaping.
 1319  
      */
 1320  0
     public static final BitSet disallowed_rel_path = new BitSet(256);
 1321  
     // Static initializer for disallowed_rel_path
 1322  
     static {
 1323  0
         disallowed_rel_path.or(uric);
 1324  0
         disallowed_rel_path.andNot(rel_path);
 1325  
     }
 1326  
 
 1327  
 
 1328  
     /**
 1329  
      * Disallowed opaque_part before escaping.
 1330  
      */
 1331  0
     public static final BitSet disallowed_opaque_part = new BitSet(256);
 1332  
     // Static initializer for disallowed_opaque_part
 1333  
     static {
 1334  0
         disallowed_opaque_part.or(uric);
 1335  0
         disallowed_opaque_part.andNot(opaque_part);
 1336  
     }
 1337  
 
 1338  
     // ----------------------- Characters allowed within and for each component
 1339  
 
 1340  
     /**
 1341  
      * Those characters that are allowed for the authority component.
 1342  
      */
 1343  0
     public static final BitSet allowed_authority = new BitSet(256);
 1344  
     // Static initializer for allowed_authority
 1345  
     static {
 1346  0
         allowed_authority.or(authority);
 1347  0
         allowed_authority.clear('%');
 1348  
     }
 1349  
 
 1350  
 
 1351  
     /**
 1352  
      * Those characters that are allowed for the opaque_part.
 1353  
      */
 1354  0
     public static final BitSet allowed_opaque_part = new BitSet(256);
 1355  
     // Static initializer for allowed_opaque_part 
 1356  
     static {
 1357  0
         allowed_opaque_part.or(opaque_part);
 1358  0
         allowed_opaque_part.clear('%');
 1359  
     }
 1360  
 
 1361  
 
 1362  
     /**
 1363  
      * Those characters that are allowed for the reg_name.
 1364  
      */
 1365  0
     public static final BitSet allowed_reg_name = new BitSet(256);
 1366  
     // Static initializer for allowed_reg_name 
 1367  
     static {
 1368  0
         allowed_reg_name.or(reg_name);
 1369  
         // allowed_reg_name.andNot(percent);
 1370  0
         allowed_reg_name.clear('%');
 1371  
     }
 1372  
 
 1373  
 
 1374  
     /**
 1375  
      * Those characters that are allowed for the userinfo component.
 1376  
      */
 1377  0
     public static final BitSet allowed_userinfo = new BitSet(256);
 1378  
     // Static initializer for allowed_userinfo
 1379  
     static {
 1380  0
         allowed_userinfo.or(userinfo);
 1381  
         // allowed_userinfo.andNot(percent);
 1382  0
         allowed_userinfo.clear('%');
 1383  
     }
 1384  
 
 1385  
 
 1386  
     /**
 1387  
      * Those characters that are allowed for within the userinfo component.
 1388  
      */
 1389  0
     public static final BitSet allowed_within_userinfo = new BitSet(256);
 1390  
     // Static initializer for allowed_within_userinfo
 1391  
     static {
 1392  0
         allowed_within_userinfo.or(within_userinfo);
 1393  0
         allowed_within_userinfo.clear('%');
 1394  
     }
 1395  
 
 1396  
 
 1397  
     /**
 1398  
      * Those characters that are allowed for the IPv6reference component.
 1399  
      * The characters '[', ']' in IPv6reference should be excluded.
 1400  
      */
 1401  0
     public static final BitSet allowed_IPv6reference = new BitSet(256);
 1402  
     // Static initializer for allowed_IPv6reference
 1403  
     static {
 1404  0
         allowed_IPv6reference.or(IPv6reference);
 1405  
         // allowed_IPv6reference.andNot(unwise);
 1406  0
         allowed_IPv6reference.clear('[');
 1407  0
         allowed_IPv6reference.clear(']');
 1408  
     }
 1409  
 
 1410  
 
 1411  
     /**
 1412  
      * Those characters that are allowed for the host component.
 1413  
      * The characters '[', ']' in IPv6reference should be excluded.
 1414  
      */
 1415  0
     public static final BitSet allowed_host = new BitSet(256);
 1416  
     // Static initializer for allowed_host
 1417  
     static {
 1418  0
         allowed_host.or(hostname);
 1419  0
         allowed_host.or(allowed_IPv6reference);
 1420  
     }
 1421  
 
 1422  
 
 1423  
     /**
 1424  
      * Those characters that are allowed within the authority component.
 1425  
      */
 1426  0
     public static final BitSet allowed_within_authority = new BitSet(256);
 1427  
     // Static initializer for allowed_within_authority
 1428  
     static {
 1429  0
         allowed_within_authority.or(server);
 1430  0
         allowed_within_authority.or(reg_name);
 1431  0
         allowed_within_authority.clear(';');
 1432  0
         allowed_within_authority.clear(':');
 1433  0
         allowed_within_authority.clear('@');
 1434  0
         allowed_within_authority.clear('?');
 1435  0
         allowed_within_authority.clear('/');
 1436  
     }
 1437  
 
 1438  
 
 1439  
     /**
 1440  
      * Those characters that are allowed for the abs_path.
 1441  
      */
 1442  0
     public static final BitSet allowed_abs_path = new BitSet(256);
 1443  
     // Static initializer for allowed_abs_path
 1444  
     static {
 1445  0
         allowed_abs_path.or(abs_path);
 1446  
         // allowed_abs_path.set('/');  // already included
 1447  0
         allowed_abs_path.andNot(percent);
 1448  
     }
 1449  
 
 1450  
 
 1451  
     /**
 1452  
      * Those characters that are allowed for the rel_path.
 1453  
      */
 1454  0
     public static final BitSet allowed_rel_path = new BitSet(256);
 1455  
     // Static initializer for allowed_rel_path
 1456  
     static {
 1457  0
         allowed_rel_path.or(rel_path);
 1458  0
         allowed_rel_path.clear('%');
 1459  
     }
 1460  
 
 1461  
 
 1462  
     /**
 1463  
      * Those characters that are allowed within the path.
 1464  
      */
 1465  0
     public static final BitSet allowed_within_path = new BitSet(256);
 1466  
     // Static initializer for allowed_within_path
 1467  
     static {
 1468  0
         allowed_within_path.or(abs_path);
 1469  0
         allowed_within_path.clear('/');
 1470  0
         allowed_within_path.clear(';');
 1471  0
         allowed_within_path.clear('=');
 1472  0
         allowed_within_path.clear('?');
 1473  
     }
 1474  
 
 1475  
 
 1476  
     /**
 1477  
      * Those characters that are allowed for the query component.
 1478  
      */
 1479  0
     public static final BitSet allowed_query = new BitSet(256);
 1480  
     // Static initializer for allowed_query
 1481  
     static {
 1482  0
         allowed_query.or(uric);
 1483  0
         allowed_query.clear('%');
 1484  
     }
 1485  
 
 1486  
 
 1487  
     /**
 1488  
      * Those characters that are allowed within the query component.
 1489  
      */
 1490  0
     public static final BitSet allowed_within_query = new BitSet(256);
 1491  
     // Static initializer for allowed_within_query
 1492  
     static {
 1493  0
         allowed_within_query.or(allowed_query);
 1494  0
         allowed_within_query.andNot(reserved); // excluded 'reserved'
 1495  0
         allowed_within_query.clear('#'); // avoid conflict with the fragment
 1496  
     }
 1497  
 
 1498  
 
 1499  
     /**
 1500  
      * Those characters that are allowed for the fragment component.
 1501  
      */
 1502  0
     public static final BitSet allowed_fragment = new BitSet(256);
 1503  
     // Static initializer for allowed_fragment
 1504  
     static {
 1505  0
         allowed_fragment.or(uric);
 1506  0
         allowed_fragment.clear('%');
 1507  0
     }
 1508  
 
 1509  
     // ------------------------------------------- Flags for this URI-reference
 1510  
 
 1511  
     // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 1512  
     // absoluteURI   = scheme ":" ( hier_part | opaque_part )
 1513  
     protected boolean _is_hier_part;
 1514  
     protected boolean _is_opaque_part;
 1515  
     // relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ] 
 1516  
     // hier_part     = ( net_path | abs_path ) [ "?" query ]
 1517  
     protected boolean _is_net_path;
 1518  
     protected boolean _is_abs_path;
 1519  
     protected boolean _is_rel_path;
 1520  
     // net_path      = "//" authority [ abs_path ] 
 1521  
     // authority     = server | reg_name
 1522  
     protected boolean _is_reg_name;
 1523  
     protected boolean _is_server;  // = _has_server
 1524  
     // server        = [ [ userinfo "@" ] hostport ]
 1525  
     // host          = hostname | IPv4address | IPv6reference
 1526  
     protected boolean _is_hostname;
 1527  
     protected boolean _is_IPv4address;
 1528  
     protected boolean _is_IPv6reference;
 1529  
 
 1530  
     // ------------------------------------------ Character and escape encoding
 1531  
     
 1532  
     /**
 1533  
      * Encode with the default protocol charset.
 1534  
      *
 1535  
      * @param original the original character sequence
 1536  
      * @param allowed those characters that are allowed within a component
 1537  
      * @return URI character sequence
 1538  
      * @exception IOException null component or unsupported character encoding
 1539  
      */
 1540  
     protected static char[] encode(String original, BitSet allowed)
 1541  
         throws IOException {
 1542  
 
 1543  0
         return encode(original, allowed, _protocolCharset);
 1544  
     }
 1545  
 
 1546  
 
 1547  
     /**
 1548  
      * Encodes URI string.
 1549  
      *
 1550  
      * This is a two mapping, one from original characters to octets, and
 1551  
      * subsequently a second from octets to URI characters:
 1552  
      * <p><blockquote><pre>
 1553  
      *   original character sequence->octet sequence->URI character sequence
 1554  
      * </pre></blockquote><p>
 1555  
      *
 1556  
      * An escaped octet is encoded as a character triplet, consisting of the
 1557  
      * percent character "%" followed by the two hexadecimal digits
 1558  
      * representing the octet code. For example, "%20" is the escaped
 1559  
      * encoding for the US-ASCII space character.
 1560  
      * <p>
 1561  
      * Conversion from the local filesystem character set to UTF-8 will
 1562  
      * normally involve a two step process. First convert the local character
 1563  
      * set to the UCS; then convert the UCS to UTF-8.
 1564  
      * The first step in the process can be performed by maintaining a mapping
 1565  
      * table that includes the local character set code and the corresponding
 1566  
      * UCS code.
 1567  
      * The next step is to convert the UCS character code to the UTF-8 encoding.
 1568  
      * <p>
 1569  
      * Mapping between vendor codepages can be done in a very similar manner
 1570  
      * as described above.
 1571  
      * <p>
 1572  
      * The only time escape encodings can allowedly be made is when a URI is
 1573  
      * being created from its component parts.  The escape and validate methods
 1574  
      * are internally performed within this method.
 1575  
      *
 1576  
      * @param original the original character sequence
 1577  
      * @param allowed those characters that are allowed within a component
 1578  
      * @param charset the protocol charset
 1579  
      * @return URI character sequence
 1580  
      * @exception IOException null component or unsupported character encoding
 1581  
      */
 1582  
     protected static char[] encode(String original, BitSet allowed,
 1583  
             String charset) throws IOException {
 1584  
 
 1585  
         // encode original to uri characters.
 1586  0
         if (original == null) {
 1587  0
             throw new IOException(/*IOException.PARSING,*/ "URI: null");
 1588  
         }
 1589  
         // escape octet to uri characters.
 1590  0
         if (allowed == null) {
 1591  0
             throw new IOException(/*IOException.PARSING,*/
 1592  0
                     "URI: null allowed characters");
 1593  
         }
 1594  
         byte[] octets;
 1595  
         try {
 1596  0
             octets = original.getBytes(charset);
 1597  0
         } catch (UnsupportedEncodingException error) {
 1598  0
             throw new IOException(/*IOException.UNSUPPORTED_ENCODING,*/ "Unsupported Encoding: " + charset);
 1599  0
         }
 1600  0
         StringBuffer buf = new StringBuffer(octets.length);
 1601  0
         for (int i = 0; i < octets.length; i++) {
 1602  0
             char c = (char) octets[i];
 1603  0
             if (allowed.get(c)) {
 1604  0
                 buf.append(c);
 1605  0
             } else {
 1606  0
                 buf.append('%');
 1607  0
                 byte b = octets[i]; // use the original byte value
 1608  0
                 char hexadecimal = Character.forDigit((b >> 4) & 0xF, 16);
 1609  0
                 buf.append(Character.toUpperCase(hexadecimal)); // high
 1610  0
                 hexadecimal = Character.forDigit(b & 0xF, 16);
 1611  0
                 buf.append(Character.toUpperCase(hexadecimal)); // low
 1612  
             }
 1613  
         }
 1614  
 
 1615  0
         return buf.toString().toCharArray();
 1616  
     }
 1617  
 
 1618  
 
 1619  
     /**
 1620  
      * Decode with the default protocol charset.
 1621  
      *
 1622  
      * @param component the URI character sequence
 1623  
      * @return original character sequence
 1624  
      * @exception IOException incomplete trailing escape pattern
 1625  
      * or unsupported character encoding
 1626  
      */
 1627  
     protected static String decode(char[] component) throws IOException {
 1628  0
         return decode(component, _protocolCharset);
 1629  
     }
 1630  
 
 1631  
 
 1632  
     /**
 1633  
      * Decodes URI encoded string.
 1634  
      *
 1635  
      * This is a two-step mapping, one from URI characters to octets, and
 1636  
      * subsequently a second from octets to original characters:
 1637  
      * <p><blockquote><pre>
 1638  
      *   URI character sequence->octet sequence->original character sequence
 1639  
      * </pre></blockquote><p>
 1640  
      *
 1641  
      * A URI must be separated into its components before the escaped
 1642  
      * characters within those components can be safely decoded.
 1643  
      * <p>
 1644  
      * Notice that there is a chance that URI characters that are non UTF-8
 1645  
      * may be parsed as valid UTF-8.  A recent non-scientific analysis found
 1646  
      * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a
 1647  
      * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0%
 1648  
      * false reading.
 1649  
      * <p>
 1650  
      * The percent "%" character always has the reserved purpose of being
 1651  
      * the escape indicator, it must be escaped as "%25" in order to be used
 1652  
      * as data within a URI.
 1653  
      * <p>
 1654  
      * The unescape method is internally performed within this method.
 1655  
      *
 1656  
      * @param component the URI character sequence
 1657  
      * @param charset the protocol charset
 1658  
      * @return original character sequence
 1659  
      * @exception IOException incomplete trailing escape pattern
 1660  
      * or unsupported character encoding
 1661  
      */
 1662  
     protected static String decode(char[] component, String charset)
 1663  
         throws IOException {
 1664  
 
 1665  
         // unescape uri characters to octets
 1666  0
         if (component == null)  return null;
 1667  
 
 1668  
         byte[] octets;
 1669  
         try {
 1670  0
             octets = new String(component).getBytes(charset);
 1671  0
         } catch (UnsupportedEncodingException error) {
 1672  0
             throw new IOException(/* IOException.UNSUPPORTED_ENCODING, */
 1673  0
                     "URI: not supported " + charset + " encoding");
 1674  0
         }
 1675  0
         int length = octets.length;
 1676  0
         int oi = 0; // output index
 1677  0
         for (int ii = 0; ii < length; oi++) {
 1678  0
             byte aByte = (byte) octets[ii++];
 1679  0
             if (aByte == '%' && ii+2 <= length)  {
 1680  0
                 byte high = (byte) Character.digit((char) octets[ii++], 16);
 1681  0
                 byte low = (byte) Character.digit((char) octets[ii++], 16);
 1682  0
                 if (high == -1 || low == -1) {
 1683  0
                     throw new IOException(/* IOException.ESCAPING, */
 1684  0
                             "URI: incomplete trailing escape pattern");
 1685  
                             
 1686  
                 }
 1687  0
                 aByte = (byte) ((high << 4) + low);
 1688  
             }
 1689  0
             octets[oi] = (byte) aByte;
 1690  
         }
 1691  
 
 1692  
         String result;
 1693  
         try {
 1694  0
             result = new String(octets, 0, oi, charset);
 1695  0
         } catch (UnsupportedEncodingException error) {
 1696  0
             throw new IOException(/* IOException.UNSUPPORTED_ENCODING, */
 1697  0
                     "URI: not supported " + charset + " encoding");
 1698  0
         }
 1699  
 
 1700  0
         return result;
 1701  
     }
 1702  
 
 1703  
 
 1704  
     /**
 1705  
      * Pre-validate the unescaped URI string within a specific component.
 1706  
      *
 1707  
      * @param component the component string within the component
 1708  
      * @param disallowed those characters disallowed within the component
 1709  
      * @return if true, it doesn't have the disallowed characters
 1710  
      * if false, the component is undefined or an incorrect one
 1711  
      */
 1712  
     protected boolean prevalidate(String component, BitSet disallowed) {
 1713  
         // prevalidate the given component by disallowed characters
 1714  0
         if (component == null) {
 1715  0
             return false; // undefined
 1716  
         }
 1717  0
         char[] target = component.toCharArray();
 1718  0
         for (int i = 0; i < target.length; i++) {
 1719  0
             if (disallowed.get(target[i])) {
 1720  0
                 return false;
 1721  
             }
 1722  
         }
 1723  0
         return true;
 1724  
     }
 1725  
 
 1726  
 
 1727  
     /**
 1728  
      * Validates every character of a component against a set of allowed
      * characters.
 1729  
      * The component must already be escape encoded, or must not
 1730  
      * include escaped characters.
      * <p>
      * Delegates to the four-argument <code>validate</code> with a start
      * offset of 0 and an end offset of -1.
 1731  
      *
 1732  
      * @param component the characters sequence within the component
 1733  
      * @param generous those characters that are allowed within a component
 1734  
      * @return if true, it's the correct URI character sequence
 1735  
      */
 1736  
     protected boolean validate(char[] component, BitSet generous) {
 1737  
         // validate each component by generous characters
 1738  0
         return validate(component, 0, -1, generous);
 1739  
     }
 1740  
 
 1741  
 
 1742  
     /**
 1743  
      * Validate the URI characters within a specific component.
 1744  
      * The component must be performed after escape encoding. Or it doesn't
 1745  
      * include escaped characters.
 1746  
      * <p>
 1747  
      * It's not that much strict, generous.  The strict validation might be 
 1748  
      * performed before being called this method.
 1749  
      *
 1750  
      * @param component the characters sequence within the component
 1751  
      * @param soffset the starting offset of the given component
 1752  
      * @param eoffset the ending offset of the given component
 1753  
      * if -1, it means the length of the component
 1754  
      * @param generous those characters that are allowed within a component
 1755  
      * @return if true, it's the correct URI character sequence
 1756  
      * @throws NullPointerException null component
 1757  
      */
 1758  
     protected boolean validate(char[] component, int soffset, int eoffset,
 1759  
             BitSet generous) {
 1760  
         // validate each component by generous characters
 1761  0
         if (eoffset == -1) {
 1762  0
             eoffset = component.length -1;
 1763  
         }
 1764  0
         for (int i = soffset; i <= eoffset; i++) {
 1765  0
             if (!generous.get(component[i])) return false;
 1766  
         }
 1767  0
         return true;
 1768  
     }
 1769  
 
 1770  
 
 1771  
     /**
 1772  
      * In order to avoid any possilbity of conflict with non-ASCII characters,
 1773  
      * Parse a URI reference as a <code>String</code> with the character
 1774  
      * encoding of the local system or the document.
 1775  
      * <p>
 1776  
      * The following line is the regular expression for breaking-down a URI
 1777  
      * reference into its components.
 1778  
      * <p><blockquote><pre>
 1779  
      *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 1780  
      *    12            3  4          5       6  7        8 9
 1781  
      * </pre></blockquote><p>
 1782  
      * For example, matching the above expression to
 1783  
      *   http://jakarta.apache.org/ietf/uri/#Related
 1784  
      * results in the following subexpression matches:
 1785  
      * <p><blockquote><pre>
 1786  
      *               $1 = http:
 1787  
      *  scheme    =  $2 = http
 1788  
      *               $3 = //jakarta.apache.org
 1789  
      *  authority =  $4 = jakarta.apache.org
 1790  
      *  path      =  $5 = /ietf/uri/
 1791  
      *               $6 = <undefined>
 1792  
      *  query     =  $7 = <undefined>
 1793  
      *               $8 = #Related
 1794  
      *  fragment  =  $9 = Related
 1795  
      * </pre></blockquote><p>
 1796  
      *
 1797  
      * @param original the original character sequence
 1798  
      * @param escaped <code>true</code> if <code>original</code> is escaped
 1799  
      * @return the original character sequence
 1800  
      * @exception IOException
 1801  
      */
 1802  
     protected void parseUriReference(String original, boolean escaped)
 1803  
         throws IOException {
 1804  
 
 1805  
         // validate and contruct the URI character sequence
 1806  0
         if (original == null || original.length() == 0) {
 1807  0
             throw new IOException("URI-Reference required");
 1808  
         }
 1809  
 
 1810  
         /** @
 1811  
          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 1812  
          */
 1813  0
         String tmp = original.trim();
 1814  
         
 1815  
         /**
 1816  
          * The length of the string sequence of characters.
 1817  
          * It may not be equal to the length of the byte array.
 1818  
          */
 1819  0
         int length = tmp.length();
 1820  
 
 1821  
         /**
 1822  
          * Remove the delimiters like angle brackets around an URI.
 1823  
          */
 1824  0
         char[] firstDelimiter = { tmp.charAt(0) };
 1825  0
         if (validate(firstDelimiter, delims)) {
 1826  0
             if (length >= 2) {
 1827  0
                 char[] lastDelimiter = { tmp.charAt(length - 1) };
 1828  0
                 if (validate(lastDelimiter, delims)) {
 1829  0
                     tmp = tmp.substring(1, length - 1);
 1830  0
                     length = length - 2;
 1831  
                 }
 1832  
             }
 1833  
         }
 1834  
 
 1835  
         /**
 1836  
          * The starting index
 1837  
          */
 1838  0
         int from = 0;
 1839  
 
 1840  
         /**
 1841  
          * The test flag whether the URI is started from the path component.
 1842  
          */
 1843  0
         boolean isStartedFromPath = false;
 1844  0
         int atColon = tmp.indexOf(':');
 1845  0
         int atSlash = tmp.indexOf('/');
 1846  0
         if (atColon < 0 || (atSlash >= 0 && atSlash < atColon)) {
 1847  0
             isStartedFromPath = true;
 1848  
         }
 1849  
 
 1850  
         /**
 1851  
          * <p><blockquote><pre>
 1852  
          *     @@@@@@@@
 1853  
          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 1854  
          * </pre></blockquote><p>
 1855  
          */
 1856  0
         int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
 1857  0
         if (at == -1) at = 0;
 1858  
 
 1859  
         /**
 1860  
          * Parse the scheme.
 1861  
          * <p><blockquote><pre>
 1862  
          *  scheme    =  $2 = http
 1863  
          *              @
 1864  
          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 1865  
          * </pre></blockquote><p>
 1866  
          */
 1867  0
         if (at < length && tmp.charAt(at) == ':') {
 1868  0
             char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
 1869  0
             if (validate(target, scheme)) {
 1870  0
                 _scheme = target;
 1871  0
             } else {
 1872  0
                 throw new IOException("incorrect scheme");
 1873  
             }
 1874  0
             from = ++at;
 1875  
         }
 1876  
 
 1877  
         /**
 1878  
          * Parse the authority component.
 1879  
          * <p><blockquote><pre>
 1880  
          *  authority =  $4 = jakarta.apache.org
 1881  
          *                  @@
 1882  
          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 1883  
          * </pre></blockquote><p>
 1884  
          */
 1885  
         // Reset flags
 1886  0
         _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
 1887  0
         if (0 <= at && at < length && tmp.charAt(at) == '/') {
 1888  
             // Set flag
 1889  0
             _is_hier_part = true;
 1890  0
             if (at + 2 < length && tmp.charAt(at + 1) == '/') {
 1891  
                 // the temporary index to start the search from
 1892  0
                 int next = indexFirstOf(tmp, "/?#", at + 2);
 1893  0
                 if (next == -1) {
 1894  0
                     next = (tmp.substring(at + 2).length() == 0) ? at + 2 :
 1895  0
                     tmp.length();
 1896  
                 }
 1897  0
                 parseAuthority(tmp.substring(at + 2, next), escaped);
 1898  0
                 from = at = next;
 1899  
                 // Set flag
 1900  0
                 _is_net_path = true;
 1901  
             }
 1902  0
             if (from == at) {
 1903  
                 // Set flag
 1904  0
                 _is_abs_path = true;
 1905  
             }
 1906  
         }
 1907  
 
 1908  
         /**
 1909  
          * Parse the path component.
 1910  
          * <p><blockquote><pre>
 1911  
          *  path      =  $5 = /ietf/uri/
 1912  
          *                                @@@@@@
 1913  
          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 1914  
          * </pre></blockquote><p>
 1915  
          */
 1916  0
         if (from < length) {
 1917  
             // rel_path = rel_segment [ abs_path ]
 1918  0
             int next = indexFirstOf(tmp, "?#", from);
 1919  0
             if (next == -1) {
 1920  0
                 next = tmp.length();
 1921  
             }
 1922  0
             if (!_is_abs_path) {
 1923  0
                 if (!escaped && prevalidate(tmp.substring(from, next),
 1924  0
                             disallowed_rel_path) || escaped &&
 1925  0
                         validate(tmp.substring(from, next).toCharArray(),
 1926  0
                             rel_path)) {
 1927  
                     // Set flag
 1928  0
                     _is_rel_path = true;
 1929  0
                 } else if (!escaped && prevalidate(tmp.substring(from, next),
 1930  0
                             disallowed_opaque_part) || escaped &&
 1931  0
                         validate(tmp.substring(from, next).toCharArray(),
 1932  0
                             opaque_part)) {
 1933  
                     // Set flag
 1934  0
                     _is_opaque_part = true;
 1935  0
                 } else {
 1936  
                     // the path component may be empty
 1937  0
                     _path = null;
 1938  
                 }
 1939  
             }
 1940  0
             setPath(tmp.substring(from, next));
 1941  0
             at = next;
 1942  
         }
 1943  
 
 1944  
         /**
 1945  
          * Parse the query component.
 1946  
          * <p><blockquote><pre>
 1947  
          *  query     =  $7 = <undefined>
 1948  
          *                                        @@@@@@@@@
 1949  
          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 1950  
          * </pre></blockquote><p>
 1951  
          */
 1952  0
         if (0 <= at && at+1 < length && tmp.charAt(at) == '?') {
 1953  0
             int next = tmp.indexOf('#', at + 1);
 1954  0
             if (next == -1) {
 1955  0
                 next = tmp.length();
 1956  
             }
 1957  0
             _query = (escaped) ? tmp.substring(at + 1, next).toCharArray() :
 1958  0
             encode(tmp.substring(at + 1, next), allowed_query);
 1959  0
             at = next;
 1960  
         }
 1961  
 
 1962  
         /**
 1963  
          * Parse the fragment component.
 1964  
          * <p><blockquote><pre>
 1965  
          *  fragment  =  $9 = Related
 1966  
          *                                                   @@@@@@@@
 1967  
          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 1968  
          * </pre></blockquote><p>
 1969  
          */
 1970  0
         if (0 <= at && at+1 < length && tmp.charAt(at) == '#') {
 1971  0
             _fragment = (escaped) ? tmp.substring(at + 1).toCharArray() :
 1972  0
             encode(tmp.substring(at + 1), allowed_fragment);
 1973  
         }
 1974  
 
 1975  
         // set this URI.
 1976  0
         setUriReference();
 1977  0
     }
 1978  
 
 1979  
 
 1980  
     /**
 1981  
      * Returns the index of the first occurrence in <code>s</code> of any
 1982  
      * of the characters in <code>delims</code>.
      * <p>
      * Delegates to the three-argument <code>indexFirstOf</code> with an
      * offset of -1.
 1983  
      *
 1984  
      * @param s the string to be indexed
 1985  
      * @param delims the delimiters used to index
 1986  
      * @return the earlier index if there are delimiters
 1987  
      */
 1988  
     protected int indexFirstOf(String s, String delims) {
 1989  0
         return indexFirstOf(s, delims, -1);
 1990  
     }
 1991  
 
 1992  
 
 1993  
     /**
 1994  
      * Get the earlier index that to be searched for the first occurrance in
 1995  
      * one of any of the given string.
 1996  
      *
 1997  
      * @param s the string to be indexed
 1998  
      * @param delims the delimiters used to index
 1999  
      * @param offset the from index
 2000  
      * @return the earlier index if there are delimiters
 2001  
      */
 2002  
     protected int indexFirstOf(String s, String delims, int offset) {
 2003  0
         if (s == null || s.length() == 0) {
 2004  0
             return -1;
 2005  
         }
 2006  0
         if (delims == null || delims.length() == 0) {
 2007  0
             return -1;
 2008  
         }
 2009  
         // check boundaries
 2010  0
         if (offset < 0) {
 2011  0
             offset = 0;
 2012  0
         } else if (offset > s.length()) {
 2013  0
             return -1;
 2014  
         }
 2015  
         // s is never null
 2016  0
         int min = s.length();
 2017  0
         char[] delim = delims.toCharArray();
 2018  0
         for (int i = 0; i < delim.length; i++) {
 2019  0
             int at = s.indexOf(delim[i], offset);
 2020  0
             if (at >= 0 && at < min) {
 2021  0
                 min = at;
 2022  
             }
 2023  
         }
 2024  0
         return (min == s.length()) ? -1 : min;
 2025  
     }
 2026  
 
 2027  
 
 2028  
     /**
 2029  
      * Returns the index of the first occurrence of the delimiter in the
 2030  
      * given character array.
      * <p>
      * Delegates to the three-argument <code>indexFirstOf</code> with an
      * offset of 0.
 2031  
      *
 2032  
      * @param s the character array to be indexed
 2033  
      * @param delim the delimiter used to index
 2034  
      * @return the earlier index if there is a delimiter
 2035  
      */
 2036  
     protected int indexFirstOf(char[] s, char delim) {
 2037  0
         return indexFirstOf(s, delim, 0);
 2038  
     }
 2039  
 
 2040  
 
 2041  
     /**
 2042  
      * Get the earlier index that to be searched for the first occurrance in
 2043  
      * one of any of the given array.
 2044  
      *
 2045  
      * @param s the character array to be indexed
 2046  
      * @param delim the delimiter used to index
 2047  
      * @return the ealier index if there is a delimiter
 2048  
      */
 2049  
     protected int indexFirstOf(char[] s, char delim, int offset) {
 2050  0
         if (s == null || s.length == 0) {
 2051  0
             return -1;
 2052  
         }
 2053  
         // check boundaries
 2054  0
         if (offset < 0) {
 2055  0
             offset = 0;
 2056  0
         } else if (offset > s.length) {
 2057  0
             return -1;
 2058  
         }
 2059  0
         for (int i = offset; i < s.length; i++) {
 2060  0
             if (s[i] == delim) {
 2061  0
                 return i;
 2062  
             }
 2063  
         }
 2064  0
         return -1;
 2065  
     }
 2066  
 
 2067  
 
 2068  
     /**
 2069  
      * Parse the authority component.
      * <p>
      * Splits the authority into an optional userinfo (before '@'), a host
      * (either a bracketed IPv6 reference or the text up to the first ':')
      * and an optional port, and records which kind of authority was found
      * in the <code>_is_*</code> flags.  A host that validates against
      * neither <code>IPv4address</code> nor <code>hostname</code> makes the
      * whole authority a registry-based name.
 2070  
      *
 2071  
      * @param original the original character sequence of authority component
 2072  
      * @param escaped <code>true</code> if <code>original</code> is escaped
 2073  
      * @exception IOException if an IPv6 reference has no closing ']', if
      * the port is not a parsable integer, or as thrown by
      * <code>encode</code>
 2074  
      */
 2075  
     protected void parseAuthority(String original, boolean escaped)
 2076  
         throws IOException {
 2077  
 
 2078  
         // Reset flags
 2079  0
         _is_reg_name = _is_server =
 2080  0
         _is_hostname = _is_IPv4address = _is_IPv6reference = false;
 2081  
 
 2082  0
         boolean has_port = true;
 2083  0
         int from = 0;
 2084  0
         int next = original.indexOf('@');
 2085  0
         if (next != -1) { // an '@' was found (index 0 is accepted as well)
 2086  
             // each protocol extended from URI supports the specific userinfo
 2087  0
             _userinfo = (escaped) ? original.substring(0, next).toCharArray() :
 2088  0
             encode(original.substring(0, next), allowed_userinfo);
 2089  0
             from = next + 1;
 2090  
         }
 2091  0
         next = original.indexOf('[', from);
 2092  0
         if (next >= from) {
 2093  0
             next = original.indexOf(']', from);
 2094  0
             if (next == -1) {
 2095  0
                 throw new IOException(/* IOException.PARSING,*/ "URI: IPv6reference");
 2096  
             } else {
 2097  0
                 next++;
 2098  
             }
 2099  
             // NOTE(review): the original comment said '[' and ']' should be
             // excluded, but the substring below runs from 'from' up to and
             // including the ']' -- confirm which is intended
 2100  0
             _host = (escaped) ? original.substring(from, next).toCharArray() :
 2101  0
             encode(original.substring(from, next), allowed_IPv6reference);
 2102  
             // Set flag
 2103  0
             _is_IPv6reference = true;
 2104  0
         } else { // only for !_is_IPv6reference
 2105  0
             next = original.indexOf(':', from);
 2106  0
             if (next == -1) {
 2107  0
                 next = original.length();
 2108  0
                 has_port = false;
 2109  
             }
 2110  
             // REMINDME: it doesn't need the pre-validation
 2111  0
             _host = original.substring(from, next).toCharArray();
 2112  0
             if (validate(_host, IPv4address)) {
 2113  
                 // Set flag
 2114  0
                 _is_IPv4address = true;
 2115  0
             } else if (validate(_host, hostname)) {
 2116  
                 // Set flag
 2117  0
                 _is_hostname = true;
 2118  0
             } else {
 2119  
                 // Set flag
 2120  0
                 _is_reg_name = true;
 2121  
             }
 2122  
         }
 2123  0
         if (_is_reg_name) {
 2124  
             // Reset flags for a server-based naming authority
 2125  0
             _is_server = _is_hostname = _is_IPv4address =
 2126  0
             _is_IPv6reference = false;
 2127  
             // set a registry-based naming authority
 2128  0
             _authority = (escaped) ? original.toString().toCharArray() :
 2129  0
             encode(original.toString(), allowed_reg_name);
 2130  0
         } else {
             // a port is parsed only when a ':' sits at 'next' and at least
             // one character follows it
 2131  0
             if (original.length()-1 > next && has_port &&
 2132  0
                     original.charAt(next) == ':') { // not empty
 2133  0
                 from = next + 1;
 2134  
                 try {
 2135  0
                     _port = Integer.parseInt(original.substring(from));
 2136  0
                 } catch (NumberFormatException error) {
 2137  0
                     throw new IOException(/*IOException.PARSING, */
 2138  0
                             "URI: invalid port number");
 2139  0
                 }
 2140  
             }
 2141  
             // set a server-based naming authority
 2142  0
             StringBuffer buf = new StringBuffer();
 2143  0
             if (_userinfo != null) { // has_userinfo
 2144  0
                 buf.append(_userinfo);
 2145  0
                 buf.append('@');
 2146  
             }
 2147  0
             if (_host != null) {
 2148  0
                 buf.append(_host);
 2149  0
                 if (_port != -1) {
 2150  0
                     buf.append(':');
 2151  0
                     buf.append(_port);
 2152  
                 }
 2153  
             }
 2154  0
             _authority = buf.toString().toCharArray();
 2155  
             // Set flag
 2156  0
             _is_server = true;
 2157  
         }
 2158  0
     }
 2159  
 
 2160  
 
 2161  
     /**
 2162  
      * Once it's parsed successfully, set this URI.
 2163  
      *
 2164  
      * @see #getRawURI
 2165  
      */
 2166  
     protected void setUriReference() {
 2167  
         // set _uri
 2168  0
         StringBuffer buf = new StringBuffer();
 2169  
         // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
 2170  0
         if (_scheme != null) {
 2171  0
             buf.append(_scheme);
 2172  0
             buf.append(':');
 2173  
         }
 2174  0
         if (_is_net_path) {
 2175  0
             buf.append("//");
 2176  0
             if (_authority != null) { // has_authority
 2177  0
                 if (_userinfo != null) { // by default, remove userinfo part
 2178  0
                     if (_host != null) {
 2179  0
                         buf.append(_host);
 2180  0
                         if (_port != -1) {
 2181  0
                             buf.append(':');
 2182  0
                             buf.append(_port);
 2183  0
                         }
 2184  
                     }
 2185  
                 } else {
 2186  0
                     buf.append(_authority);
 2187  
                 }
 2188  
             }
 2189  
         }
 2190  0
         if (_opaque != null && _is_opaque_part) {
 2191  0
             buf.append(_opaque);
 2192  0
         } else if (_path != null) {
 2193  
             // _is_hier_part or _is_relativeURI
 2194  0
             if (_path.length != 0) {
 2195  0
                 buf.append(_path);
 2196  
             }
 2197  
         }
 2198  0
         if (_query != null) { // has_query
 2199  0
             buf.append('?');
 2200  0
             buf.append(_query);
 2201  
         }
 2202  0
         if (_fragment != null) { // has_fragment
 2203  0
             buf.append('#');
 2204  0
             buf.append(_fragment);
 2205  
         }
 2206  
 
 2207  0
         _uri = buf.toString().toCharArray();
 2208  0
     }
 2209  
 
 2210  
     // ----------------------------------------------------------- Test methods
 2211  
   
 2212  
 
 2213  
     /**
 2214  
      * Tell whether or not this URI is absolute, that is, whether a scheme
      * is set.
 2215  
      *
 2216  
      * @return true if and only if this URI is absoluteURI
 2217  
      */
 2218  
     public boolean isAbsoluteURI() {
 2219  0
         return (_scheme != null);
 2220  
     }
 2221  
   
 2222  
 
 2223  
     /**
 2224  
      * Tell whether or not this URI is relative, that is, whether no scheme
      * is set.
 2225  
      *
 2226  
      * @return true if and only if this URI is relativeURI
 2227  
      */
 2228  
     public boolean isRelativeURI() {
 2229  0
         return (_scheme == null);
 2230  
     }
 2231  
 
 2232  
 
 2233  
     /**
 2234  
      * Tell whether or not the absoluteURI of this URI is hier_part.
      * Returns the <code>_is_hier_part</code> flag.
 2235  
      *
 2236  
      * @return true if and only if the absoluteURI is hier_part
 2237  
      */
 2238  
     public boolean isHierPart() {
 2239  0
         return _is_hier_part;
 2240  
     }
 2241  
 
 2242  
 
 2243  
     /**
 2244  
      * Tell whether or not the absoluteURI of this URI is opaque_part.
      * Returns the <code>_is_opaque_part</code> flag.
 2245  
      *
 2246  
      * @return true if and only if the absoluteURI is opaque_part
 2247  
      */
 2248  
     public boolean isOpaquePart() {
 2249  0
         return _is_opaque_part;
 2250  
     }
 2251  
 
 2252  
 
 2253  
     /**
 2254  
      * Tell whether or not the relativeURI or hier_part of this URI is net_path.
 2255  
      * It evaluates the same condition as the {@link #hasAuthority} method:
      * the net_path flag is set or an authority is present.
 2256  
      *
 2257  
      * @return true if and only if the relativeURI or hier_part is net_path
 2258  
      * @see #hasAuthority
 2259  
      */
 2260  
     public boolean isNetPath() {
 2261  0
         return _is_net_path || (_authority != null);
 2262  
     }
 2263  
 
 2264  
 
 2265  
     /**
 2266  
      * Tell whether or not the relativeURI or hier_part of this URI is abs_path.
      * Returns the <code>_is_abs_path</code> flag.
 2267  
      *
 2268  
      * @return true if and only if the relativeURI or hier_part is abs_path
 2269  
      */
 2270  
     public boolean isAbsPath() {
 2271  0
         return _is_abs_path;
 2272  
     }
 2273  
 
 2274  
 
 2275  
     /**
 2276  
      * Tell whether or not the relativeURI of this URI is rel_path.
      * Returns the <code>_is_rel_path</code> flag.
 2277  
      *
 2278  
      * @return true if and only if the relativeURI is rel_path
 2279  
      */
 2280  
     public boolean isRelPath() {
 2281  0
         return _is_rel_path;
 2282  
     }
 2283  
 
 2284  
 
 2285  
     /**
 2286  
      * Tell whether or not this URI has authority.
 2287  
      * It evaluates the same condition as the {@link #isNetPath} method:
      * an authority is present or the net_path flag is set.
 2288  
      *
 2289  
      * @return true if and only if this URI has authority
 2290  
      * @see #isNetPath
 2291  
      */
 2292  
     public boolean hasAuthority() {
 2293  0
         return (_authority != null) || _is_net_path;
 2294  
     }
 2295  
 
 2296  
     /**
 2297  
      * Tell whether or not the authority component of this URI is reg_name.
      * Returns the <code>_is_reg_name</code> flag.
 2298  
      *
 2299  
      * @return true if and only if the authority component is reg_name
 2300  
      */
 2301  
     public boolean isRegName() {
 2302  0
         return _is_reg_name;
 2303  
     }
 2304  
   
 2305  
 
 2306  
     /**
 2307  
      * Tell whether or not the authority component of this URI is server.
      * Returns the <code>_is_server</code> flag.
 2308  
      *
 2309  
      * @return true if and only if the authority component is server
 2310  
      */
 2311  
     public boolean isServer() {
 2312  0
         return _is_server;
 2313  
     }
 2314  
   
 2315  
 
 2316  
     /**
 2317  
      * Tell whether or not this URI has userinfo, that is, whether
      * <code>_userinfo</code> is non-null.
 2318  
      *
 2319  
      * @return true if and only if this URI has userinfo
 2320  
      */
 2321  
     public boolean hasUserinfo() {
 2322  0
         return (_userinfo != null);
 2323  
     }
 2324  
   
 2325  
 
 2326  
     /**
 2327  
      * Tell whether or not the host part of this URI is hostname.
      * Returns the <code>_is_hostname</code> flag.
 2328  
      *
 2329  
      * @return true if and only if the host part is hostname
 2330  
      */
 2331  
     public boolean isHostname() {
 2332  0
         return _is_hostname;
 2333  
     }
 2334  
 
 2335  
 
 2336  
     /**
 2337  
      * Tell whether or not the host part of this URI is IPv4address.
      * Returns the <code>_is_IPv4address</code> flag.
 2338  
      *
 2339  
      * @return true if and only if the host part is IPv4address
 2340  
      */
 2341  
     public boolean isIPv4address() {
 2342  0
         return _is_IPv4address;
 2343  
     }
 2344  
 
 2345  
 
 2346  
     /**
 2347  
      * Tell whether or not the host part of this URI is IPv6reference.
      * Returns the <code>_is_IPv6reference</code> flag.
 2348  
      *
 2349  
      * @return true if and only if the host part is IPv6reference
 2350  
      */
 2351  
     public boolean isIPv6reference() {
 2352  0
         return _is_IPv6reference;
 2353  
     }
 2354  
 
 2355  
 
 2356  
     /**
 2357  
      * Tell whether or not this URI has query, that is, whether
      * <code>_query</code> is non-null.
 2358  
      *
 2359  
      * @return true if and only if this URI has query
 2360  
      */
 2361  
     public boolean hasQuery() {
 2362  0
         return (_query != null);
 2363  
     }
 2364  
    
 2365  
 
 2366  
     /**
 2367  
      * Tell whether or not this URI has fragment, that is, whether
      * <code>_fragment</code> is non-null.
 2368  
      *
 2369  
      * @return true if and only if this URI has fragment
 2370  
      */
 2371  
     public boolean hasFragment() {
 2372  0
         return (_fragment != null);
 2373  
     }
 2374  
    
 2375  
    
 2376  
     // ---------------------------------------------------------------- Charset
 2377  
 
 2378  
 
 2379  
     /**
 2380  
      * Set the default charset of the protocol.
      * This is a static setting: the value is stored in a static field and
      * is not tied to any one URI instance.
 2381  
      * <p>
 2382  
      * The character set used to store files SHALL remain a local decision and
 2383  
      * MAY depend on the capability of local operating systems. Prior to the
 2384  
      * exchange of URIs they SHOULD be converted into a ISO/IEC 10646 format
 2385  
      * and UTF-8 encoded. This approach, while allowing international exchange
 2386  
      * of URIs, will still allow backward compatibility with older systems
 2387  
      * because the code set positions for ASCII characters are identical to the
 2388  
      * one byte sequence in UTF-8.
 2389  
      * <p>
 2390  
      * An individual URI scheme may require a single charset, define a default
 2391  
      * charset, or provide a way to indicate the charset used.
 2392  
      *
 2393  
      * @param charset the default charset for each protocol; stored as
      * given, without validation
 2394  
      */
 2395  
     public static void setProtocolCharset(String charset) {
 2396  0
         _protocolCharset = charset;
 2397  0
     }
 2398  
 
 2399  
 
 2400  
     /**
 2401  
      * Get the default charset of the protocol.
 2402  
      * <p>
 2403  
      * An individual URI scheme may require a single charset, define a default
 2404  
      * charset, or provide a way to indicate the charset used.
 2405  
      * <p>
 2406  
      * To work globally either requires support of a number of character sets
 2407  
      * and to be able to convert between them, or the use of a single preferred
 2408  
      * character set.
 2409  
      * For support of global compatibility it is STRONGLY RECOMMENDED that
 2410  
      * clients and servers use UTF-8 encoding when exchanging URIs.
 2411  
      *
 2412  
      * @return the charset string
 2413  
      */
 2414  
     public static String getProtocolCharset() {
 2415  0
         // Returns the current class-wide protocol charset value unchanged.
         return _protocolCharset;
 2416  
     }
 2417  
 
 2418  
 
 2419  
     /**
 2420  
      * Set the default charset of the document.
 2421  
      * <p>
 2422  
      * Notice that it will be possible to contain mixed characters (e.g.
 2423  
      * ftp://host/KoreanNamespace/ChineseResource). To handle the Bi-directional
 2424  
      * display of these character sets, the protocol charset could be simply
 2425  
      * used again. Because it's not yet implemented that the insertion of BIDI
 2426  
      * control characters at different points during composition is extracted.
 2427  
      *
 2428  
      * @param charset the default charset for the document
 2429  
      */
 2430  
     public static void setDocumentCharset(String charset) {
 2431  0
         // Static setter: overwrites the class-wide value; the argument is
         // stored as given, with no validation performed here.
         _documentCharset = charset;
 2432  0
     }
 2433  
 
 2434  
 
 2435  
     /**
 2436  
      * Get the default charset of the document.
 2437  
      *
 2438  
      * @return the charset string
 2439  
      */
 2440  
     public static String getDocumentCharset() {
 2441  0
         // Returns the current class-wide document charset value unchanged.
         return _documentCharset;
 2442  
     }
 2443  
 
 2444  
     // ------------------------------------------------------------- The scheme
 2445  
 
 2446  
     /**
 2447  
      * Get the scheme.
 2448  
      *
 2449  
      * @return the scheme
 2450  
      */
 2451  
     public char[] getRawScheme() {
 2452  0
         return _scheme;
 2453  
     }
 2454  
 
 2455  
 
 2456  
     /**
 2457  
      * Get the scheme.
 2458  
      *
 2459  
      * @return the scheme
 2460  
      * null if undefined scheme
 2461  
      */
 2462  
     public String getScheme() {
 2463  0
         return (_scheme == null) ? null : new String(_scheme);
 2464  
     }
 2465  
 
 2466  
     // ---------------------------------------------------------- The authority
 2467  
 
 2468  
     /**
 2469  
      * Set the authority.  It can be one type of server, hostport, hostname,
 2470  
      * IPv4address, IPv6reference and reg_name.
 2471  
      * <p><blockquote><pre>
 2472  
      *   authority     = server | reg_name
 2473  
      * </pre></blockquote><p>
 2474  
      *
 2475  
      * @param escapedAuthority the raw escaped authority
 2476  
      * @exception IOException
 2477  
      * @throws NullPointerException null authority
 2478  
      */
 2479  
     public void setRawAuthority(char[] escapedAuthority) throws IOException {
 2480  0
         parseAuthority(new String(escapedAuthority), true);
 2481  0
         setUriReference();
 2482  0
     }
 2483  
 
 2484  
 
 2485  
     /**
 2486  
      * Set the authority.  It can be one type of server, hostport, hostname,
 2487  
      * IPv4address, IPv6reference and reg_name.
 2488  
      * Note that there is no setAuthority method by the escape encoding reason.
 2489  
      *
 2490  
      * @param escapedAuthority the escaped authority string
 2491  
      * @exception IOException
 2492  
      */
 2493  
     public void setEscapedAuthority(String escapedAuthority)
 2494  
         throws IOException {
 2495  
 
 2496  0
         parseAuthority(escapedAuthority, true);
 2497  0
         setUriReference();
 2498  0
     }
 2499  
 
 2500  
 
 2501  
     /**
 2502  
      * Get the raw-escaped authority.
 2503  
      *
 2504  
      * @return the raw-escaped authority
 2505  
      */
 2506  
     public char[] getRawAuthority() {
 2507  0
         return _authority;
 2508  
     }
 2509  
 
 2510  
 
 2511  
     /**
 2512  
      * Get the escaped authority.
 2513  
      *
 2514  
      * @return the escaped authority
 2515  
      */
 2516  
     public String getEscapedAuthority() {
 2517  0
         return (_authority == null) ? null : new String(_authority);
 2518  
     }
 2519  
 
 2520  
 
 2521  
     /**
 2522  
      * Get the authority.
 2523  
      *
 2524  
      * @return the authority
 2525  
      * @exception IOException
 2526  
      * @see #decode
 2527  
      */
 2528  
     public String getAuthority() throws IOException {
 2529  0
         return (_authority == null) ? null : decode(_authority);
 2530  
     }
 2531  
 
 2532  
     // ----------------------------------------------------------- The userinfo
 2533  
 
 2534  
     /**
 2535  
      * Get the raw-escaped userinfo.
 2536  
      *
 2537  
      * @return the raw-escaped userinfo
 2538  
      * @see #getAuthority
 2539  
      */
 2540  
     public char[] getRawUserinfo() {
 2541  0
         return _userinfo;
 2542  
     }
 2543  
 
 2544  
 
 2545  
     /**
 2546  
      * Get the escaped userinfo.
 2547  
      *
 2548  
      * @return the escaped userinfo
 2549  
      * @see #getAuthority
 2550  
      */
 2551  
     public String getEscapedUserinfo() {
 2552  0
         return (_userinfo == null) ? null : new String(_userinfo);
 2553  
     }
 2554  
 
 2555  
 
 2556  
     /**
 2557  
      * Get the userinfo.
 2558  
      *
 2559  
      * @return the userinfo
 2560  
      * @exception IOException
 2561  
      * @see #decode
 2562  
      * @see #getAuthority
 2563  
      */
 2564  
     public String getUserinfo() throws IOException {
 2565  0
         return (_userinfo == null) ? null : decode(_userinfo);
 2566  
     }
 2567  
 
 2568  
     // --------------------------------------------------------------- The host
 2569  
 
 2570  
     /**
 2571  
      * Get the host.
 2572  
      * <p><blockquote><pre>
 2573  
      *   host          = hostname | IPv4address | IPv6reference
 2574  
      * </pre></blockquote><p>
 2575  
      *
 2576  
      * @return the host
 2577  
      * @see #getAuthority
 2578  
      */
 2579  
     public char[] getRawHost() {
 2580  0
         return _host;
 2581  
     }
 2582  
 
 2583  
 
 2584  
     /**
 2585  
      * Get the host.
 2586  
      * <p><blockquote><pre>
 2587  
      *   host          = hostname | IPv4address | IPv6reference
 2588  
      * </pre></blockquote><p>
 2589  
      *
 2590  
      * @return the host
 2591  
      * @exception IOException
 2592  
      * @see #decode
 2593  
      * @see #getAuthority
 2594  
      */
 2595  
     public String getHost() throws IOException {
 2596  0
         return decode(_host);
 2597  
     }
 2598  
 
 2599  
     // --------------------------------------------------------------- The port
 2600  
 
 2601  
     /**
 2602  
      * Get the port.  In order to get the specific default port, the specific
 2603  
      * protocol-supported class extended from the URI class should be used.
 2604  
      * It has the server-based naming authority.
 2605  
      *
 2606  
      * @return the port
 2607  
      * if -1, it has the default port for the scheme or the server-based
 2608  
      * naming authority is not supported in the specific URI.
 2609  
      */
 2610  
     public int getPort() {
 2611  0
         return _port;
 2612  
     }
 2613  
 
 2614  
     // --------------------------------------------------------------- The path
 2615  
 
 2616  
     /**
 2617  
      * Set the path.   The method couldn't be used by API programmers.
 2618  
      *
 2619  
      * @param path the path string
 2620  
      * @exception IOException set incorrectly or fragment only
 2621  
      * @see #encode
 2622  
      */
 2623  
     protected void setPath(String path) throws IOException {
 2624  
 
 2625  
         // set path
 2626  0
         if (_is_net_path || _is_abs_path) {
 2627  0
             _path = encode(path, allowed_abs_path);
 2628  0
         } else if (_is_rel_path) {
 2629  0
             StringBuffer buff = new StringBuffer(path.length());
 2630  0
             int at = path.indexOf('/');
 2631  0
             if (at > 0) {  // never 0
 2632  0
                 buff.append(encode(path.substring(0, at), allowed_rel_path));
 2633  0
                 buff.append(encode(path.substring(at), allowed_abs_path));
 2634  0
             } else {
 2635  0
                 buff.append(encode(path, allowed_rel_path));
 2636  
             }
 2637  0
             _path = buff.toString().toCharArray();
 2638  0
         } else if (_is_opaque_part) {
 2639  0
             _opaque = encode(path, allowed_opaque_part);
 2640  0
         } else {
 2641  0
             throw new IOException(/*IOException.PARSING, */"URI: incorrect path");
 2642  
         }
 2643  0
     }
 2644  
 
 2645  
 
 2646  
     /**
 2647  
      * Resolve the base and relative path.
 2648  
      *
 2649  
      * @param base_path a character array of the base_path
 2650  
      * @param rel_path a character array of the rel_path
 2651  
      * @return the resolved path
 2652  
      */
 2653  
     protected char[] resolvePath(char[] base_path, char[] rel_path) {
 2654  
 
 2655  
         // REMINDME: paths are never null
 2656  0
         String base = (base_path == null) ? "" : new String(base_path);
 2657  0
         int at = base.lastIndexOf('/');
 2658  0
         if (at != -1) {
 2659  0
             base_path = base.substring(0, at + 1).toCharArray();
 2660  
         }
 2661  
         // _path could be empty
 2662  0
         if (rel_path == null || rel_path.length == 0) {
 2663  0
             return normalize(base_path);
 2664  0
         } else if (rel_path[0] == '/') {
 2665  0
             return rel_path;
 2666  
         } else {
 2667  0
             StringBuffer buff = new StringBuffer(base.length() +
 2668  0
                 rel_path.length);
 2669  0
             if (at != -1) {
 2670  0
                 buff.append(base.substring(0, at + 1));
 2671  0
                 buff.append(rel_path);
 2672  
             }
 2673  0
             return normalize(buff.toString().toCharArray());
 2674  
         }
 2675  
     }
 2676  
 
 2677  
 
 2678  
     /**
 2679  
      * Get the raw-escaped current hierarchy level in the given path.
 2680  
      * If the last namespace is a collection, the slash mark ('/') should be
 2681  
      * ended with at the last character of the path string.
 2682  
      *
 2683  
      * @param path the path
 2684  
      * @return the current hierarchy level
 2685  
      * @exception IOException no hierarchy level
 2686  
      */
 2687  
     protected char[] getRawCurrentHierPath(char[] path) throws IOException {
 2688  
 
 2689  0
         if (_is_opaque_part) {
 2690  0
             throw new IOException(/*IOException.PARSING,*/ "URI: no hierarchy level");
 2691  
         }
 2692  0
         if (path == null) {
 2693  0
             throw new IOException(/*IOException.PARSING,*/ "URI: emtpy path");
 2694  
         }
 2695  0
         String buff = new String(path);
 2696  0
         int first = buff.indexOf('/');
 2697  0
         int last = buff.lastIndexOf('/');
 2698  0
         if (last == 0) {
 2699  0
             return rootPath;
 2700  0
         } else if (first != last && last != -1) {
 2701  0
             return buff.substring(0, last).toCharArray();
 2702  
         }
 2703  
         // FIXME: it could be a document on the server side
 2704  0
         return path;
 2705  
     }
 2706  
 
 2707  
 
 2708  
     /**
 2709  
      * Get the raw-escaped current hierarchy level.
 2710  
      *
 2711  
      * @return the raw-escaped current hierarchy level
 2712  
      * @exception IOException no hierarchy level
 2713  
      */
 2714  
     public char[] getRawCurrentHierPath() throws IOException {
 2715  0
         return (_path == null) ? null : getRawCurrentHierPath(_path);
 2716  
     }
 2717  
  
 2718  
 
 2719  
     /**
 2720  
      * Get the escaped current hierarchy level.
 2721  
      *
 2722  
      * @return the escaped current hierarchy level
 2723  
      * @exception IOException no hierarchy level
 2724  
      */
 2725  
     public String getEscapedCurrentHierPath() throws IOException {
 2726  0
         char[] path = getRawCurrentHierPath();
 2727  0
         return (path == null) ? null : new String(path);
 2728  
     }
 2729  
  
 2730  
 
 2731  
     /**
 2732  
      * Get the current hierarchy level.
 2733  
      *
 2734  
      * @return the current hierarchy level
 2735  
      * @exception IOException
 2736  
      * @see #decode
 2737  
      */
 2738  
     public String getCurrentHierPath() throws IOException {
 2739  0
         char[] path = getRawCurrentHierPath();
 2740  0
         return (path == null) ? null : decode(path);
 2741  
     }
 2742  
 
 2743  
 
 2744  
     /**
 2745  
      * Get the raw-escaped level above this hierarchy level.
 2746  
      *
 2747  
      * @return the raw above hierarchy level
 2748  
      * @exception IOException
 2749  
      */
 2750  
     public char[] getRawAboveHierPath() throws IOException {
 2751  0
         char[] path = getRawCurrentHierPath();
 2752  0
         return (path == null) ? null : getRawCurrentHierPath(path);
 2753  
     }
 2754  
 
 2755  
 
 2756  
     /**
 2757  
      * Get the escaped level above this hierarchy level.
 2758  
      *
 2759  
      * @return the escaped above hierarchy level
 2760  
      * @exception IOException
 2761  
      */
 2762  
     public String getEscapedAboveHierPath() throws IOException {
 2763  0
         char[] path = getRawAboveHierPath();
 2764  0
         return (path == null) ? null : new String(path);
 2765  
     }
 2766  
 
 2767  
 
 2768  
     /**
 2769  
      * Get the level above this hierarchy level.
 2770  
      *
 2771  
      * @return the above hierarchy level
 2772  
      * @exception IOException
 2773  
      * @see #decode
 2774  
      */
 2775  
     public String getAboveHierPath() throws IOException {
 2776  0
         char[] path = getRawAboveHierPath();
 2777  0
         return (path == null) ? null : decode(path);
 2778  
     }
 2779  
 
 2780  
 
 2781  
     /**
 2782  
      * Get the raw-escaped path.
 2783  
      * <p><blockquote><pre>
 2784  
      *   path          = [ abs_path | opaque_part ]
 2785  
      * </pre></blockquote><p>
 2786  
      *
 2787  
      * @return the raw-escaped path
 2788  
      */
 2789  
     public char[] getRawPath() {
 2790  0
         return _is_opaque_part ? _opaque : _path;
 2791  
     }
 2792  
 
 2793  
 
 2794  
     /**
 2795  
      * Get the escaped path.
 2796  
      * <p><blockquote><pre>
 2797  
      *   path          = [ abs_path | opaque_part ]
 2798  
      *   abs_path      = "/"  path_segments 
 2799  
      *   opaque_part   = uric_no_slash *uric
 2800  
      * </pre></blockquote><p>
 2801  
      *
 2802  
      * @return the escaped path string
 2803  
      */
 2804  
     public String getEscapedPath() {
 2805  0
         char[] path = getRawPath();
 2806  0
         return (path == null) ? null : new String(path);
 2807  
     }
 2808  
 
 2809  
 
 2810  
     /**
 2811  
      * Get the path.
 2812  
      * <p><blockquote><pre>
 2813  
      *   path          = [ abs_path | opaque_part ]
 2814  
      * </pre></blockquote><p>
 2815  
      * @return the path string
 2816  
      * @exception IOException
 2817  
      * @see #decode
 2818  
      */
 2819  
     public String getPath() throws IOException { 
 2820  0
         char[] path =  getRawPath();
 2821  0
         return (path == null) ? null : decode(path);
 2822  
     }
 2823  
 
 2824  
 
 2825  
     /**
 2826  
      * Get the raw-escaped basename of the path.
 2827  
      *
 2828  
      * @return the raw-escaped basename
 2829  
      */
 2830  
     public char[] getRawName() {
 2831  0
         if (_path == null) return null;
 2832  
 
 2833  0
         int at = 0;
 2834  0
         for (int i = _path.length - 1; i >= 0; i--) {
 2835  0
             if (_path[i] == '/') {
 2836  0
                 at = i + 1;
 2837  0
                 break;
 2838  
             }
 2839  
         }
 2840  0
         int len = _path.length - at;
 2841  0
         char[] basename =  new char[len];
 2842  0
         System.arraycopy(_path, at, basename, 0, len);
 2843  0
         return basename;
 2844  
     }
 2845  
 
 2846  
 
 2847  
     /**
 2848  
      * Get the escaped basename of the path.
 2849  
      *
 2850  
      * @return the escaped basename string
 2851  
      */
 2852  
     public String getEscapedName() {
 2853  0
         char[] basename = getRawName();
 2854  0
         return (basename == null) ? null : new String(basename);
 2855  
     }
 2856  
 
 2857  
 
 2858  
     /**
 2859  
      * Get the basename of the path.
 2860  
      *
 2861  
      * @return the basename string
 2862  
      * @exception IOException incomplete trailing escape pattern
 2863  
      * Or unsupported character encoding
 2864  
      * @see #decode
 2865  
      */
 2866  
     public String getName() throws IOException {
 2867  0
         char[] basename = getRawName();
 2868  0
         return (basename == null) ? null : decode(getRawName());
 2869  
     }
 2870  
 
 2871  
     // ----------------------------------------------------- The path and query 
 2872  
 
 2873  
     /**
 2874  
      * Get the raw-escaped path and query.
 2875  
      *
 2876  
      * @return the raw-escaped path and query
 2877  
      */
 2878  
     public char[] getRawPathQuery() {
 2879  
 
 2880  0
         if (_path == null && _query == null) {
 2881  0
             return null;
 2882  
         }
 2883  0
         StringBuffer buff = new StringBuffer();
 2884  0
         if (_path != null) {
 2885  0
             buff.append(_path);
 2886  
         }
 2887  0
         if (_query != null) {
 2888  0
             buff.append('?');
 2889  0
             buff.append(_query);
 2890  
         }
 2891  0
         return buff.toString().toCharArray();
 2892  
     }
 2893  
 
 2894  
 
 2895  
     /**
 2896  
      * Get the escaped path and query.
 2897  
      *
 2898  
      * @return the escaped path and query string
 2899  
      */
 2900  
     public String getEscapedPathQuery() {
 2901  0
         char[] rawPathQuery = getRawPathQuery();
 2902  0
         return (rawPathQuery == null) ? null : new String(rawPathQuery);
 2903  
     }
 2904  
 
 2905  
 
 2906  
     /**
 2907  
      * Get the path and query.
 2908  
      *
 2909  
      * @return the path and query string.
 2910  
      * @exception IOException incomplete trailing escape pattern
 2911  
      * Or unsupported character encoding
 2912  
      * @see #decode
 2913  
      */
 2914  
     public String getPathQuery() throws IOException {
 2915  0
         char[] rawPathQuery = getRawPathQuery();
 2916  0
         return (rawPathQuery == null) ? null : decode(rawPathQuery);
 2917  
     }
 2918  
 
 2919  
     // -------------------------------------------------------------- The query 
 2920  
 
 2921  
     /**
 2922  
      * Set the raw-escaped query.
 2923  
      *
 2924  
      * @param escapedQuery the raw-escaped query
 2925  
      * @exception IOException escaped query not valid
 2926  
      * @throws NullPointerException null query
 2927  
      */
 2928  
     public void setRawQuery(char[] escapedQuery) throws IOException {
 2929  0
         if (!validate(escapedQuery, query))
 2930  0
             throw new IOException(/*IOException.ESCAPING,*/
 2931  0
                     "URI: escaped query not valid");
 2932  0
         _query = escapedQuery;
 2933  0
         setUriReference();
 2934  0
     }
 2935  
 
 2936  
 
 2937  
     /**
 2938  
      * Set the escaped query string.
 2939  
      *
 2940  
      * @param escapedQuery the escaped query string
 2941  
      * @exception IOException escaped query not valid
 2942  
      * @throws NullPointerException null query
 2943  
      */
 2944  
     public void setEscapedQuery(String escapedQuery) throws IOException {
 2945  0
         setRawQuery(escapedQuery.toCharArray());
 2946  0
     }
 2947  
 
 2948  
 
 2949  
     /**
 2950  
      * Set the query.
 2951  
      * When a query string is not misunderstood the reserved special characters
 2952  
      * ("&amp;", "=", "+", ",", and "$") within a query component, it is
 2953  
      * recommended to use in encoding the whole query with this method.
 2954  
      *
 2955  
      * @param query the query string.
 2956  
      * @exception IOException incomplete trailing escape pattern
 2957  
      * Or unsupported character encoding
 2958  
      * @throws NullPointerException null query
 2959  
      * @see #encode
 2960  
      */
 2961  
     public void setQuery(String query) throws IOException {
 2962  0
         setRawQuery(encode(query, allowed_query));
 2963  0
     }
 2964  
 
 2965  
 
 2966  
     /**
 2967  
      * Get the raw-escaped query.
 2968  
      *
 2969  
      * @return the raw-escaped query
 2970  
      */
 2971  
     public char[] getRawQuery() {
 2972  0
         return _query;
 2973  
     }
 2974  
 
 2975  
 
 2976  
     /**
 2977  
      * Get the escaped query.
 2978  
      *
 2979  
      * @return the escaped query string
 2980  
      */
 2981  
     public String getEscapedQuery() {
 2982  0
         return (_query == null) ? null : new String(_query);
 2983  
     }
 2984  
 
 2985  
 
 2986  
     /**
 2987  
      * Get the query.
 2988  
      *
 2989  
      * @return the query string.
 2990  
      * @exception IOException incomplete trailing escape pattern
 2991  
      * Or unsupported character encoding
 2992  
      * @see #decode
 2993  
      */
 2994  
     public String getQuery() throws IOException {
 2995  0
         return (_query == null) ? null : decode(_query);
 2996  
     }
 2997  
 
 2998  
     // ----------------------------------------------------------- The fragment 
 2999  
 
 3000  
     /**
 3001  
      * Set the raw-escaped fragment.
 3002  
      *
 3003  
      * @param escapedFragment the raw-escaped fragment
 3004  
      * @exception IOException escaped fragment not valid
 3005  
      * @throws NullPointerException null fragment
 3006  
      */
 3007  
     public void setRawFragment(char[] escapedFragment) throws IOException {
 3008  0
         if (!validate(escapedFragment, fragment))
 3009  0
             throw new IOException(/*IOException.ESCAPING,*/
 3010  0
                     "URI: escaped fragment not valid");
 3011  0
         _fragment = escapedFragment;
 3012  0
         setUriReference();
 3013  0
     }
 3014  
 
 3015  
 
 3016  
     /**
 3017  
      * Set the escaped fragment string.
 3018  
      *
 3019  
      * @param escapedFragment the escaped fragment string
 3020  
      * @exception IOException escaped fragment not valid
 3021  
      * @throws NullPointerException null fragment
 3022  
      */
 3023  
     public void setEscapedFragment(String escapedFragment) throws IOException {
 3024  0
         char[] fragmentSequence = escapedFragment.toCharArray();
 3025  0
         if (!validate(fragmentSequence, fragment))
 3026  0
             throw new IOException(/*IOException.ESCAPING,*/
 3027  0
                     "URI: escaped fragment not valid");
 3028  0
         _fragment = fragmentSequence;
 3029  0
         setUriReference();
 3030  0
     }
 3031  
 
 3032  
 
 3033  
     /**
 3034  
      * Set the fragment.
 3035  
      *
 3036  
      * @param fragment the fragment string.
 3037  
      * @exception IOException
 3038  
      * Or unsupported character encoding
 3039  
      * @throws NullPointerException null fragment
 3040  
      */
 3041  
     public void setFragment(String fragment) throws IOException {
 3042  0
         _fragment = encode(fragment, allowed_fragment);
 3043  0
         setUriReference();
 3044  0
     }
 3045  
 
 3046  
 
 3047  
     /**
 3048  
      * Get the raw-escaped fragment.
 3049  
      * <p>
 3050  
      * The optional fragment identifier is not part of a URI, but is often used
 3051  
      * in conjunction with a URI.
 3052  
      * <p>
 3053  
      * The format and interpretation of fragment identifiers is dependent on
 3054  
      * the media type [RFC2046] of the retrieval result.
 3055  
      * <p>
 3056  
      * A fragment identifier is only meaningful when a URI reference is
 3057  
      * intended for retrieval and the result of that retrieval is a document
 3058  
      * for which the identified fragment is consistently defined.
 3059  
      *
 3060  
      * @return the raw-escaped fragment
 3061  
      */
 3062  
     public char[] getRawFragment() {
 3063  0
         return _fragment;
 3064  
     }
 3065  
 
 3066  
 
 3067  
     /**
 3068  
      * Get the escaped fragment.
 3069  
      *
 3070  
      * @return the escaped fragment string
 3071  
      */
 3072  
     public String getEscapedFragment() {
 3073  0
         return (_fragment == null) ? null : new String(_fragment);
 3074  
     }
 3075  
 
 3076  
 
 3077  
     /**
 3078  
      * Get the fragment.
 3079  
      *
 3080  
      * @return the fragment string
 3081  
      * @exception IOException incomplete trailing escape pattern
 3082  
      * Or unsupported character encoding
 3083  
      * @see #decode
 3084  
      */
 3085  
     public String getFragment() throws IOException {
 3086  0
         return (_fragment == null) ? null : decode(_fragment);
 3087  
     }
 3088  
 
 3089  
     // ------------------------------------------------------------- Utilities 
 3090  
 
 3091  
     /**
 3092  
      * Normalize the given hier path part.
 3093  
      *
 3094  
      * @param path the path to normalize
 3095  
      * @return the normalized path
 3096  
      */
 3097  
     protected char[] normalize(char[] path) {
 3098  
 
 3099  0
         if (path == null) return null;
 3100  
 
 3101  0
         String normalized = new String(path);
 3102  0
         boolean endsWithSlash = true;
 3103  
         // precondition
 3104  0
         if (!normalized.endsWith("/")) {
 3105  0
             normalized += '/';
 3106  0
             endsWithSlash = false;
 3107  
         }
 3108  0
         if (normalized.endsWith("/./") || normalized.endsWith("/../")) {
 3109  0
             endsWithSlash = true;
 3110  
         }
 3111  
         // Resolve occurrences of "/./" in the normalized path
 3112  0
         while (true) {
 3113  0
             int at = normalized.indexOf("/./");
 3114  0
             if (at == -1) {
 3115  0
                 break;
 3116  
             }
 3117  0
             normalized = normalized.substring(0, at) +
 3118  0
             normalized.substring(at + 2);
 3119  0
         }
 3120  
         // Resolve occurrences of "/../" in the normalized path
 3121  0
         while (true) {
 3122  0
             int at = normalized.indexOf("/../");
 3123  0
             if (at == -1) {
 3124  0
                 break;
 3125  
             }
 3126  0
             if (at == 0) {
 3127  0
                 normalized = "/";
 3128  0
                 break;
 3129  
             }
 3130  0
             int backward = normalized.lastIndexOf('/', at - 1);
 3131  0
             if (backward == -1) {
 3132  
                 // consider the rel_path
 3133  0
                 normalized = normalized.substring(at + 4);
 3134  0
             } else {
 3135  0
                 normalized = normalized.substring(0, backward) +
 3136  0
                 normalized.substring(at + 3);
 3137  
             }
 3138  0
         }
 3139  
         // Resolve occurrences of "//" in the normalized path
 3140  0
         while (true) {
 3141  0
             int at = normalized.indexOf("//");
 3142  0
             if (at == -1) {
 3143  0
                 break;
 3144  
             }
 3145  0
             normalized = normalized.substring(0, at) +
 3146  0
             normalized.substring(at + 1);
 3147  0
         }
 3148  0
         if (!endsWithSlash && normalized.endsWith("/")) {
 3149  0
             normalized = normalized.substring(0, normalized.length()-1);
 3150  0
         } else if (endsWithSlash && !normalized.endsWith("/")) {
 3151  0
             normalized = normalized + "/";
 3152  
         }
 3153  
         // Set the normalized path that we have completed
 3154  0
         return normalized.toCharArray();
 3155  
     }
 3156  
 
 3157  
 
 3158  
     /**
 3159  
      * Normalize the path part of this URI.
 3160  
      */
 3161  
     public void normalize() {
 3162  0
         _path = normalize(_path);
 3163  0
     }
 3164  
 
 3165  
 
 3166  
     /**
 3167  
      * Test if the first array is equal to the second array.
 3168  
      *
 3169  
      * @param first the first character array
 3170  
      * @param second the second character array
 3171  
      * @return true if they're equal
 3172  
      */
 3173  
     protected boolean equals(char[] first, char[] second) {
 3174  
 
 3175  0
         if (first == null && second == null) {
 3176  0
             return true;
 3177  
         }
 3178  0
         if (first == null || second == null) {
 3179  0
             return false;
 3180  
         }
 3181  0
         if (first.length != second.length) {
 3182  0
             return false;
 3183  
         }
 3184  0
         for (int i = 0; i < first.length; i++) {
 3185  0
             if (first[i] != second[i]) {
 3186  0
                 return false;
 3187  
             }
 3188  
         }
 3189  0
         return true;
 3190  
     }
 3191  
 
 3192  
 
 3193  
     /**
 3194  
      * Test an object if this URI is equal to another.
 3195  
      *
 3196  
      * @param obj an object to compare
 3197  
      * @return true if two URI objects are equal
 3198  
      */
 3199  
     public boolean equals(Object obj) {
 3200  
 
 3201  
         // normalize and test each components
 3202  0
         if (obj == this) {
 3203  0
             return true;
 3204  
         }
 3205  0
         if (!(obj instanceof URI)) {
 3206  0
             return false;
 3207  
         }
 3208  0
         URI another = (URI) obj;
 3209  
         // scheme
 3210  0
         if (!equals(_scheme, another._scheme)) {
 3211  0
             return false;
 3212  
         }
 3213  
         // is_opaque_part or is_hier_part?  and opaque
 3214  0
         if (!equals(_opaque, another._opaque)) {
 3215  0
             return false;
 3216  
         }
 3217  
         // is_hier_part
 3218  
         // has_authority
 3219  0
         if (!equals(_authority, another._authority)) {
 3220  0
             return false;
 3221  
         }
 3222  
         // path
 3223  0
         if (!equals(_path, another._path)) {
 3224  0
             return false;
 3225  
         }
 3226  
         // has_query
 3227  0
         if (!equals(_query, another._query)) {
 3228  0
             return false;
 3229  
         }
 3230  
         // has_fragment?  should be careful of the only fragment case.
 3231  0
         if (!equals(_fragment, another._fragment)) {
 3232  0
             return false;
 3233  
         }
 3234  0
         return true;
 3235  
     }
 3236  
 
 3237  
     // ---------------------------------------------------------- Serialization
 3238  
 
 3239  
     /**
 3240  
      * Write the content of this URI.
 3241  
      *
 3242  
      * @param oos the object-output stream
 3243  
      */
 3244  
     protected void writeObject(java.io.ObjectOutputStream oos)
 3245  
         throws IOException {
 3246  
 
 3247  0
         // Delegates entirely to the stream's default field serialization.
         // NOTE(review): Java's serialization mechanism looks for a *private*
         // writeObject; confirm whether this protected method is ever invoked
         // automatically or only by explicit callers.
         oos.defaultWriteObject();
 3248  0
     }
 3249  
 
 3250  
 
 3251  
     /**
 3252  
      * Read a URI.
 3253  
      *
 3254  
      * @param ois the object-input stream
 3255  
      */
 3256  
     protected void readObject(java.io.ObjectInputStream ois)
 3257  
         throws ClassNotFoundException, IOException {
 3258  
 
 3259  0
         // Delegates entirely to the stream's default field deserialization.
         // NOTE(review): Java's serialization mechanism looks for a *private*
         // readObject; confirm whether this protected method is ever invoked
         // automatically or only by explicit callers.
         ois.defaultReadObject();
 3260  0
     }
 3261  
 
 3262  
     // ------------------------------------------------------------- Comparison 
 3263  
 
 3264  
     /**
 3265  
      * Compare this URI to another object. 
 3266  
      *
 3267  
      * @param obj the object to be compared.
 3268  
      * @return 0, if it's same,
 3269  
      * -1, if failed, first being compared with in the authority component
 3270  
      * @exception ClassCastException not URI argument
 3271  
      * @throws NullPointerException null object
 3272  
      */
 3273  
     public int compareTo(Object obj) {
 3274  
 
 3275  0
         URI another = (URI) obj;
 3276  0
         if (!equals(_authority, another.getRawAuthority())) return -1;
 3277  0
         return toString().compareTo(another.toString());
 3278  
     }
 3279  
 
 3280  
     // ------------------------------------------------------------------ Clone
 3281  
 
 3282  
     /**
 3283  
      * Create and return a copy of this object, the URI-reference containing
 3284  
      * the userinfo component.  Notice that the whole URI-reference including
 3285  
      * the userinfo component counld not be gotten as a <code>String</code>.
 3286  
      * <p>
 3287  
      * To copy the identical <code>URI</code> object including the userinfo
 3288  
      * component, it should be used.
 3289  
      *
 3290  
      * @return a clone of this instance
 3291  
      */
 3292  
     public synchronized Object clone() {
 3293  
 
 3294  0
         URI instance = new URI();
 3295  
 
 3296  0
         instance._uri = _uri;
 3297  0
         instance._scheme = _scheme;
 3298  0
         instance._opaque = _opaque;
 3299  0
         instance._authority = _authority;
 3300  0
         instance._userinfo = _userinfo;
 3301  0
         instance._host = _host;
 3302  0
         instance._port = _port;
 3303  0
         instance._path = _path;
 3304  0
         instance._query = _query;
 3305  0
         instance._fragment = _fragment;
 3306  
         // flags
 3307  0
         instance._is_hier_part = _is_hier_part;
 3308  0
         instance._is_opaque_part = _is_opaque_part;
 3309  0
         instance._is_net_path = _is_net_path;
 3310  0
         instance._is_abs_path = _is_abs_path;
 3311  0
         instance._is_rel_path = _is_rel_path;
 3312  0
         instance._is_reg_name = _is_reg_name;
 3313  0
         instance._is_server = _is_server;
 3314  0
         instance._is_hostname = _is_hostname;
 3315  0
         instance._is_IPv4address = _is_IPv4address;
 3316  0
         instance._is_IPv6reference = _is_IPv6reference;
 3317  
 
 3318  0
         return instance;
 3319  
     }
 3320  
 
 3321  
     // ------------------------------------------------------------ Get the URI
 3322  
 
 3323  
     /**
 3324  
      * It can be gotten the URI character sequence. It's raw-escaped.
 3325  
      * For the purpose of the protocol to be transported, it will be useful.
 3326  
      * <p>
 3327  
      * It is clearly unwise to use a URL that contains a password which is
 3328  
      * intended to be secret. In particular, the use of a password within
 3329  
      * the 'userinfo' component of a URL is strongly disrecommended except
 3330  
      * in those rare cases where the 'password' parameter is intended to be
 3331  
      * public.
 3332  
      * <p>
 3333  
      * When you want to get each part of the userinfo, you need to use the
 3334  
      * specific methods in the specific URL. It depends on the specific URL.
 3335  
      *
 3336  
      * @return URI character sequence
 3337  
      */
 3338  
     public char[] getRawURI() {
 3339  0
         return _uri;
 3340  
     }
 3341  
 
 3342  
 
 3343  
     /**
 3344  
      * It can be gotten the URI character sequence. It's escaped.
 3345  
      * For the purpose of the protocol to be transported, it will be useful.
 3346  
      *
 3347  
      * @return the URI string
 3348  
      */
 3349  
     public String getEscapedURI() {
 3350  0
         return (_uri == null) ? null : new String(_uri);
 3351  
     }
 3352  
     
 3353  
 
 3354  
     /**
 3355  
      * It can be gotten the URI character sequence.
 3356  
      *
 3357  
      * @return the URI string
 3358  
      * @exception IOException incomplete trailing escape pattern
 3359  
      * Or unsupported character encoding
 3360  
      * @see #decode
 3361  
      */
 3362  
     public String getURI() throws IOException {
 3363  0
         return (_uri == null) ? null : decode(_uri);
 3364  
     }
 3365  
 
 3366  
 
 3367  
     /**
 3368  
      * Get the escaped URI string.
 3369  
      * <p>
 3370  
      * On the document, the URI-reference form is only used without the userinfo
 3371  
      * component like http://jakarta.apache.org/ by the security reason.
 3372  
      * But the URI-reference form with the userinfo component could be parsed.
 3373  
      * <p>
 3374  
      * In other words, this URI and any its subclasses must not expose the
 3375  
      * URI-reference expression with the userinfo component like
 3376  
      * http://user:password@hostport/restricted_zone.<br>
 3377  
      * It means that the API client programmer should extract each user and
 3378  
      * password to access manually.  Probably it will be supported in the each
 3379  
      * subclass, however, not a whole URI-reference expression.
 3380  
      *
 3381  
      * @return the URI string
 3382  
      * @see #clone()
 3383  
      */
 3384  
     public String toString() {
 3385  0
         return getEscapedURI();
 3386  
     }
 3387  
 
 3388  
 
 3389  
     // ------------------------------------------------------------ Inner class
 3390  
 
 3391  
     /** 
 3392  
      * A mapping to determine the (somewhat arbitrarily) preferred charset for 
 3393  
      * a given locale.  Supports all locales recognized in JDK 1.1.
 3394  
      * <p>
 3395  
      * The distribution of this class is Servlets.com.    It was originally
 3396  
      * written by Jason Hunter [jhunter at acm.org] and used by with permission.
 3397  
      */
 3398  0
     public static class LocaleToCharsetMap {
 3399  
 
 3400  
         private static Hashtable map;
 3401  
         static {
 3402  0
             map = new Hashtable();
 3403  0
             map.put("ar", "ISO-8859-6");
 3404  0
             map.put("be", "ISO-8859-5");
 3405  0
             map.put("bg", "ISO-8859-5");
 3406  0
             map.put("ca", "ISO-8859-1");
 3407  0
             map.put("cs", "ISO-8859-2");
 3408  0
             map.put("da", "ISO-8859-1");
 3409  0
             map.put("de", "ISO-8859-1");
 3410  0
             map.put("el", "ISO-8859-7");
 3411  0
             map.put("en", "ISO-8859-1");
 3412  0
             map.put("es", "ISO-8859-1");
 3413  0
             map.put("et", "ISO-8859-1");
 3414  0
             map.put("fi", "ISO-8859-1");
 3415  0
             map.put("fr", "ISO-8859-1");
 3416  0
             map.put("hr", "ISO-8859-2");
 3417  0
             map.put("hu", "ISO-8859-2");
 3418  0
             map.put("is", "ISO-8859-1");
 3419  0
             map.put("it", "ISO-8859-1");
 3420  0
             map.put("iw", "ISO-8859-8");
 3421  0
             map.put("ja", "Shift_JIS");
 3422  0
             map.put("ko", "EUC-KR");
 3423  0
             map.put("lt", "ISO-8859-2");
 3424  0
             map.put("lv", "ISO-8859-2");
 3425  0
             map.put("mk", "ISO-8859-5");
 3426  0
             map.put("nl", "ISO-8859-1");
 3427  0
             map.put("no", "ISO-8859-1");
 3428  0
             map.put("pl", "ISO-8859-2");
 3429  0
             map.put("pt", "ISO-8859-1");
 3430  0
             map.put("ro", "ISO-8859-2");
 3431  0
             map.put("ru", "ISO-8859-5");
 3432  0
             map.put("sh", "ISO-8859-5");
 3433  0
             map.put("sk", "ISO-8859-2");
 3434  0
             map.put("sl", "ISO-8859-2");
 3435  0
             map.put("sq", "ISO-8859-2");
 3436  0
             map.put("sr", "ISO-8859-5");
 3437  0
             map.put("sv", "ISO-8859-1");
 3438  0
             map.put("tr", "ISO-8859-9");
 3439  0
             map.put("uk", "ISO-8859-5");
 3440  0
             map.put("zh", "GB2312");
 3441  0
             map.put("zh_TW", "Big5");
 3442  0
         }
 3443  
        
 3444  
         /**
 3445  
          * Get the preferred charset for the given locale.
 3446  
          *
 3447  
          * @param locale the locale
 3448  
          * @return the preferred charset
 3449  
          * or null if the locale is not recognized
 3450  
          */
 3451  
         public static String getCharset(Locale locale) {
 3452  
             // try for an full name match (may include country)
 3453  0
             String charset = (String) map.get(locale.toString());
 3454  0
             if (charset != null) return charset;
 3455  
            
 3456  
             // if a full name didn't match, try just the language
 3457  0
             charset = (String) map.get(locale.getLanguage());
 3458  0
             return charset;  // may be null
 3459  
         }
 3460  
 
 3461  
     }
 3462  
 
 3463  
 }
 3464