View Javadoc

1   /*
2    * $Header$
3    * $Revision: 905 $
4    * $Date: 2006-02-19 01:44:03 +0000 (Sun, 19 Feb 2006) $
5    *
6    * ====================================================================
7    *
8    * The Apache Software License, Version 1.1
9    *
10   * Copyright (c) 2002 the Apache Software Foundation.  All rights 
11   * reserved.
12   *
13   * Redistribution and use in source and binary forms, with or without
14   * modification, are permitted provided that the following conditions
15   * are met:
16   *
17   * 1. Redistributions of source code must retain the above copyright
18   *    notice, this list of conditions and the following disclaimer. 
19   *
20   * 2. Redistributions in binary form must reproduce the above copyright
21   *    notice, this list of conditions and the following disclaimer in
22   *    the documentation and/or other materials provided with the
23   *    distribution.
24   *
25   * 3. The end-user documentation included with the redistribution, if
26   *    any, must include the following acknowlegement:  
27   *       "This product includes software developed by the 
28   *        Apache Software Foundation (http://www.apache.org/)."
29   *    Alternately, this acknowlegement may appear in the software itself,
30   *    if and wherever such third-party acknowlegements normally appear.
31   *
32   * 4. The names "The Jakarta Project", "HttpClient", and "Apache Software
33   *    Foundation" must not be used to endorse or promote products derived
34   *    from this software without prior written permission. For written 
35   *    permission, please contact apache@apache.org.
36   *
37   * 5. Products derived from this software may not be called "Apache"
38   *    nor may "Apache" appear in their names without prior written
39   *    permission of the Apache Group.
40   *
41   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
42   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
43   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
44   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
45   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
48   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
49   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
50   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
51   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52   * SUCH DAMAGE.
53   * ====================================================================
54   *
55   * This software consists of voluntary contributions made by many
56   * individuals on behalf of the Apache Software Foundation.  For more
57   * information on the Apache Software Foundation, please see
58   * <http://www.apache.org/>.
59   *
60   * [Additional notices, if required by prior licensing conditions]
61   *
62   */ 
63  
64  // excellent class borrowed from Apache Commons project:
65  //package org.apache.commons.httpclient;
66  
67  package net.wotonomy.web;
68  
69  import java.io.IOException;
70  import java.io.Serializable;
71  import java.io.UnsupportedEncodingException;
72  import java.net.URL;
73  import java.security.AccessController;
74  import java.util.BitSet;
75  import java.util.Hashtable;
76  import java.util.Locale;
77  
78  import sun.security.action.GetPropertyAction;
79  
80  /***
81   * The interface for the URI(Uniform Resource Identifiers) version of RFC 2396.
82   * This class has the purpose of supporting the parsing of a URI reference to
83   * extend any specific protocols, the character encoding of the protocol to 
84   * be transported and the charset of the document.
85   * <p>
86   * A URI is always in an "escaped" form, since escaping or unescaping a
87   * completed URI might change its semantics.  
88   * <p>
89   * Implementers should be careful not to escape or unescape the same string
90   * more than once, since unescaping an already unescaped string might lead to
91   * misinterpreting a percent data character as another escaped character,
92   * or vice versa in the case of escaping an already escaped string.
93   * <p>
94   * In order to avoid these problems, data types used as follows:
95   * <p><blockquote><pre>
96   *   URI character sequence: char
97   *   octet sequence: byte
98   *   original character sequence: String
99   * </pre></blockquote><p>
100  *
101  * So, a URI is a sequence of characters as an array of a char type, which
102  * is not always represented as a sequence of octets as an array of byte.
103  * <p>
104  * 
105  * URI Syntactic Components
106  * <p><blockquote><pre>
107  * - In general, written as follows:
108  *   Absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;
109  *   Generic URI = &lt;scheme&gt;://&lt;authority&gt;&lt;path&gt;?&lt;query&gt;
110  *
111  * - Syntax
112  *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
113  *   hier_part     = ( net_path | abs_path ) [ "?" query ]
114  *   net_path      = "//" authority [ abs_path ]
115  *   abs_path      = "/"  path_segments
116  * </pre></blockquote><p>
117  *
118  * The following examples illustrate URI that are in common use.
119  * <pre>
120  * ftp://ftp.is.co.za/rfc/rfc1808.txt
121  *    -- ftp scheme for File Transfer Protocol services
122  * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles
123  *    -- gopher scheme for Gopher and Gopher+ Protocol services
124  * http://www.math.uio.no/faq/compression-faq/part1.html
125  *    -- http scheme for Hypertext Transfer Protocol services
126  * mailto:mduerst@ifi.unizh.ch
127  *    -- mailto scheme for electronic mail addresses
128  * news:comp.infosystems.www.servers.unix
129  *    -- news scheme for USENET news groups and articles
130  * telnet://melvyl.ucop.edu/
131  *    -- telnet scheme for interactive services via the TELNET Protocol
132  * </pre>
133  * Please, notice that there are many modifications from URL(RFC 1738) and
134  * relative URL(RFC 1808).
135  * <p>
136  * <b>The expressions for a URI</b>
137  * <p><pre>
138  * For escaped URI forms
139  *  - URI(char[]) // constructor
140  *  - char[] getRawXxx() // method
141  *  - String getEscapedXxx() // method
142  *  - String toString() // method
143  * <p>
144  * For unescaped URI forms
145  *  - URI(String) // constructor
146  *  - String getXXX() // method
147  * </pre><p>
148  *
149  * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
150  * @version $Revision: 905 $ $Date: 2002/03/14 15:14:01 
151  */
152 class URI implements Cloneable, Comparable, Serializable {
153 
154 
155     // ----------------------------------------------------------- Constructors
156 
157     protected URI() {
158     }
159 
160     /***
161      * Construct a URI as an escaped form of a character array.
162      * An URI can be placed within double-quotes or angle brackets like 
163      * "http://test.com/" and &lt;http://test.com/&gt;
164      * 
165      * @param escaped the URI character sequence
166      * @exception IOException
167      * @throws NullPointerException if <code>escaped</code> is <code>null</code>
168      */
169     public URI(char[] escaped) throws IOException {
170         parseUriReference(new String(escaped), true);
171     }
172 
173 
174     /***
175      * Construct a URI from the given string.
176      * <p><blockquote><pre>
177      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
178      * </pre></blockquote><p>
179      * An URI can be placed within double-quotes or angle brackets like 
180      * "http://test.com/" and &lt;http://test.com/&gt;
181      *
182      * @param original the string to be represented to URI character sequence
183      * It is one of absoluteURI and relativeURI.
184      * @exception IOException
185      */
186     public URI(String original) throws IOException {
187         parseUriReference(original, false);
188     }
189 
190     /***
191      * Construct a URI from a URL.
192      *
193      * @param url a valid URL.
194      * @throws IOException
195      * @since 2.0 
196      */
197     public URI(URL url) throws IOException {
198         this(url.toString());
199     }
200 
201 
202     /***
203      * Construct a general URI from the given components.
204      * <p><blockquote><pre>
205      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
206      *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
207      *   opaque_part   = uric_no_slash *uric
208      * </pre></blockquote><p>
209      * It's for absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;#
210      * &lt;fragment&gt;.
211      *
212      * @param scheme the scheme string
213      * @param scheme_specific_part scheme_specific_part
214      * @param fragment the fragment string
215      * @exception IOException
216      */
217     public URI(String scheme, String scheme_specific_part, String fragment)
218         throws IOException {
219 
220         // validate and contruct the URI character sequence
221         if (scheme == null) {
222            throw new IOException(/*IOException.PARSING,*/ "URI: scheme required");
223         }
224         char[] s = scheme.toLowerCase().toCharArray();
225         if (validate(s, URI.scheme)) {
226             _scheme = s; // is_absoluteURI
227         } else {
228             throw new IOException(/*IOException.PARSING,*/ "URI: incorrect scheme");
229         }
230         _opaque = encode(scheme_specific_part, allowed_opaque_part);
231         // Set flag
232         _is_opaque_part = true;
233         setUriReference();
234     }
235 
236 
237     /***
238      * Construct a general URI from the given components.
239      * <p><blockquote><pre>
240      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
241      *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
242      *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
243      *   hier_part     = ( net_path | abs_path ) [ "?" query ]
244      * </pre></blockquote><p>
245      * It's for absolute URI = &lt;scheme&gt;:&lt;path&gt;?&lt;query&gt;#&lt;
246      * fragment&gt; and relative URI = &lt;path&gt;?&lt;query&gt;#&lt;fragment
247      * &gt;.
248      *
249      * @param scheme the scheme string
250      * @param authority the authority string
251      * @param path the path string
252      * @param query the query string
253      * @param fragment the fragment string
254      * @exception IOException
255      */
256     public URI(String scheme, String authority, String path, String query,
257                String fragment) throws IOException {
258 
259         // validate and contruct the URI character sequence
260         StringBuffer buff = new StringBuffer();
261         if (scheme != null) {
262             buff.append(scheme);
263             buff.append(':');
264         }
265         if (authority != null) {
266             buff.append("//");
267             buff.append(authority);
268         }
269         if (path != null) {  // accept empty path
270             if ((scheme != null || authority != null)
271                     && !path.startsWith("/")) {
272                 throw new IOException(/*IOException.PARSING*,*/
273                         "URI: abs_path requested");
274             }
275             buff.append(path);
276         }
277         if (query != null) {
278             buff.append('?');
279             buff.append(query);
280         }
281         if (fragment != null) {
282             buff.append('#');
283             buff.append(fragment);
284         }
285         parseUriReference(buff.toString(), false);
286     }
287 
288 
289     /***
290      * Construct a general URI from the given components.
291      *
292      * @param scheme the scheme string
293      * @param userinfo the userinfo string
294      * @param host the host string
295      * @param port the port number
296      * @exception IOException
297      */
298     public URI(String scheme, String userinfo, String host, int port)
299         throws IOException {
300 
301         this(scheme, userinfo, host, port, null, null, null);
302     }
303 
304 
305     /***
306      * Construct a general URI from the given components.
307      *
308      * @param scheme the scheme string
309      * @param userinfo the userinfo string
310      * @param host the host string
311      * @param port the port number
312      * @param path the path string
313      * @exception IOException
314      */
315     public URI(String scheme, String userinfo, String host, int port,
316             String path) throws IOException {
317 
318         this(scheme, userinfo, host, port, path, null, null);
319     }
320 
321 
322     /***
323      * Construct a general URI from the given components.
324      *
325      * @param scheme the scheme string
326      * @param userinfo the userinfo string
327      * @param host the host string
328      * @param port the port number
329      * @param path the path string
330      * @param query the query string
331      * @exception IOException
332      */
333     public URI(String scheme, String userinfo, String host, int port,
334             String path, String query) throws IOException {
335 
336         this(scheme, userinfo, host, port, path, query, null);
337     }
338 
339 
340     /***
341      * Construct a general URI from the given components.
342      *
343      * @param scheme the scheme string
344      * @param userinfo the userinfo string
345      * @param host the host string
346      * @param port the port number
347      * @param path the path string
348      * @param query the query string
349      * @param fragment the fragment string
350      * @exception IOException
351      */
352     public URI(String scheme, String userinfo, String host, int port,
353             String path, String query, String fragment) throws IOException {
354 
355         this(scheme, (host == null) ? null :
356                 ((userinfo != null) ? userinfo + '@' : "") + host +
357                 ((port != -1) ? ":" + port : ""), path, query, fragment);
358     }
359 
360 
361     /***
362      * Construct a general URI from the given components.
363      *
364      * @param scheme the scheme string
365      * @param host the host string
366      * @param path the path string
367      * @param fragment the fragment string
368      * @exception IOException
369      */
370     public URI(String scheme, String host, String path, String fragment)
371         throws IOException {
372 
373         this(scheme, host, path, null, fragment);
374     }
375 
376 
377     /***
378      * Construct a general URI with the given relative URI string.
379      *
380      * @param base the base URI
381      * @param relative the relative URI string
382      * @exception IOException
383      */
384     public URI(URI base, String relative) throws IOException {
385         this(base, new URI(relative));
386     }
387 
388 
389     /***
390      * Construct a general URI with the given relative URI.
391      * <p><blockquote><pre>
392      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
393      *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
394      * </pre></blockquote><p>
395      * Resolving Relative References to Absolute Form.
396      *
397      * <strong>Examples of Resolving Relative URI References</strong>
398      *
399      * Within an object with a well-defined base URI of
400      * <p><blockquote><pre>
401      *   http://a/b/c/d;p?q
402      * </pre></blockquote><p>
403      * the relative URI would be resolved as follows:
404      *
405      * Normal Examples
406      *
407      * <p><blockquote><pre>
408      *   g:h           =  g:h
409      *   g             =  http://a/b/c/g
410      *   ./g           =  http://a/b/c/g
411      *   g/            =  http://a/b/c/g/
412      *   /g            =  http://a/g
413      *   //g           =  http://g
414      *   ?y            =  http://a/b/c/?y
415      *   g?y           =  http://a/b/c/g?y
416      *   #s            =  (current document)#s
417      *   g#s           =  http://a/b/c/g#s
418      *   g?y#s         =  http://a/b/c/g?y#s
419      *   ;x            =  http://a/b/c/;x
420      *   g;x           =  http://a/b/c/g;x
421      *   g;x?y#s       =  http://a/b/c/g;x?y#s
422      *   .             =  http://a/b/c/
423      *   ./            =  http://a/b/c/
424      *   ..            =  http://a/b/
425      *   ../           =  http://a/b/
426      *   ../g          =  http://a/b/g
427      *   ../..         =  http://a/
428      *   ../../        =  http://a/ 
429      *   ../../g       =  http://a/g
430      * </pre></blockquote><p>
431      *
432      * Some URI schemes do not allow a hierarchical syntax matching the
433      * <hier_part> syntax, and thus cannot use relative references.
434      *
435      * @param base the base URI
436      * @param relative the relative URI
437      * @exception IOException
438      */
439     public URI(URI base, URI relative) throws IOException {
440 
441         if (base._scheme == null) {
442             throw new IOException(/* IOException.PARSING,*/ "URI: base URI required");
443         }
444         if (base._scheme != null) {
445             this._scheme = base._scheme;
446             this._authority = base._authority;
447         }
448         if (base._is_opaque_part || relative._is_opaque_part) {
449             this._scheme = base._scheme;
450             this._is_opaque_part = relative._is_opaque_part;
451             this._opaque = relative._opaque;
452             this._fragment = relative._fragment;
453             this.setUriReference();
454             return;
455         }
456         if (relative._scheme != null) {
457             this._scheme = relative._scheme;
458             this._is_net_path = relative._is_net_path;
459             this._authority = relative._authority;
460             if (relative._is_server) {
461                 this._userinfo = relative._userinfo;
462                 this._host = relative._host;
463                 this._port = relative._port;
464             } else if (relative._is_reg_name) {
465                 this._is_reg_name = relative._is_reg_name;
466             }
467             this._is_abs_path = relative._is_abs_path;
468             this._is_rel_path = relative._is_rel_path;
469             this._path = relative._path;
470         } else if (base._authority != null && relative._scheme == null) {
471             this._is_net_path = base._is_net_path;
472             this._authority = base._authority;
473             if (base._is_server) {
474                 this._userinfo = base._userinfo;
475                 this._host = base._host;
476                 this._port = base._port;
477             } else if (base._is_reg_name) {
478                 this._is_reg_name = base._is_reg_name;
479             }
480         }
481         if (relative._authority != null) {
482             this._is_net_path = relative._is_net_path;
483             this._authority = relative._authority;
484             if (relative._is_server) {
485                 this._is_server = relative._is_server;
486                 this._userinfo = relative._userinfo;
487                 this._host = relative._host;
488                 this._port = relative._port;
489             } else if (relative._is_reg_name) {
490                 this._is_reg_name = relative._is_reg_name;
491             }
492             this._is_abs_path = relative._is_abs_path;
493             this._is_rel_path = relative._is_rel_path;
494             this._path = relative._path;
495         }
496         // resolve the path
497         if (relative._scheme == null && relative._authority == null || 
498                 equals(base._scheme, relative._scheme)) {
499             this._path = resolvePath(base._path, relative._path);
500         }
501         // base._query removed
502         if (relative._query != null) {
503             this._query = relative._query;
504         }
505         // base._fragment removed
506         if (relative._fragment != null) {
507             this._fragment = relative._fragment;
508         }
509         this.setUriReference();
510     }
511 
512     // --------------------------------------------------- Instance Variables
513 
514     static final long serialVersionUID = 604752400577948726L;
515 
516 
517     /***
518      * This Uniform Resource Identifier (URI).
519      * The URI is always in an "escaped" form, since escaping or unescaping
520      * a completed URI might change its semantics.  
521      */
522     protected char[] _uri = null;
523 
524 
525     /***
526      * The default charset of the protocol.  RFC 2277, 2396
527      */
528     protected static String _protocolCharset = "UTF-8";
529 
530 
531     /***
532      * The default charset of the document.  RFC 2277, 2396
533      * The platform's charset is used for the document by default.
534      */
535     protected static String _documentCharset = null;
536     // Static initializer for _documentCharset
537     static {
538         Locale locale = Locale.getDefault();
539         if (locale != null) {
540             // in order to support backward compatiblity
541             _documentCharset = LocaleToCharsetMap.getCharset(locale);
542         } else {
543             _documentCharset = (String)AccessController.doPrivileged(
544                     new GetPropertyAction("file.encoding"));
545         }
546     }
547 
548     /***
549      * The scheme.
550      */
551     protected char[] _scheme = null;
552 
553 
554     /***
555      * The opaque.
556      */
557     protected char[] _opaque = null;
558 
559 
560     /***
561      * The authority.
562      */
563     protected char[] _authority = null;
564 
565 
566     /***
567      * The userinfo.
568      */
569     protected char[] _userinfo = null;
570 
571 
572     /***
573      * The host.
574      */
575     protected char[] _host = null;
576 
577 
578     /***
579      * The port.
580      */
581     protected int _port = -1;
582 
583 
584     /***
585      * The path.
586      */
587     protected char[] _path = null;
588 
589 
590     /***
591      * The query.
592      */
593     protected char[] _query = null;
594 
595 
596     /***
597      * The fragment.
598      */
599     protected char[] _fragment = null;
600 
601 
602     /***
603      * The root path.
604      */
605     protected static char[] rootPath = { '/' };
606 
607     // ---------------------- Generous characters for each component validation
608 
609     /***
610      * The percent "%" character always has the reserved purpose of being the
611      * escape indicator, it must be escaped as "%25" in order to be used as
612      * data within a URI.
613      */
614     protected static final BitSet percent = new BitSet(256);
615     // Static initializer for percent
616     static {
617         percent.set('%');
618     }
619 
620 
621     /***
622      * BitSet for digit.
623      * <p><blockquote><pre>
624      * digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
625      *            "8" | "9"
626      * </pre></blockquote><p>
627      */
628     protected static final BitSet digit = new BitSet(256);
629     // Static initializer for digit
630     static {
631         for(int i = '0'; i <= '9'; i++) {
632             digit.set(i);
633         }
634     }
635 
636 
637     /***
638      * BitSet for alpha.
639      * <p><blockquote><pre>
640      * alpha         = lowalpha | upalpha
641      * </pre></blockquote><p>
642      */
643     protected static final BitSet alpha = new BitSet(256);
644     // Static initializer for alpha
645     static {
646         for (int i = 'a'; i <= 'z'; i++) {
647             alpha.set(i);
648         }
649         for (int i = 'A'; i <= 'Z'; i++) {
650             alpha.set(i);
651         }
652     }
653 
654 
655     /***
656      * BitSet for alphanum (join of alpha &amp; digit).
657      * <p><blockquote><pre>
658      *  alphanum      = alpha | digit
659      * </pre></blockquote><p>
660      */
661     protected static final BitSet alphanum = new BitSet(256);
662     // Static initializer for alphanum
663     static {
664         alphanum.or(alpha);
665         alphanum.or(digit);
666     }
667 
668 
669     /***
670      * BitSet for hex.
671      * <p><blockquote><pre>
672      * hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
673      *                         "a" | "b" | "c" | "d" | "e" | "f"
674      * </pre></blockquote><p>
675      */
676     protected static final BitSet hex = new BitSet(256);
677     // Static initializer for hex
678     static {
679         hex.or(digit);
680         for(int i = 'a'; i <= 'f'; i++) {
681             hex.set(i);
682         }
683         for(int i = 'A'; i <= 'F'; i++) {
684             hex.set(i);
685         }
686     }
687 
688 
689     /***
690      * BitSet for escaped.
691      * <p><blockquote><pre>
692      * escaped       = "%" hex hex
693      * </pre></blockquote><p>
694      */
695     protected static final BitSet escaped = new BitSet(256);
696     // Static initializer for escaped
697     static {
698         escaped.or(percent);
699         escaped.or(hex);
700     }
701 
702 
703     /***
704      * BitSet for mark.
705      * <p><blockquote><pre>
706      * mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
707      *                 "(" | ")"
708      * </pre></blockquote><p>
709      */
710     protected static final BitSet mark = new BitSet(256);
711     // Static initializer for mark
712     static {
713         mark.set('-');
714         mark.set('_');
715         mark.set('.');
716         mark.set('!');
717         mark.set('~');
718         mark.set('*');
719         mark.set('\'');
720         mark.set('(');
721         mark.set(')');
722     }
723 
724 
725     /***
726      * Data characters that are allowed in a URI but do not have a reserved
727      * purpose are called unreserved.
728      * <p><blockquote><pre>
729      * unreserved    = alphanum | mark
730      * </pre></blockquote><p>
731      */
732     protected static final BitSet unreserved = new BitSet(256);
733     // Static initializer for unreserved
734     static {
735         unreserved.or(alphanum);
736         unreserved.or(mark);
737     }
738 
739 
740     /***
741      * BitSet for reserved.
742      * <p><blockquote><pre>
743      * reserved      = ";" | "/" | "?" | ":" | "@" | "&amp;" | "=" | "+" |
744      *                 "$" | ","
745      * </pre></blockquote><p>
746      */
747     protected static final BitSet reserved = new BitSet(256);
748     // Static initializer for reserved
749     static {
750         reserved.set(';');
751         reserved.set('/');
752         reserved.set('?');
753         reserved.set(':');
754         reserved.set('@');
755         reserved.set('&');
756         reserved.set('=');
757         reserved.set('+');
758         reserved.set('$');
759         reserved.set(',');
760     }
761 
762 
763     /***
764      * BitSet for uric.
765      * <p><blockquote><pre>
766      * uric          = reserved | unreserved | escaped
767      * </pre></blockquote><p>
768      */
769     protected static final BitSet uric = new BitSet(256);
770     // Static initializer for uric
771     static {
772         uric.or(reserved);
773         uric.or(unreserved);
774         uric.or(escaped);
775     }
776 
777 
778     /***
779      * BitSet for fragment (alias for uric).
780      * <p><blockquote><pre>
781      * fragment      = *uric
782      * </pre></blockquote><p>
783      */
784     protected static final BitSet fragment = uric;
785 
786 
787     /***
788      * BitSet for query (alias for uric).
789      * <p><blockquote><pre>
790      * query         = *uric
791      * </pre></blockquote><p>
792      */
793     protected static final BitSet query = uric;
794 
795 
796     /***
797      * BitSet for pchar.
798      * <p><blockquote><pre>
799      * pchar         = unreserved | escaped |
800      *                 ":" | "@" | "&amp;" | "=" | "+" | "$" | ","
801      * </pre></blockquote><p>
802      */
803     protected static final BitSet pchar = new BitSet(256);
804     // Static initializer for pchar
805     static {
806         pchar.or(unreserved);
807         pchar.or(escaped);
808         pchar.set(':');
809         pchar.set('@');
810         pchar.set('&');
811         pchar.set('=');
812         pchar.set('+');
813         pchar.set('$');
814         pchar.set(',');
815     }
816 
817 
818     /***
819      * BitSet for param (alias for pchar).
820      * <p><blockquote><pre>
821      * param         = *pchar
822      * </pre></blockquote><p>
823      */
824     protected static final BitSet param = pchar;
825 
826 
827     /***
828      * BitSet for segment.
829      * <p><blockquote><pre>
830      * segment       = *pchar *( ";" param )
831      * </pre></blockquote><p>
832      */
833     protected static final BitSet segment = new BitSet(256);
834     // Static initializer for segment
835     static {
836         segment.or(pchar);
837         segment.set(';');
838         segment.or(param);
839     }
840 
841 
842     /***
843      * BitSet for path segments.
844      * <p><blockquote><pre>
845      * path_segments = segment *( "/" segment )
846      * </pre></blockquote><p>
847      */
848     protected static final BitSet path_segments = new BitSet(256);
849     // Static initializer for path_segments
850     static {
851         path_segments.set('/');
852         path_segments.or(segment);
853     }
854 
855 
856     /***
857      * URI absolute path.
858      * <p><blockquote><pre>
859      * abs_path      = "/"  path_segments
860      * </pre><blockquote><p>
861      */
862     protected static final BitSet abs_path = new BitSet(256);
863     // Static initializer for abs_path
864     static {
865         abs_path.set('/');
866         abs_path.or(path_segments);
867     }
868 
869 
870     /***
871      * URI bitset for encoding typical non-slash characters.
872      * <p><blockquote><pre>
873      * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
874      *                 "&amp;" | "=" | "+" | "$" | ","
875      * </pre></blockquote><p>
876      */
877     protected static final BitSet uric_no_slash = new BitSet(256);
878     // Static initializer for uric_no_slash
879     static {
880         uric_no_slash.or(unreserved);
881         uric_no_slash.or(escaped);
882         uric_no_slash.set(';');
883         uric_no_slash.set('?');
884         uric_no_slash.set(';');
885         uric_no_slash.set('@');
886         uric_no_slash.set('&');
887         uric_no_slash.set('=');
888         uric_no_slash.set('+');
889         uric_no_slash.set('$');
890         uric_no_slash.set(',');
891     }
892     
893 
894     /***
895      * URI bitset that combines uric_no_slash and uric.
896      * <p><blockquote><pre>
897      * opaque_part   = uric_no_slash *uric
898      * </pre></blockquote><p>
899      */
900     protected static final BitSet opaque_part = new BitSet(256);
901     // Static initializer for opaque_part
902     static {
903         opaque_part.or(uric_no_slash);
904         opaque_part.or(uric);
905     }
906     
907 
908     /***
909      * URI bitset that combines absolute path and opaque part.
910      * <p><blockquote><pre>
911      * path          = [ abs_path | opaque_part ]
912      * </pre></blockquote><p>
913      */
914     protected static final BitSet path = new BitSet(256);
915     // Static initializer for path
916     static {
917         path.or(abs_path);
918         path.or(opaque_part);
919     }
920 
921 
922     /**
923      * BitSet for port: an alias of <code>digit</code>. It is the very same BitSet instance, not a copy, so a change to one is visible through the other.
924      */
925     protected static final BitSet port = digit;
926 
927 
/**
 * Bitset that combines digit and dot for IPv4address.
 * <p><blockquote><pre>
 * IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
 * </pre></blockquote><p>
 * Only the character class is modelled; the dotted-quad structure is not
 * checked by this set.
 */
protected static final BitSet IPv4address = new BitSet(256);
// An IPv4 literal is made of the dot and the decimal digits.
static {
    IPv4address.set('.');
    IPv4address.or(digit);
}
940 
941 
/**
 * Characters of an IPv6 address (RFC 2373).
 * <p><blockquote><pre>
 * IPv6address = hexpart [ ":" IPv4address ]
 * </pre></blockquote><p>
 * Built from <code>hex</code>, the colon, and every
 * <code>IPv4address</code> character.
 */
protected static final BitSet IPv6address = new BitSet(256);
// hexpart is represented by the hex character class.
static {
    IPv6address.set(':');
    IPv6address.or(IPv4address);
    IPv6address.or(hex);
}
955 
956 
/**
 * Characters of a bracketed IPv6 reference (RFC 2732, 2373).
 * <p><blockquote><pre>
 * IPv6reference   = "[" IPv6address "]"
 * </pre></blockquote><p>
 */
protected static final BitSet IPv6reference = new BitSet(256);
// All IPv6address characters plus the two enclosing square brackets.
static {
    IPv6reference.or(IPv6address);
    IPv6reference.set('[');
    IPv6reference.set(']');
}
970 
971 
/**
 * Characters of a top-level domain label.
 * <p><blockquote><pre>
 * toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
 * </pre></blockquote><p>
 * Only the character class is modelled: <code>alphanum</code> plus the
 * hyphen. The positional rules of the grammar are not enforced here.
 */
protected static final BitSet toplabel = new BitSet(256);
// Hyphen plus the alphanumerics.
static {
    toplabel.set('-');
    toplabel.or(alphanum);
}
984 
985 
986     /**
987      * BitSet for domainlabel: an alias of <code>toplabel</code> (the same BitSet instance, not a separate copy).
988      * <p><blockquote><pre>
989      * domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
990      * </pre></blockquote><p>
991      */
992     protected static final BitSet domainlabel = toplabel;
993 
994 
/**
 * Characters of a host name.
 * <p><blockquote><pre>
 * hostname      = *( domainlabel "." ) toplabel [ "." ]
 * </pre></blockquote><p>
 */
protected static final BitSet hostname = new BitSet(256);
// domainlabel is the same BitSet object as toplabel, so merging toplabel
// alone covers both label kinds; the dot separates the labels.
static {
    hostname.set('.');
    hostname.or(toplabel);
}
1008 
1009 
/**
 * Characters of a host.
 * <p><blockquote><pre>
 * host          = hostname | IPv4address | IPv6reference
 * </pre></blockquote><p>
 */
protected static final BitSet host = new BitSet(256);
// IPv4address is not merged on its own: IPv6reference is built from
// IPv6address, which already contains every IPv4address character.
static {
    host.or(IPv6reference);
    host.or(hostname);
}
1023 
1024 
/**
 * Characters of a host with an optional port.
 * <p><blockquote><pre>
 * hostport      = host [ ":" port ]
 * </pre></blockquote><p>
 */
protected static final BitSet hostport = new BitSet(256);
// Host characters, port characters, and the colon between them.
static {
    hostport.set(':');
    hostport.or(port);
    hostport.or(host);
}
1038 
1039 
/**
 * Characters of the userinfo component.
 * <p><blockquote><pre>
 * userinfo      = *( unreserved | escaped |
 *                    ";" | ":" | "&amp;" | "=" | "+" | "$" | "," )
 * </pre></blockquote><p>
 */
protected static final BitSet userinfo = new BitSet(256);
// unreserved and escaped, plus the punctuation listed in the grammar.
static {
    userinfo.or(unreserved);
    userinfo.or(escaped);
    final String punctuation = ";:&=+$,";
    for (int i = 0; i < punctuation.length(); i++) {
        userinfo.set(punctuation.charAt(i));
    }
}
1060 
1061 
/**
 * Characters usable inside one part of the userinfo component, such as
 * the user name or the password: <code>userinfo</code> with the
 * characters ';', ':', '@', '?' and '/' removed.
 */
public static final BitSet within_userinfo = new BitSet(256);
// Start from userinfo, then drop the characters reserved within authority.
static {
    within_userinfo.or(userinfo);
    final String reservedHere = ";:@?/";
    for (int i = 0; i < reservedHere.length(); i++) {
        within_userinfo.clear(reservedHere.charAt(i));
    }
}
1075 
1076 
/**
 * Characters of a server-based naming authority.
 * <p><blockquote><pre>
 * server        = [ [ userinfo "@" ] hostport ]
 * </pre></blockquote><p>
 */
protected static final BitSet server = new BitSet(256);
// userinfo and hostport characters, plus the '@' that joins them.
static {
    server.set('@');
    server.or(hostport);
    server.or(userinfo);
}
1090 
1091 
/**
 * Characters of a registry-based naming authority.
 * <p><blockquote><pre>
 * reg_name      = 1*( unreserved | escaped | "$" | "," |
 *                     ";" | ":" | "@" | "&amp;" | "=" | "+" )
 * </pre></blockquote><p>
 */
protected static final BitSet reg_name = new BitSet(256);
// unreserved and escaped, plus the punctuation listed in the grammar.
static {
    reg_name.or(unreserved);
    reg_name.or(escaped);
    final String punctuation = "$,;:@&=+";
    for (int i = 0; i < punctuation.length(); i++) {
        reg_name.set(punctuation.charAt(i));
    }
}
1113 
1114 
/**
 * Characters of the authority component: the union of
 * <code>server</code> and <code>reg_name</code>.
 * <p><blockquote><pre>
 * authority     = server | reg_name
 * </pre></blockquote><p>
 */
protected static final BitSet authority = new BitSet(256);
// Either alternative may appear, so both character sets are merged.
static {
    authority.or(reg_name);
    authority.or(server);
}
1127 
1128 
/**
 * Characters of a scheme name.
 * <p><blockquote><pre>
 * scheme        = alpha *( alpha | digit | "+" | "-" | "." )
 * </pre></blockquote><p>
 * Only the character class is modelled; the rule that the first
 * character is an alpha is not expressed by this set.
 */
protected static final BitSet scheme = new BitSet(256);
// Letters, digits, and the three punctuation characters of the grammar.
static {
    scheme.or(alpha);
    scheme.or(digit);
    final String punctuation = "+-.";
    for (int i = 0; i < punctuation.length(); i++) {
        scheme.set(punctuation.charAt(i));
    }
}
1144 
1145 
/**
 * Characters of the first segment of a relative path.
 * <p><blockquote><pre>
 * rel_segment   = 1*( unreserved | escaped |
 *                     ";" | "@" | "&amp;" | "=" | "+" | "$" | "," )
 * </pre></blockquote><p>
 */
protected static final BitSet rel_segment = new BitSet(256);
// unreserved and escaped, plus the punctuation listed in the grammar.
static {
    rel_segment.or(unreserved);
    rel_segment.or(escaped);
    final String punctuation = ";@&=+$,";
    for (int i = 0; i < punctuation.length(); i++) {
        rel_segment.set(punctuation.charAt(i));
    }
}
1166 
1167 
/**
 * Characters of a relative path: the union of <code>rel_segment</code>
 * and <code>abs_path</code>.
 * <p><blockquote><pre>
 * rel_path      = rel_segment [ abs_path ]
 * </pre></blockquote><p>
 */
protected static final BitSet rel_path = new BitSet(256);
// Merge the leading segment's characters with those of the trailing abs_path.
static {
    rel_path.or(abs_path);
    rel_path.or(rel_segment);
}
1180 
1181 
/**
 * Characters of a network path.
 * <p><blockquote><pre>
 * net_path      = "//" authority [ abs_path ]
 * </pre></blockquote><p>
 */
protected static final BitSet net_path = new BitSet(256);
// authority and abs_path characters, plus the slash of the "//" prefix.
static {
    net_path.or(abs_path);
    net_path.or(authority);
    net_path.set('/');
}
1195     
1196 
/**
 * Characters of the hierarchical part of an absolute URI.
 * <p><blockquote><pre>
 * hier_part     = ( net_path | abs_path ) [ "?" query ]
 * </pre></blockquote><p>
 */
protected static final BitSet hier_part = new BitSet(256);
// '?' is not set explicitly: the original note states that the merged sets
// already include it.
static {
    hier_part.or(query);
    hier_part.or(abs_path);
    hier_part.or(net_path);
}
1211 
1212 
/**
 * Characters of a relative URI.
 * <p><blockquote><pre>
 * relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
 * </pre></blockquote><p>
 */
protected static final BitSet relativeURI = new BitSet(256);
// '?' is not set explicitly: the original note states that the merged sets
// already include it.
static {
    relativeURI.or(query);
    relativeURI.or(rel_path);
    relativeURI.or(abs_path);
    relativeURI.or(net_path);
}
1228 
1229 
/**
 * Characters of an absolute URI.
 * <p><blockquote><pre>
 * absoluteURI   = scheme ":" ( hier_part | opaque_part )
 * </pre></blockquote><p>
 */
protected static final BitSet absoluteURI = new BitSet(256);
// scheme, hier_part and opaque_part characters, plus the ':' separator.
static {
    absoluteURI.set(':');
    absoluteURI.or(opaque_part);
    absoluteURI.or(hier_part);
    absoluteURI.or(scheme);
}
1244 
1245 
/**
 * Characters of a URI-reference.
 * <p><blockquote><pre>
 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 * </pre></blockquote><p>
 */
protected static final BitSet URI_reference = new BitSet(256);
// Both URI forms, the fragment characters, and the '#' that introduces them.
static {
    URI_reference.set('#');
    URI_reference.or(fragment);
    URI_reference.or(relativeURI);
    URI_reference.or(absoluteURI);
}
1260 
1261     // ---------------------------- Characters disallowed within the URI syntax
1262     // Excluded US-ASCII Characters are like control, space, delims and unwise
1263 
/**
 * BitSet for control characters: the codes 0x00 through 0x1F and 0x7F.
 */
public static final BitSet control = new BitSet(256);
// Mark 0x7F first, then walk the low range downwards from 0x1F to 0x00.
static {
    control.set(0x7F);
    int code = 0x1F;
    while (code >= 0) {
        control.set(code);
        code--;
    }
}
1275 
/**
 * BitSet for the space character; its only member is code 0x20.
 */
public static final BitSet space = new BitSet(256);
// ' ' is character code 0x20.
static {
    space.set(' ');
}
1284 
1285 
/**
 * BitSet for the delimiter characters: '&lt;', '&gt;', '#', '%' and the
 * double quote.
 */
public static final BitSet delims = new BitSet(256);
// Register each delimiter character in turn.
static {
    final String members = "<>#%\"";
    for (int i = 0; i < members.length(); i++) {
        delims.set(members.charAt(i));
    }
}
1298 
1299 
/**
 * BitSet for the "unwise" characters.
 * <p><blockquote><pre>
 * unwise        = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 * </pre></blockquote><p>
 */
public static final BitSet unwise = new BitSet(256);
// Static initializer for unwise
static {
    unwise.set('{');
    unwise.set('}');
    unwise.set('|');
    // The backslash must be written as the escaped literal '\\'; the former
    // text '//' is not a valid character literal and did not compile.
    unwise.set('\\');
    unwise.set('^');
    unwise.set('[');
    unwise.set(']');
    unwise.set('`');
}
1315 
1316 
/**
 * Characters disallowed in a rel_path before escaping: every
 * <code>uric</code> character that is not also in <code>rel_path</code>.
 */
public static final BitSet disallowed_rel_path = new BitSet(256);
// Set difference: uric minus rel_path.
static {
    disallowed_rel_path.or(uric);           // start with all of uric
    disallowed_rel_path.andNot(rel_path);   // remove what rel_path permits
}
1326 
1327 
/**
 * Characters disallowed in an opaque_part before escaping: every
 * <code>uric</code> character that is not also in
 * <code>opaque_part</code>. Since <code>opaque_part</code> is itself
 * built as a superset of <code>uric</code>, this difference is empty as
 * constructed.
 */
public static final BitSet disallowed_opaque_part = new BitSet(256);
// Set difference: uric minus opaque_part.
static {
    disallowed_opaque_part.or(uric);             // start with all of uric
    disallowed_opaque_part.andNot(opaque_part);  // remove what opaque_part permits
}
1337 
1338     // ----------------------- Characters allowed within and for each component
1339 
/**
 * Characters allowed for the authority component: the
 * <code>authority</code> set with the percent sign removed.
 */
public static final BitSet allowed_authority = new BitSet(256);
// Copy authority, then take '%' out of the copy.
static {
    allowed_authority.or(authority);
    allowed_authority.clear('%');   // a raw percent sign is not allowed
}
1349 
1350 
/**
 * Characters allowed for the opaque_part: the <code>opaque_part</code>
 * set with the percent sign removed.
 */
public static final BitSet allowed_opaque_part = new BitSet(256);
// Copy opaque_part, then take '%' out of the copy.
static {
    allowed_opaque_part.or(opaque_part);
    allowed_opaque_part.clear('%');   // a raw percent sign is not allowed
}
1360 
1361 
/**
 * Characters allowed for the reg_name: the <code>reg_name</code> set
 * with the percent sign removed.
 */
public static final BitSet allowed_reg_name = new BitSet(256);
// Copy reg_name, then take '%' out of the copy.
static {
    allowed_reg_name.or(reg_name);
    allowed_reg_name.clear('%');   // a raw percent sign is not allowed
}
1372 
1373 
/**
 * Characters allowed for the userinfo component: the
 * <code>userinfo</code> set with the percent sign removed.
 */
public static final BitSet allowed_userinfo = new BitSet(256);
// Copy userinfo, then take '%' out of the copy.
static {
    allowed_userinfo.or(userinfo);
    allowed_userinfo.clear('%');   // a raw percent sign is not allowed
}
1384 
1385 
/**
 * Characters allowed within the userinfo component: the
 * <code>within_userinfo</code> set with the percent sign removed.
 */
public static final BitSet allowed_within_userinfo = new BitSet(256);
// Copy within_userinfo, then take '%' out of the copy.
static {
    allowed_within_userinfo.or(within_userinfo);
    allowed_within_userinfo.clear('%');   // a raw percent sign is not allowed
}
1395 
1396 
/**
 * Characters allowed for the IPv6reference component: the
 * <code>IPv6reference</code> set without its enclosing brackets
 * '[' and ']'.
 */
public static final BitSet allowed_IPv6reference = new BitSet(256);
// Copy IPv6reference, then remove the two bracket characters individually.
static {
    allowed_IPv6reference.or(IPv6reference);
    final String brackets = "[]";
    for (int i = 0; i < brackets.length(); i++) {
        allowed_IPv6reference.clear(brackets.charAt(i));
    }
}
1409 
1410 
/**
 * Characters allowed for the host component: the union of
 * <code>hostname</code> and <code>allowed_IPv6reference</code>, so the
 * brackets '[' and ']' of an IPv6reference are not included.
 */
public static final BitSet allowed_host = new BitSet(256);
// Merge the bracket-free IPv6 characters with the host name characters.
static {
    allowed_host.or(allowed_IPv6reference);
    allowed_host.or(hostname);
}
1421 
1422 
/**
 * Characters allowed within the authority component: the union of
 * <code>server</code> and <code>reg_name</code> with the characters
 * ';', ':', '@', '?' and '/' removed.
 */
public static final BitSet allowed_within_authority = new BitSet(256);
// Build the union first, then strip the five excluded characters.
static {
    allowed_within_authority.or(reg_name);
    allowed_within_authority.or(server);
    final String excluded = ";:@?/";
    for (int i = 0; i < excluded.length(); i++) {
        allowed_within_authority.clear(excluded.charAt(i));
    }
}
1437 
1438 
/**
 * Characters allowed for the abs_path: the <code>abs_path</code> set
 * minus every member of <code>percent</code>.
 */
public static final BitSet allowed_abs_path = new BitSet(256);
// '/' needs no explicit set() here because abs_path already contains it.
static {
    allowed_abs_path.or(abs_path);
    allowed_abs_path.andNot(percent);   // remove the percent set
}
1449 
1450 
/**
 * Characters allowed for the rel_path: the <code>rel_path</code> set
 * with the percent sign removed.
 */
public static final BitSet allowed_rel_path = new BitSet(256);
// Copy rel_path, then take '%' out of the copy.
static {
    allowed_rel_path.or(rel_path);
    allowed_rel_path.clear('%');   // a raw percent sign is not allowed
}
1460 
1461 
/**
 * Characters allowed within the path: the <code>abs_path</code> set with
 * the characters '/', ';', '=' and '?' removed.
 */
public static final BitSet allowed_within_path = new BitSet(256);
// Copy abs_path, then strip the four excluded characters.
static {
    allowed_within_path.or(abs_path);
    final String excluded = "/;=?";
    for (int i = 0; i < excluded.length(); i++) {
        allowed_within_path.clear(excluded.charAt(i));
    }
}
1474 
1475 
/**
 * Characters allowed for the query component: the <code>uric</code> set
 * with the percent sign removed.
 */
public static final BitSet allowed_query = new BitSet(256);
// Copy uric, then take '%' out of the copy.
static {
    allowed_query.or(uric);
    allowed_query.clear('%');   // a raw percent sign is not allowed
}
1485 
1486 
/**
 * Characters allowed within the query component: the
 * <code>allowed_query</code> set minus every <code>reserved</code>
 * character and minus '#'.
 */
public static final BitSet allowed_within_query = new BitSet(256);
// The two removals are independent of each other, so their order is free.
static {
    allowed_within_query.or(allowed_query);
    allowed_within_query.clear('#');        // avoid conflict with the fragment
    allowed_within_query.andNot(reserved);  // exclude the reserved characters
}
1497 
1498 
/**
 * Characters allowed for the fragment component: the <code>uric</code>
 * set with the percent sign removed.
 */
public static final BitSet allowed_fragment = new BitSet(256);
// Copy uric, then take '%' out of the copy.
static {
    allowed_fragment.or(uric);
    allowed_fragment.clear('%');   // a raw percent sign is not allowed
}
1508 
1509     // ------------------------------------------- Flags for this URI-reference
1510 
// Per-instance boolean flags, one for each grammar alternative quoted in
// the comments below.  NOTE(review): they are assigned outside this
// section -- confirm where and when each flag is set.
1511     // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1512     // absoluteURI   = scheme ":" ( hier_part | opaque_part )
1513     protected boolean _is_hier_part;
1514     protected boolean _is_opaque_part;
1515     // relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
1516     // hier_part     = ( net_path | abs_path ) [ "?" query ]
1517     protected boolean _is_net_path;
1518     protected boolean _is_abs_path;
1519     protected boolean _is_rel_path;
1520     // net_path      = "//" authority [ abs_path ]
1521     // authority     = server | reg_name
1522     protected boolean _is_reg_name;
1523     protected boolean _is_server;  // = _has_server
1524     // server        = [ [ userinfo "@" ] hostport ]
1525     // host          = hostname | IPv4address | IPv6reference
1526     protected boolean _is_hostname;
1527     protected boolean _is_IPv4address;
1528     protected boolean _is_IPv6reference;
1529 
1530     // ------------------------------------------ Character and escape encoding
1531     
1532     /***
1533      * Encode with the default protocol charset.
1534      *
1535      * @param original the original character sequence
1536      * @param allowed those characters that are allowed within a component
1537      * @return URI character sequence
1538      * @exception IOException null component or unsupported character encoding
1539      */
1540     protected static char[] encode(String original, BitSet allowed)
1541         throws IOException {
1542 
1543         return encode(original, allowed, _protocolCharset);
1544     }
1545 
1546 
1547     /***
1548      * Encodes URI string.
1549      *
1550      * This is a two mapping, one from original characters to octets, and
1551      * subsequently a second from octets to URI characters:
1552      * <p><blockquote><pre>
1553      *   original character sequence->octet sequence->URI character sequence
1554      * </pre></blockquote><p>
1555      *
1556      * An escaped octet is encoded as a character triplet, consisting of the
1557      * percent character "%" followed by the two hexadecimal digits
1558      * representing the octet code. For example, "%20" is the escaped
1559      * encoding for the US-ASCII space character.
1560      * <p>
1561      * Conversion from the local filesystem character set to UTF-8 will
1562      * normally involve a two step process. First convert the local character
1563      * set to the UCS; then convert the UCS to UTF-8.
1564      * The first step in the process can be performed by maintaining a mapping
1565      * table that includes the local character set code and the corresponding
1566      * UCS code.
1567      * The next step is to convert the UCS character code to the UTF-8 encoding.
1568      * <p>
1569      * Mapping between vendor codepages can be done in a very similar manner
1570      * as described above.
1571      * <p>
1572      * The only time escape encodings can allowedly be made is when a URI is
1573      * being created from its component parts.  The escape and validate methods
1574      * are internally performed within this method.
1575      *
1576      * @param original the original character sequence
1577      * @param allowed those characters that are allowed within a component
1578      * @param charset the protocol charset
1579      * @return URI character sequence
1580      * @exception IOException null component or unsupported character encoding
1581      */
1582     protected static char[] encode(String original, BitSet allowed,
1583             String charset) throws IOException {
1584 
1585         // encode original to uri characters.
1586         if (original == null) {
1587             throw new IOException(/*IOException.PARSING,*/ "URI: null");
1588         }
1589         // escape octet to uri characters.
1590         if (allowed == null) {
1591             throw new IOException(/*IOException.PARSING,*/
1592                     "URI: null allowed characters");
1593         }
1594         byte[] octets;
1595         try {
1596             octets = original.getBytes(charset);
1597         } catch (UnsupportedEncodingException error) {
1598             throw new IOException(/*IOException.UNSUPPORTED_ENCODING,*/ "Unsupported Encoding: " + charset);
1599         }
1600         StringBuffer buf = new StringBuffer(octets.length);
1601         for (int i = 0; i < octets.length; i++) {
1602             char c = (char) octets[i];
1603             if (allowed.get(c)) {
1604                 buf.append(c);
1605             } else {
1606                 buf.append('%');
1607                 byte b = octets[i]; // use the original byte value
1608                 char hexadecimal = Character.forDigit((b >> 4) & 0xF, 16);
1609                 buf.append(Character.toUpperCase(hexadecimal)); // high
1610                 hexadecimal = Character.forDigit(b & 0xF, 16);
1611                 buf.append(Character.toUpperCase(hexadecimal)); // low
1612             }
1613         }
1614 
1615         return buf.toString().toCharArray();
1616     }
1617 
1618 
1619     /***
1620      * Decode with the default protocol charset.
1621      *
1622      * @param component the URI character sequence
1623      * @return original character sequence
1624      * @exception IOException incomplete trailing escape pattern
1625      * or unsupported character encoding
1626      */
1627     protected static String decode(char[] component) throws IOException {
1628         return decode(component, _protocolCharset);
1629     }
1630 
1631 
1632     /***
1633      * Decodes URI encoded string.
1634      *
1635      * This is a two mapping, one from URI characters to octets, and
1636      * subsequently a second from octets to original characters:
1637      * <p><blockquote><pre>
1638      *   URI character sequence->octet sequence->original character sequence
1639      * </pre></blockquote><p>
1640      *
1641      * A URI must be separated into its components before the escaped
1642      * characters within those components can be allowedly decoded.
1643      * <p>
1644      * Notice that there is a chance that URI characters that are non UTF-8
1645      * may be parsed as valid UTF-8.  A recent non-scientific analysis found
1646      * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a
1647      * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0%
1648      * false reading.
1649      * <p>
1650      * The percent "%" character always has the reserved purpose of being
1651      * the escape indicator, it must be escaped as "%25" in order to be used
1652      * as data within a URI.
1653      * <p>
1654      * The unescape method is internally performed within this method.
1655      *
1656      * @param component the URI character sequence
1657      * @param charset the protocol charset
1658      * @return original character sequence
1659      * @exception IOException incomplete trailing escape pattern
1660      * or unsupported character encoding
1661      */
1662     protected static String decode(char[] component, String charset)
1663         throws IOException {
1664 
1665         // unescape uri characters to octets
1666         if (component == null)  return null;
1667 
1668         byte[] octets;
1669         try {
1670             octets = new String(component).getBytes(charset);
1671         } catch (UnsupportedEncodingException error) {
1672             throw new IOException(/* IOException.UNSUPPORTED_ENCODING, */
1673                     "URI: not supported " + charset + " encoding");
1674         }
1675         int length = octets.length;
1676         int oi = 0; // output index
1677         for (int ii = 0; ii < length; oi++) {
1678             byte aByte = (byte) octets[ii++];
1679             if (aByte == '%' && ii+2 <= length)  {
1680                 byte high = (byte) Character.digit((char) octets[ii++], 16);
1681                 byte low = (byte) Character.digit((char) octets[ii++], 16);
1682                 if (high == -1 || low == -1) {
1683                     throw new IOException(/* IOException.ESCAPING, */
1684                             "URI: incomplete trailing escape pattern");
1685                             
1686                 }
1687                 aByte = (byte) ((high << 4) + low);
1688             }
1689             octets[oi] = (byte) aByte;
1690         }
1691 
1692         String result;
1693         try {
1694             result = new String(octets, 0, oi, charset);
1695         } catch (UnsupportedEncodingException error) {
1696             throw new IOException(/* IOException.UNSUPPORTED_ENCODING, */
1697                     "URI: not supported " + charset + " encoding");
1698         }
1699 
1700         return result;
1701     }
1702 
1703 
1704     /***
1705      * Pre-validate the unescaped URI string within a specific component.
1706      *
1707      * @param component the component string within the component
1708      * @param disallowed those characters disallowed within the component
1709      * @return if true, it doesn't have the disallowed characters
1710      * if false, the component is undefined or an incorrect one
1711      */
1712     protected boolean prevalidate(String component, BitSet disallowed) {
1713         // prevalidate the given component by disallowed characters
1714         if (component == null) {
1715             return false; // undefined
1716         }
1717         char[] target = component.toCharArray();
1718         for (int i = 0; i < target.length; i++) {
1719             if (disallowed.get(target[i])) {
1720                 return false;
1721             }
1722         }
1723         return true;
1724     }
1725 
1726 
1727     /***
1728      * Validate the URI characters within a specific component.
1729      * The component must be performed after escape encoding. Or it doesn't
1730      * include escaped characters.
1731      *
1732      * @param component the characters sequence within the component
1733      * @param generous those characters that are allowed within a component
1734      * @return if true, it's the correct URI character sequence
1735      */
1736     protected boolean validate(char[] component, BitSet generous) {
1737         // validate each component by generous characters
1738         return validate(component, 0, -1, generous);
1739     }
1740 
1741 
1742     /***
1743      * Validate the URI characters within a specific component.
1744      * The component must be performed after escape encoding. Or it doesn't
1745      * include escaped characters.
1746      * <p>
1747      * It's not that much strict, generous.  The strict validation might be 
1748      * performed before being called this method.
1749      *
1750      * @param component the characters sequence within the component
1751      * @param soffset the starting offset of the given component
1752      * @param eoffset the ending offset of the given component
1753      * if -1, it means the length of the component
1754      * @param generous those characters that are allowed within a component
1755      * @return if true, it's the correct URI character sequence
1756      * @throws NullPointerException null component
1757      */
1758     protected boolean validate(char[] component, int soffset, int eoffset,
1759             BitSet generous) {
1760         // validate each component by generous characters
1761         if (eoffset == -1) {
1762             eoffset = component.length -1;
1763         }
1764         for (int i = soffset; i <= eoffset; i++) {
1765             if (!generous.get(component[i])) return false;
1766         }
1767         return true;
1768     }
1769 
1770 
1771     /***
1772      * In order to avoid any possibility of conflict with non-ASCII characters,
1773      * Parse a URI reference as a <code>String</code> with the character
1774      * encoding of the local system or the document.
1775      * <p>
1776      * The following line is the regular expression for breaking-down a URI
1777      * reference into its components.
1778      * <p><blockquote><pre>
1779      *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1780      *    12            3  4          5       6  7        8 9
1781      * </pre></blockquote><p>
1782      * For example, matching the above expression to
1783      *   http://jakarta.apache.org/ietf/uri/#Related
1784      * results in the following subexpression matches:
1785      * <p><blockquote><pre>
1786      *               $1 = http:
1787      *  scheme    =  $2 = http
1788      *               $3 = //jakarta.apache.org
1789      *  authority =  $4 = jakarta.apache.org
1790      *  path      =  $5 = /ietf/uri/
1791      *               $6 = <undefined>
1792      *  query     =  $7 = <undefined>
1793      *               $8 = #Related
1794      *  fragment  =  $9 = Related
1795      * </pre></blockquote><p>
1796      *
1797      * @param original the original character sequence
1798      * @param escaped <code>true</code> if <code>original</code> is escaped
1799      * @return the original character sequence
1800      * @exception IOException
1801      */
1802     protected void parseUriReference(String original, boolean escaped)
1803         throws IOException {
1804 
1805         // validate and contruct the URI character sequence
1806         if (original == null || original.length() == 0) {
1807             throw new IOException("URI-Reference required");
1808         }
1809 
1810         /*** @
1811          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1812          */
1813         String tmp = original.trim();
1814         
1815         /***
1816          * The length of the string sequence of characters.
1817          * It may not be equal to the length of the byte array.
1818          */
1819         int length = tmp.length();
1820 
1821         /***
1822          * Remove the delimiters like angle brackets around an URI.
1823          */
1824         char[] firstDelimiter = { tmp.charAt(0) };
1825         if (validate(firstDelimiter, delims)) {
1826             if (length >= 2) {
1827                 char[] lastDelimiter = { tmp.charAt(length - 1) };
1828                 if (validate(lastDelimiter, delims)) {
1829                     tmp = tmp.substring(1, length - 1);
1830                     length = length - 2;
1831                 }
1832             }
1833         }
1834 
1835         /***
1836          * The starting index
1837          */
1838         int from = 0;
1839 
1840         /***
1841          * The test flag whether the URI is started from the path component.
1842          */
1843         boolean isStartedFromPath = false;
1844         int atColon = tmp.indexOf(':');
1845         int atSlash = tmp.indexOf('/');
1846         if (atColon < 0 || (atSlash >= 0 && atSlash < atColon)) {
1847             isStartedFromPath = true;
1848         }
1849 
1850         /***
1851          * <p><blockquote><pre>
1852          *     @@@@@@@@
1853          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1854          * </pre></blockquote><p>
1855          */
1856         int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
1857         if (at == -1) at = 0;
1858 
1859         /***
1860          * Parse the scheme.
1861          * <p><blockquote><pre>
1862          *  scheme    =  $2 = http
1863          *              @
1864          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1865          * </pre></blockquote><p>
1866          */
1867         if (at < length && tmp.charAt(at) == ':') {
1868             char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
1869             if (validate(target, scheme)) {
1870                 _scheme = target;
1871             } else {
1872                 throw new IOException("incorrect scheme");
1873             }
1874             from = ++at;
1875         }
1876 
1877         /***
1878          * Parse the authority component.
1879          * <p><blockquote><pre>
1880          *  authority =  $4 = jakarta.apache.org
1881          *                  @@
1882          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1883          * </pre></blockquote><p>
1884          */
1885         // Reset flags
1886         _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
1887         if (0 <= at && at < length && tmp.charAt(at) == '/') {
1888             // Set flag
1889             _is_hier_part = true;
1890             if (at + 2 < length && tmp.charAt(at + 1) == '/') {
1891                 // the temporary index to start the search from
1892                 int next = indexFirstOf(tmp, "/?#", at + 2);
1893                 if (next == -1) {
1894                     next = (tmp.substring(at + 2).length() == 0) ? at + 2 :
1895                     tmp.length();
1896                 }
1897                 parseAuthority(tmp.substring(at + 2, next), escaped);
1898                 from = at = next;
1899                 // Set flag
1900                 _is_net_path = true;
1901             }
1902             if (from == at) {
1903                 // Set flag
1904                 _is_abs_path = true;
1905             }
1906         }
1907 
1908         /***
1909          * Parse the path component.
1910          * <p><blockquote><pre>
1911          *  path      =  $5 = /ietf/uri/
1912          *                                @@@@@@
1913          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1914          * </pre></blockquote><p>
1915          */
1916         if (from < length) {
1917             // rel_path = rel_segment [ abs_path ]
1918             int next = indexFirstOf(tmp, "?#", from);
1919             if (next == -1) {
1920                 next = tmp.length();
1921             }
1922             if (!_is_abs_path) {
1923                 if (!escaped && prevalidate(tmp.substring(from, next),
1924                             disallowed_rel_path) || escaped &&
1925                         validate(tmp.substring(from, next).toCharArray(),
1926                             rel_path)) {
1927                     // Set flag
1928                     _is_rel_path = true;
1929                 } else if (!escaped && prevalidate(tmp.substring(from, next),
1930                             disallowed_opaque_part) || escaped &&
1931                         validate(tmp.substring(from, next).toCharArray(),
1932                             opaque_part)) {
1933                     // Set flag
1934                     _is_opaque_part = true;
1935                 } else {
1936                     // the path component may be empty
1937                     _path = null;
1938                 }
1939             }
1940             setPath(tmp.substring(from, next));
1941             at = next;
1942         }
1943 
1944         /***
1945          * Parse the query component.
1946          * <p><blockquote><pre>
1947          *  query     =  $7 = <undefined>
1948          *                                        @@@@@@@@@
1949          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1950          * </pre></blockquote><p>
1951          */
1952         if (0 <= at && at+1 < length && tmp.charAt(at) == '?') {
1953             int next = tmp.indexOf('#', at + 1);
1954             if (next == -1) {
1955                 next = tmp.length();
1956             }
1957             _query = (escaped) ? tmp.substring(at + 1, next).toCharArray() :
1958             encode(tmp.substring(at + 1, next), allowed_query);
1959             at = next;
1960         }
1961 
1962         /***
1963          * Parse the fragment component.
1964          * <p><blockquote><pre>
1965          *  fragment  =  $9 = Related
1966          *                                                   @@@@@@@@
1967          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1968          * </pre></blockquote><p>
1969          */
1970         if (0 <= at && at+1 < length && tmp.charAt(at) == '#') {
1971             _fragment = (escaped) ? tmp.substring(at + 1).toCharArray() :
1972             encode(tmp.substring(at + 1), allowed_fragment);
1973         }
1974 
1975         // set this URI.
1976         setUriReference();
1977     }
1978 
1979 
1980     /***
1981      * Get the earlier index that to be searched for the first occurrance in
1982      * one of any of the given string.
1983      *
1984      * @param s the string to be indexed
1985      * @param delims the delimiters used to index
1986      * @return the earlier index if there are delimiters
1987      */
1988     protected int indexFirstOf(String s, String delims) {
1989         return indexFirstOf(s, delims, -1);
1990     }
1991 
1992 
1993     /***
1994      * Get the earlier index that to be searched for the first occurrance in
1995      * one of any of the given string.
1996      *
1997      * @param s the string to be indexed
1998      * @param delims the delimiters used to index
1999      * @param offset the from index
2000      * @return the earlier index if there are delimiters
2001      */
2002     protected int indexFirstOf(String s, String delims, int offset) {
2003         if (s == null || s.length() == 0) {
2004             return -1;
2005         }
2006         if (delims == null || delims.length() == 0) {
2007             return -1;
2008         }
2009         // check boundaries
2010         if (offset < 0) {
2011             offset = 0;
2012         } else if (offset > s.length()) {
2013             return -1;
2014         }
2015         // s is never null
2016         int min = s.length();
2017         char[] delim = delims.toCharArray();
2018         for (int i = 0; i < delim.length; i++) {
2019             int at = s.indexOf(delim[i], offset);
2020             if (at >= 0 && at < min) {
2021                 min = at;
2022             }
2023         }
2024         return (min == s.length()) ? -1 : min;
2025     }
2026 
2027 
2028     /***
2029      * Get the earlier index that to be searched for the first occurrance in
2030      * one of any of the given array.
2031      *
2032      * @param s the character array to be indexed
2033      * @param delim the delimiter used to index
2034      * @return the ealier index if there are a delimiter
2035      */
2036     protected int indexFirstOf(char[] s, char delim) {
2037         return indexFirstOf(s, delim, 0);
2038     }
2039 
2040 
2041     /***
2042      * Get the earlier index that to be searched for the first occurrance in
2043      * one of any of the given array.
2044      *
2045      * @param s the character array to be indexed
2046      * @param delim the delimiter used to index
2047      * @return the ealier index if there is a delimiter
2048      */
2049     protected int indexFirstOf(char[] s, char delim, int offset) {
2050         if (s == null || s.length == 0) {
2051             return -1;
2052         }
2053         // check boundaries
2054         if (offset < 0) {
2055             offset = 0;
2056         } else if (offset > s.length) {
2057             return -1;
2058         }
2059         for (int i = offset; i < s.length; i++) {
2060             if (s[i] == delim) {
2061                 return i;
2062             }
2063         }
2064         return -1;
2065     }
2066 
2067 
    /***
     * Parse the authority component and store its parts in this URI.
     * <p>
     * The text before the first '@', if any, becomes the userinfo.  If a '['
     * follows, the host is taken as an IPv6reference running up to and
     * including the closing ']'.  Otherwise the host runs up to the first ':'
     * (or the end) and is classified as IPv4address, hostname or, failing
     * both, the authority is treated as a reg_name.
     * <p>
     * For a reg_name the whole of <code>original</code> becomes the
     * authority.  For a server-based authority an optional ":port" is parsed
     * and the authority is rebuilt from userinfo, host and port.
     *
     * @param original the original character sequence of authority component
     * @param escaped <code>true</code> if <code>original</code> is escaped
     * @exception IOException if an IPv6reference has no closing ']' or if
     * the port is not a valid integer
     */
    protected void parseAuthority(String original, boolean escaped)
        throws IOException {

        // Reset flags
        _is_reg_name = _is_server =
        _is_hostname = _is_IPv4address = _is_IPv6reference = false;

        // NOTE(review): _userinfo and _port are only assigned below when the
        // corresponding part is present in 'original'; values left over from
        // an earlier parse appear to persist -- confirm whether intended.

        // cleared below when the host is not followed by a ':'
        boolean has_port = true;
        int from = 0;
        int next = original.indexOf('@');
        if (next != -1) { // '@' found; index 0 (empty userinfo) is accepted
            // each protocol extended from URI supports the specific userinfo
            _userinfo = (escaped) ? original.substring(0, next).toCharArray() :
            encode(original.substring(0, next), allowed_userinfo);
            // the host starts right after the '@'
            from = next + 1;
        }
        next = original.indexOf('[', from);
        if (next >= from) {
            // a '[' was found: treat the host as an IPv6reference
            next = original.indexOf(']', from);
            if (next == -1) {
                throw new IOException(/* IOException.PARSING,*/ "URI: IPv6reference");
            } else {
                // move past the ']' so that it is part of the host substring
                next++;
            }
            // NOTE: the substring stored in _host still contains the
            // enclosing '[' and ']' characters
            _host = (escaped) ? original.substring(from, next).toCharArray() :
            encode(original.substring(from, next), allowed_IPv6reference);
            // Set flag
            _is_IPv6reference = true;
        } else { // only for !_is_IPv6reference
            next = original.indexOf(':', from);
            if (next == -1) {
                // no ':' at all: the host runs to the end, there is no port
                next = original.length();
                has_port = false;
            }
            // REMINDME: it doesn't need the pre-validation
            _host = original.substring(from, next).toCharArray();
            if (validate(_host, IPv4address)) {
                // Set flag
                _is_IPv4address = true;
            } else if (validate(_host, hostname)) {
                // Set flag
                _is_hostname = true;
            } else {
                // neither IPv4address nor hostname characters only
                // Set flag
                _is_reg_name = true;
            }
        }
        if (_is_reg_name) {
            // Reset flags for a server-based naming authority
            _is_server = _is_hostname = _is_IPv4address =
            _is_IPv6reference = false;
            // set a registry-based naming authority
            _authority = (escaped) ? original.toString().toCharArray() :
            encode(original.toString(), allowed_reg_name);
        } else {
            // 'next' is the index just past the host; a port is parsed only
            // when a ':' sits there and at least one character follows it
            if (original.length()-1 > next && has_port &&
                    original.charAt(next) == ':') { // not empty
                from = next + 1;
                try {
                    _port = Integer.parseInt(original.substring(from));
                } catch (NumberFormatException error) {
                    throw new IOException(/*IOException.PARSING, */
                            "URI: invalid port number");
                }
            }
            // set a server-based naming authority
            StringBuffer buf = new StringBuffer();
            if (_userinfo != null) { // has_userinfo
                buf.append(_userinfo);
                buf.append('@');
            }
            if (_host != null) {
                buf.append(_host);
                // -1 is treated as "no explicit port"
                if (_port != -1) {
                    buf.append(':');
                    buf.append(_port);
                }
            }
            _authority = buf.toString().toCharArray();
            // Set flag
            _is_server = true;
        }
    }
2159 
2160 
2161     /***
2162      * Once it's parsed successfully, set this URI.
2163      *
2164      * @see #getRawURI
2165      */
2166     protected void setUriReference() {
2167         // set _uri
2168         StringBuffer buf = new StringBuffer();
2169         // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2170         if (_scheme != null) {
2171             buf.append(_scheme);
2172             buf.append(':');
2173         }
2174         if (_is_net_path) {
2175             buf.append("//");
2176             if (_authority != null) { // has_authority
2177                 if (_userinfo != null) { // by default, remove userinfo part
2178                     if (_host != null) {
2179                         buf.append(_host);
2180                         if (_port != -1) {
2181                             buf.append(':');
2182                             buf.append(_port);
2183                         }
2184                     }
2185                 } else {
2186                     buf.append(_authority);
2187                 }
2188             }
2189         }
2190         if (_opaque != null && _is_opaque_part) {
2191             buf.append(_opaque);
2192         } else if (_path != null) {
2193             // _is_hier_part or _is_relativeURI
2194             if (_path.length != 0) {
2195                 buf.append(_path);
2196             }
2197         }
2198         if (_query != null) { // has_query
2199             buf.append('?');
2200             buf.append(_query);
2201         }
2202         if (_fragment != null) { // has_fragment
2203             buf.append('#');
2204             buf.append(_fragment);
2205         }
2206 
2207         _uri = buf.toString().toCharArray();
2208     }
2209 
2210     // ----------------------------------------------------------- Test methods
2211   
2212 
2213     /***
2214      * Tell whether or not this URI is absolute.
2215      *
2216      * @return true iif this URI is absoluteURI
2217      */
2218     public boolean isAbsoluteURI() {
2219         return (_scheme != null);
2220     }
2221   
2222 
2223     /***
2224      * Tell whether or not this URI is relative.
2225      *
2226      * @return true iif this URI is relativeURI
2227      */
2228     public boolean isRelativeURI() {
2229         return (_scheme == null);
2230     }
2231 
2232 
2233     /***
2234      * Tell whether or not the absoluteURI of this URI is hier_part.
2235      *
2236      * @return true iif the absoluteURI is hier_part
2237      */
2238     public boolean isHierPart() {
2239         return _is_hier_part;
2240     }
2241 
2242 
2243     /***
2244      * Tell whether or not the absoluteURI of this URI is opaque_part.
2245      *
2246      * @return true iif the absoluteURI is opaque_part
2247      */
2248     public boolean isOpaquePart() {
2249         return _is_opaque_part;
2250     }
2251 
2252 
2253     /***
2254      * Tell whether or not the relativeURI or heir_part of this URI is net_path.
2255      * It's the same function as the has_authority() method.
2256      *
2257      * @return true iif the relativeURI or heir_part is net_path
2258      * @see #hasAuthority
2259      */
2260     public boolean isNetPath() {
2261         return _is_net_path || (_authority != null);
2262     }
2263 
2264 
2265     /***
2266      * Tell whether or not the relativeURI or hier_part of this URI is abs_path.
2267      *
2268      * @return true iif the relativeURI or hier_part is abs_path
2269      */
2270     public boolean isAbsPath() {
2271         return _is_abs_path;
2272     }
2273 
2274 
2275     /***
2276      * Tell whether or not the relativeURI of this URI is rel_path.
2277      *
2278      * @return true iif the relativeURI is rel_path
2279      */
2280     public boolean isRelPath() {
2281         return _is_rel_path;
2282     }
2283 
2284 
2285     /***
2286      * Tell whether or not this URI has authority.
2287      * It's the same function as the is_net_path() method.
2288      *
2289      * @return true iif this URI has authority
2290      * @see #isNetPath
2291      */
2292     public boolean hasAuthority() {
2293         return (_authority != null) || _is_net_path;
2294     }
2295 
2296     /***
2297      * Tell whether or not the authority component of this URI is reg_name.
2298      *
2299      * @return true iif the authority component is reg_name
2300      */
2301     public boolean isRegName() {
2302         return _is_reg_name;
2303     }
2304   
2305 
2306     /***
2307      * Tell whether or not the authority component of this URI is server.
2308      *
2309      * @return true iif the authority component is server
2310      */
2311     public boolean isServer() {
2312         return _is_server;
2313     }
2314   
2315 
2316     /***
2317      * Tell whether or not this URI has userinfo.
2318      *
2319      * @return true iif this URI has userinfo
2320      */
2321     public boolean hasUserinfo() {
2322         return (_userinfo != null);
2323     }
2324   
2325 
2326     /***
2327      * Tell whether or not the host part of this URI is hostname.
2328      *
2329      * @return true iif the host part is hostname
2330      */
2331     public boolean isHostname() {
2332         return _is_hostname;
2333     }
2334 
2335 
2336     /***
2337      * Tell whether or not the host part of this URI is IPv4address.
2338      *
2339      * @return true iif the host part is IPv4address
2340      */
2341     public boolean isIPv4address() {
2342         return _is_IPv4address;
2343     }
2344 
2345 
2346     /***
2347      * Tell whether or not the host part of this URI is IPv6reference.
2348      *
2349      * @return true iif the host part is IPv6reference
2350      */
2351     public boolean isIPv6reference() {
2352         return _is_IPv6reference;
2353     }
2354 
2355 
2356     /***
2357      * Tell whether or not this URI has query.
2358      *
2359      * @return true iif this URI has query
2360      */
2361     public boolean hasQuery() {
2362         return (_query != null);
2363     }
2364    
2365 
2366     /***
2367      * Tell whether or not this URI has fragment.
2368      *
2369      * @return true iif this URI has fragment
2370      */
2371     public boolean hasFragment() {
2372         return (_fragment != null);
2373     }
2374    
2375    
2376     // ---------------------------------------------------------------- Charset
2377 
2378 
    /***
     * Set the default charset of the protocol.
     * <p>
     * The character set used to store files SHALL remain a local decision and
     * MAY depend on the capability of local operating systems. Prior to the
     * exchange of URIs they SHOULD be converted into an ISO/IEC 10646 format
     * and UTF-8 encoded. This approach, while allowing international exchange
     * of URIs, will still allow backward compatibility with older systems
     * because the code set positions for ASCII characters are identical to the
     * one byte sequence in UTF-8.
     * <p>
     * An individual URI scheme may require a single charset, define a default
     * charset, or provide a way to indicate the charset used.
     * <p>
     * The value is kept in a static field, so the setting is shared rather
     * than per instance.  The argument is stored as given, without
     * validation.
     *
     * @param charset the default charset for each protocol
     */
    public static void setProtocolCharset(String charset) {
        _protocolCharset = charset;
    }
2398 
2399 
    /***
     * Get the default charset of the protocol.
     * <p>
     * An individual URI scheme may require a single charset, define a default
     * charset, or provide a way to indicate the charset used.
     * <p>
     * To work globally either requires support of a number of character sets
     * and the ability to convert between them, or the use of a single
     * preferred character set.
     * For support of global compatibility it is STRONGLY RECOMMENDED that
     * clients and servers use UTF-8 encoding when exchanging URIs.
     *
     * @return the protocol charset name held in the shared static field
     */
    public static String getProtocolCharset() {
        return _protocolCharset;
    }
2417 
2418 
    /***
     * Set the default charset of the document.
     * <p>
     * Notice that a URI may contain mixed characters (e.g.
     * ftp://host/KoreanNamespace/ChineseResource). To handle the
     * bi-directional display of these character sets, the protocol charset
     * could simply be used again, because the insertion of BIDI control
     * characters at different points during composition is not yet
     * implemented.
     * <p>
     * The value is kept in a static field, so the setting is shared rather
     * than per instance.  The argument is stored as given, without
     * validation.
     *
     * @param charset the default charset for the document
     */
    public static void setDocumentCharset(String charset) {
        _documentCharset = charset;
    }
2433 
2434 
    /***
     * Get the default charset of the document.
     *
     * @return the document charset name held in the shared static field
     */
    public static String getDocumentCharset() {
        return _documentCharset;
    }
2443 
2444     // ------------------------------------------------------------- The scheme
2445 
2446     /***
2447      * Get the scheme.
2448      *
2449      * @return the scheme
2450      */
2451     public char[] getRawScheme() {
2452         return _scheme;
2453     }
2454 
2455 
2456     /***
2457      * Get the scheme.
2458      *
2459      * @return the scheme
2460      * null if undefined scheme
2461      */
2462     public String getScheme() {
2463         return (_scheme == null) ? null : new String(_scheme);
2464     }
2465 
2466     // ---------------------------------------------------------- The authority
2467 
2468     /***
2469      * Set the authority.  It can be one type of server, hostport, hostname,
2470      * IPv4address, IPv6reference and reg_name.
2471      * <p><blockquote><pre>
2472      *   authority     = server | reg_name
2473      * </pre></blockquote><p>
2474      *
2475      * @param escapedAuthority the raw escaped authority
2476      * @exception IOException
2477      * @throws NullPointerException null authority
2478      */
2479     public void setRawAuthority(char[] escapedAuthority) throws IOException {
2480         parseAuthority(new String(escapedAuthority), true);
2481         setUriReference();
2482     }
2483 
2484 
2485     /***
2486      * Set the authority.  It can be one type of server, hostport, hostname,
2487      * IPv4address, IPv6reference and reg_name.
2488      * Note that there is no setAuthority method by the escape encoding reason.
2489      *
2490      * @param escapedAuthority the escaped authority string
2491      * @exception IOException
2492      */
2493     public void setEscapedAuthority(String escapedAuthority)
2494         throws IOException {
2495 
2496         parseAuthority(escapedAuthority, true);
2497         setUriReference();
2498     }
2499 
2500 
2501     /***
2502      * Get the raw-escaped authority.
2503      *
2504      * @return the raw-escaped authority
2505      */
2506     public char[] getRawAuthority() {
2507         return _authority;
2508     }
2509 
2510 
2511     /***
2512      * Get the escaped authority.
2513      *
2514      * @return the escaped authority
2515      */
2516     public String getEscapedAuthority() {
2517         return (_authority == null) ? null : new String(_authority);
2518     }
2519 
2520 
2521     /***
2522      * Get the authority.
2523      *
2524      * @return the authority
2525      * @exception IOException
2526      * @see #decode
2527      */
2528     public String getAuthority() throws IOException {
2529         return (_authority == null) ? null : decode(_authority);
2530     }
2531 
2532     // ----------------------------------------------------------- The userinfo
2533 
2534     /***
2535      * Get the raw-escaped userinfo.
2536      *
2537      * @return the raw-escaped userinfo
2538      * @see #getAuthority
2539      */
2540     public char[] getRawUserinfo() {
2541         return _userinfo;
2542     }
2543 
2544 
2545     /***
2546      * Get the escaped userinfo.
2547      *
2548      * @return the escaped userinfo
2549      * @see #getAuthority
2550      */
2551     public String getEscapedUserinfo() {
2552         return (_userinfo == null) ? null : new String(_userinfo);
2553     }
2554 
2555 
2556     /***
2557      * Get the userinfo.
2558      *
2559      * @return the userinfo
2560      * @exception IOException
2561      * @see #decode
2562      * @see #getAuthority
2563      */
2564     public String getUserinfo() throws IOException {
2565         return (_userinfo == null) ? null : decode(_userinfo);
2566     }
2567 
2568     // --------------------------------------------------------------- The host
2569 
2570     /***
2571      * Get the host.
2572      * <p><blockquote><pre>
2573      *   host          = hostname | IPv4address | IPv6reference
2574      * </pre></blockquote><p>
2575      *
2576      * @return the host
2577      * @see #getAuthority
2578      */
2579     public char[] getRawHost() {
2580         return _host;
2581     }
2582 
2583 
2584     /***
2585      * Get the host.
2586      * <p><blockquote><pre>
2587      *   host          = hostname | IPv4address | IPv6reference
2588      * </pre></blockquote><p>
2589      *
2590      * @return the host
2591      * @exception IOException
2592      * @see #decode
2593      * @see #getAuthority
2594      */
2595     public String getHost() throws IOException {
2596         return decode(_host);
2597     }
2598 
2599     // --------------------------------------------------------------- The port
2600 
2601     /***
2602      * Get the port.  In order to get the specfic default port, the specific
2603      * protocol-supported class extended from the URI class should be used.
2604      * It has the server-based naming authority.
2605      *
2606      * @return the port
2607      * if -1, it has the default port for the scheme or the server-based
2608      * naming authority is not supported in the specific URI.
2609      */
2610     public int getPort() {
2611         return _port;
2612     }
2613 
2614     // --------------------------------------------------------------- The path
2615 
2616     /***
2617      * Set the path.   The method couldn't be used by API programmers.
2618      *
2619      * @param path the path string
2620      * @exception IOException set incorrectly or fragment only
2621      * @see #encode
2622      */
2623     protected void setPath(String path) throws IOException {
2624 
2625         // set path
2626         if (_is_net_path || _is_abs_path) {
2627             _path = encode(path, allowed_abs_path);
2628         } else if (_is_rel_path) {
2629             StringBuffer buff = new StringBuffer(path.length());
2630             int at = path.indexOf('/');
2631             if (at > 0) {  // never 0
2632                 buff.append(encode(path.substring(0, at), allowed_rel_path));
2633                 buff.append(encode(path.substring(at), allowed_abs_path));
2634             } else {
2635                 buff.append(encode(path, allowed_rel_path));
2636             }
2637             _path = buff.toString().toCharArray();
2638         } else if (_is_opaque_part) {
2639             _opaque = encode(path, allowed_opaque_part);
2640         } else {
2641             throw new IOException(/*IOException.PARSING, */"URI: incorrect path");
2642         }
2643     }
2644 
2645 
2646     /***
2647      * Resolve the base and relative path.
2648      *
2649      * @param base_path a character array of the base_path
2650      * @param rel_path a character array of the rel_path
2651      * @return the resolved path
2652      */
2653     protected char[] resolvePath(char[] base_path, char[] rel_path) {
2654 
2655         // REMINDME: paths are never null
2656         String base = (base_path == null) ? "" : new String(base_path);
2657         int at = base.lastIndexOf('/');
2658         if (at != -1) {
2659             base_path = base.substring(0, at + 1).toCharArray();
2660         }
2661         // _path could be empty
2662         if (rel_path == null || rel_path.length == 0) {
2663             return normalize(base_path);
2664         } else if (rel_path[0] == '/') {
2665             return rel_path;
2666         } else {
2667             StringBuffer buff = new StringBuffer(base.length() +
2668                 rel_path.length);
2669             if (at != -1) {
2670                 buff.append(base.substring(0, at + 1));
2671                 buff.append(rel_path);
2672             }
2673             return normalize(buff.toString().toCharArray());
2674         }
2675     }
2676 
2677 
2678     /***
2679      * Get the raw-escaped current hierarchy level in the given path.
2680      * If the last namespace is a collection, the slash mark ('/') should be
2681      * ended with at the last character of the path string.
2682      *
2683      * @param path the path
2684      * @return the current hierarchy level
2685      * @exception IOException no hierarchy level
2686      */
2687     protected char[] getRawCurrentHierPath(char[] path) throws IOException {
2688 
2689         if (_is_opaque_part) {
2690             throw new IOException(/*IOException.PARSING,*/ "URI: no hierarchy level");
2691         }
2692         if (path == null) {
2693             throw new IOException(/*IOException.PARSING,*/ "URI: emtpy path");
2694         }
2695         String buff = new String(path);
2696         int first = buff.indexOf('/');
2697         int last = buff.lastIndexOf('/');
2698         if (last == 0) {
2699             return rootPath;
2700         } else if (first != last && last != -1) {
2701             return buff.substring(0, last).toCharArray();
2702         }
2703         // FIXME: it could be a document on the server side
2704         return path;
2705     }
2706 
2707 
2708     /***
2709      * Get the raw-escaped current hierarchy level.
2710      *
2711      * @return the raw-escaped current hierarchy level
2712      * @exception IOException no hierarchy level
2713      */
2714     public char[] getRawCurrentHierPath() throws IOException {
2715         return (_path == null) ? null : getRawCurrentHierPath(_path);
2716     }
2717  
2718 
2719     /***
2720      * Get the escaped current hierarchy level.
2721      *
2722      * @return the escaped current hierarchy level
2723      * @exception IOException no hierarchy level
2724      */
2725     public String getEscapedCurrentHierPath() throws IOException {
2726         char[] path = getRawCurrentHierPath();
2727         return (path == null) ? null : new String(path);
2728     }
2729  
2730 
2731     /***
2732      * Get the current hierarchy level.
2733      *
2734      * @return the current hierarchy level
2735      * @exception IOException
2736      * @see #decode
2737      */
2738     public String getCurrentHierPath() throws IOException {
2739         char[] path = getRawCurrentHierPath();
2740         return (path == null) ? null : decode(path);
2741     }
2742 
2743 
2744     /***
2745      * Get the level above the this hierarchy level.
2746      *
2747      * @return the raw above hierarchy level
2748      * @exception IOException
2749      */
2750     public char[] getRawAboveHierPath() throws IOException {
2751         char[] path = getRawCurrentHierPath();
2752         return (path == null) ? null : getRawCurrentHierPath(path);
2753     }
2754 
2755 
2756     /***
2757      * Get the level above the this hierarchy level.
2758      *
2759      * @return the raw above hierarchy level
2760      * @exception IOException
2761      */
2762     public String getEscapedAboveHierPath() throws IOException {
2763         char[] path = getRawAboveHierPath();
2764         return (path == null) ? null : new String(path);
2765     }
2766 
2767 
2768     /***
2769      * Get the level above the this hierarchy level.
2770      *
2771      * @return the above hierarchy level
2772      * @exception IOException
2773      * @see #decode
2774      */
2775     public String getAboveHierPath() throws IOException {
2776         char[] path = getRawAboveHierPath();
2777         return (path == null) ? null : decode(path);
2778     }
2779 
2780 
2781     /***
2782      * Get the raw-escaped path.
2783      * <p><blockquote><pre>
2784      *   path          = [ abs_path | opaque_part ]
2785      * </pre></blockquote><p>
2786      *
2787      * @return the raw-escaped path
2788      */
2789     public char[] getRawPath() {
2790         return _is_opaque_part ? _opaque : _path;
2791     }
2792 
2793 
2794     /***
2795      * Get the escaped path.
2796      * <p><blockquote><pre>
2797      *   path          = [ abs_path | opaque_part ]
2798      *   abs_path      = "/"  path_segments 
2799      *   opaque_part   = uric_no_slash *uric
2800      * </pre></blockquote><p>
2801      *
2802      * @return the escaped path string
2803      */
2804     public String getEscapedPath() {
2805         char[] path = getRawPath();
2806         return (path == null) ? null : new String(path);
2807     }
2808 
2809 
2810     /***
2811      * Get the path.
2812      * <p><blockquote><pre>
2813      *   path          = [ abs_path | opaque_part ]
2814      * </pre></blockquote><p>
2815      * @return the path string
2816      * @exception IOException
2817      * @see #decode
2818      */
2819     public String getPath() throws IOException { 
2820         char[] path =  getRawPath();
2821         return (path == null) ? null : decode(path);
2822     }
2823 
2824 
2825     /***
2826      * Get the raw-escaped basename of the path.
2827      *
2828      * @return the raw-escaped basename
2829      */
2830     public char[] getRawName() {
2831         if (_path == null) return null;
2832 
2833         int at = 0;
2834         for (int i = _path.length - 1; i >= 0; i--) {
2835             if (_path[i] == '/') {
2836                 at = i + 1;
2837                 break;
2838             }
2839         }
2840         int len = _path.length - at;
2841         char[] basename =  new char[len];
2842         System.arraycopy(_path, at, basename, 0, len);
2843         return basename;
2844     }
2845 
2846 
2847     /***
2848      * Get the escaped basename of the path.
2849      *
2850      * @return the escaped basename string
2851      */
2852     public String getEscapedName() {
2853         char[] basename = getRawName();
2854         return (basename == null) ? null : new String(basename);
2855     }
2856 
2857 
2858     /***
2859      * Get the basename of the path.
2860      *
2861      * @return the basename string
2862      * @exception IOException incomplete trailing escape pattern
2863      * Or unsupported character encoding
2864      * @see #decode
2865      */
2866     public String getName() throws IOException {
2867         char[] basename = getRawName();
2868         return (basename == null) ? null : decode(getRawName());
2869     }
2870 
2871     // ----------------------------------------------------- The path and query 
2872 
2873     /***
2874      * Get the raw-escaped path and query.
2875      *
2876      * @return the raw-escaped path and query
2877      */
2878     public char[] getRawPathQuery() {
2879 
2880         if (_path == null && _query == null) {
2881             return null;
2882         }
2883         StringBuffer buff = new StringBuffer();
2884         if (_path != null) {
2885             buff.append(_path);
2886         }
2887         if (_query != null) {
2888             buff.append('?');
2889             buff.append(_query);
2890         }
2891         return buff.toString().toCharArray();
2892     }
2893 
2894 
2895     /***
2896      * Get the escaped query.
2897      *
2898      * @return the escaped path and query string
2899      */
2900     public String getEscapedPathQuery() {
2901         char[] rawPathQuery = getRawPathQuery();
2902         return (rawPathQuery == null) ? null : new String(rawPathQuery);
2903     }
2904 
2905 
2906     /***
2907      * Get the path and query.
2908      *
2909      * @return the path and query string.
2910      * @exception IOException incomplete trailing escape pattern
2911      * Or unsupported character encoding
2912      * @see #decode
2913      */
2914     public String getPathQuery() throws IOException {
2915         char[] rawPathQuery = getRawPathQuery();
2916         return (rawPathQuery == null) ? null : decode(rawPathQuery);
2917     }
2918 
2919     // -------------------------------------------------------------- The query 
2920 
2921     /***
2922      * Set the raw-escaped query.
2923      *
2924      * @param escapedQuery the raw-escaped query
2925      * @exception IOException escaped query not valid
2926      * @throws NullPointerException null query
2927      */
2928     public void setRawQuery(char[] escapedQuery) throws IOException {
2929         if (!validate(escapedQuery, query))
2930             throw new IOException(/*IOException.ESCAPING,*/
2931                     "URI: escaped query not valid");
2932         _query = escapedQuery;
2933         setUriReference();
2934     }
2935 
2936 
2937     /***
2938      * Set the escaped query string.
2939      *
2940      * @param escapedQuery the escaped query string
2941      * @exception IOException escaped query not valid
2942      * @throws NullPointerException null query
2943      */
2944     public void setEscapedQuery(String escapedQuery) throws IOException {
2945         setRawQuery(escapedQuery.toCharArray());
2946     }
2947 
2948 
2949     /***
2950      * Set the query.
2951      * When a query string is not misunderstood the reserved special characters
2952      * ("&amp;", "=", "+", ",", and "$") within a query component, it is
2953      * recommended to use in encoding the whole query with this method.
2954      *
2955      * @param query the query string.
2956      * @exception IOException incomplete trailing escape pattern
2957      * Or unsupported character encoding
2958      * @throws NullPointerException null query
2959      * @see #encode
2960      */
2961     public void setQuery(String query) throws IOException {
2962         setRawQuery(encode(query, allowed_query));
2963     }
2964 
2965 
2966     /***
2967      * Get the raw-escaped query.
2968      *
2969      * @return the raw-escaped query
2970      */
2971     public char[] getRawQuery() {
2972         return _query;
2973     }
2974 
2975 
2976     /***
2977      * Get the escaped query.
2978      *
2979      * @return the escaped query string
2980      */
2981     public String getEscapedQuery() {
2982         return (_query == null) ? null : new String(_query);
2983     }
2984 
2985 
2986     /***
2987      * Get the query.
2988      *
2989      * @return the query string.
2990      * @exception IOException incomplete trailing escape pattern
2991      * Or unsupported character encoding
2992      * @see #decode
2993      */
2994     public String getQuery() throws IOException {
2995         return (_query == null) ? null : decode(_query);
2996     }
2997 
2998     // ----------------------------------------------------------- The fragment 
2999 
3000     /***
3001      * Set the raw-escaped fragment.
3002      *
3003      * @param escapedFragment the raw-escaped fragment
3004      * @exception IOException escaped fragment not valid
3005      * @throws NullPointerException null fragment
3006      */
3007     public void setRawFragment(char[] escapedFragment) throws IOException {
3008         if (!validate(escapedFragment, fragment))
3009             throw new IOException(/*IOException.ESCAPING,*/
3010                     "URI: escaped fragment not valid");
3011         _fragment = escapedFragment;
3012         setUriReference();
3013     }
3014 
3015 
3016     /***
3017      * Set the escaped fragment string.
3018      *
3019      * @param escapedFragment the escaped fragment string
3020      * @exception IOException escaped fragment not valid
3021      * @throws NullPointerException null fragment
3022      */
3023     public void setEscapedFragment(String escapedFragment) throws IOException {
3024         char[] fragmentSequence = escapedFragment.toCharArray();
3025         if (!validate(fragmentSequence, fragment))
3026             throw new IOException(/*IOException.ESCAPING,*/
3027                     "URI: escaped fragment not valid");
3028         _fragment = fragmentSequence;
3029         setUriReference();
3030     }
3031 
3032 
3033     /***
3034      * Set the fragment.
3035      *
3036      * @param the fragment string.
3037      * @exception IOException
3038      * Or unsupported character encoding
3039      * @throws NullPointerException null fragment
3040      */
3041     public void setFragment(String fragment) throws IOException {
3042         _fragment = encode(fragment, allowed_fragment);
3043         setUriReference();
3044     }
3045 
3046 
3047     /***
3048      * Get the raw-escaped fragment.
3049      * <p>
3050      * The optional fragment identifier is not part of a URI, but is often used
3051      * in conjunction with a URI.
3052      * <p>
3053      * The format and interpretation of fragment identifiers is dependent on
3054      * the media type [RFC2046] of the retrieval result.
3055      * <p>
3056      * A fragment identifier is only meaningful when a URI reference is
3057      * intended for retrieval and the result of that retrieval is a document
3058      * for which the identified fragment is consistently defined.
3059      *
3060      * @return the raw-escaped fragment
3061      */
3062     public char[] getRawFragment() {
3063         return _fragment;
3064     }
3065 
3066 
3067     /***
3068      * Get the escaped fragment.
3069      *
3070      * @return the escaped fragment string
3071      */
3072     public String getEscapedFragment() {
3073         return (_fragment == null) ? null : new String(_fragment);
3074     }
3075 
3076 
3077     /***
3078      * Get the fragment.
3079      *
3080      * @return the fragment string
3081      * @exception IOException incomplete trailing escape pattern
3082      * Or unsupported character encoding
3083      * @see #decode
3084      */
3085     public String getFragment() throws IOException {
3086         return (_fragment == null) ? null : decode(_fragment);
3087     }
3088 
3089     // ------------------------------------------------------------- Utilities 
3090 
3091     /***
3092      * Normalize the given hier path part.
3093      *
3094      * @param path the path to normalize
3095      * @return the normalized path
3096      */
3097     protected char[] normalize(char[] path) {
3098 
3099         if (path == null) return null;
3100 
3101         String normalized = new String(path);
3102         boolean endsWithSlash = true;
3103         // precondition
3104         if (!normalized.endsWith("/")) {
3105             normalized += '/';
3106             endsWithSlash = false;
3107         }
3108         if (normalized.endsWith("/./") || normalized.endsWith("/../")) {
3109             endsWithSlash = true;
3110         }
3111         // Resolve occurrences of "/./" in the normalized path
3112         while (true) {
3113             int at = normalized.indexOf("/./");
3114             if (at == -1) {
3115                 break;
3116             }
3117             normalized = normalized.substring(0, at) +
3118             normalized.substring(at + 2);
3119         }
3120         // Resolve occurrences of "/../" in the normalized path
3121         while (true) {
3122             int at = normalized.indexOf("/../");
3123             if (at == -1) {
3124                 break;
3125             }
3126             if (at == 0) {
3127                 normalized = "/";
3128                 break;
3129             }
3130             int backward = normalized.lastIndexOf('/', at - 1);
3131             if (backward == -1) {
3132                 // consider the rel_path
3133                 normalized = normalized.substring(at + 4);
3134             } else {
3135                 normalized = normalized.substring(0, backward) +
3136                 normalized.substring(at + 3);
3137             }
3138         }
3139         // Resolve occurrences of "//" in the normalized path
3140         while (true) {
3141             int at = normalized.indexOf("//");
3142             if (at == -1) {
3143                 break;
3144             }
3145             normalized = normalized.substring(0, at) +
3146             normalized.substring(at + 1);
3147         }
3148         if (!endsWithSlash && normalized.endsWith("/")) {
3149             normalized = normalized.substring(0, normalized.length()-1);
3150         } else if (endsWithSlash && !normalized.endsWith("/")) {
3151             normalized = normalized + "/";
3152         }
3153         // Set the normalized path that we have completed
3154         return normalized.toCharArray();
3155     }
3156 
3157 
3158     /***
3159      * Normalize the path part of this URI.
3160      */
3161     public void normalize() {
3162         _path = normalize(_path);
3163     }
3164 
3165 
3166     /***
3167      * Test if the first array is equal to the second array.
3168      *
3169      * @param first the first character array
3170      * @param second the second character array
3171      * @return true if they're equal
3172      */
3173     protected boolean equals(char[] first, char[] second) {
3174 
3175         if (first == null && second == null) {
3176             return true;
3177         }
3178         if (first == null || second == null) {
3179             return false;
3180         }
3181         if (first.length != second.length) {
3182             return false;
3183         }
3184         for (int i = 0; i < first.length; i++) {
3185             if (first[i] != second[i]) {
3186                 return false;
3187             }
3188         }
3189         return true;
3190     }
3191 
3192 
3193     /***
3194      * Test an object if this URI is equal to another.
3195      *
3196      * @param obj an object to compare
3197      * @return true if two URI objects are equal
3198      */
3199     public boolean equals(Object obj) {
3200 
3201         // normalize and test each components
3202         if (obj == this) {
3203             return true;
3204         }
3205         if (!(obj instanceof URI)) {
3206             return false;
3207         }
3208         URI another = (URI) obj;
3209         // scheme
3210         if (!equals(_scheme, another._scheme)) {
3211             return false;
3212         }
3213         // is_opaque_part or is_hier_part?  and opaque
3214         if (!equals(_opaque, another._opaque)) {
3215             return false;
3216         }
3217         // is_hier_part
3218         // has_authority
3219         if (!equals(_authority, another._authority)) {
3220             return false;
3221         }
3222         // path
3223         if (!equals(_path, another._path)) {
3224             return false;
3225         }
3226         // has_query
3227         if (!equals(_query, another._query)) {
3228             return false;
3229         }
3230         // has_fragment?  should be careful of the only fragment case.
3231         if (!equals(_fragment, another._fragment)) {
3232             return false;
3233         }
3234         return true;
3235     }
3236 
3237     // ---------------------------------------------------------- Serialization
3238 
3239     /***
3240      * Write the content of this URI.
3241      *
3242      * @param oos the object-output stream
3243      */
3244     protected void writeObject(java.io.ObjectOutputStream oos)
3245         throws IOException {
3246 
3247         oos.defaultWriteObject();
3248     }
3249 
3250 
3251     /***
3252      * Read a URI.
3253      *
3254      * @param ois the object-input stream
3255      */
3256     protected void readObject(java.io.ObjectInputStream ois)
3257         throws ClassNotFoundException, IOException {
3258 
3259         ois.defaultReadObject();
3260     }
3261 
3262     // ------------------------------------------------------------- Comparison 
3263 
3264     /***
3265      * Compare this URI to another object. 
3266      *
3267      * @param obj the object to be compared.
3268      * @return 0, if it's same,
3269      * -1, if failed, first being compared with in the authority component
3270      * @exception ClassCastException not URI argument
3271      * @throws NullPointerException null object
3272      */
3273     public int compareTo(Object obj) {
3274 
3275         URI another = (URI) obj;
3276         if (!equals(_authority, another.getRawAuthority())) return -1;
3277         return toString().compareTo(another.toString());
3278     }
3279 
3280     // ------------------------------------------------------------------ Clone
3281 
3282     /***
3283      * Create and return a copy of this object, the URI-reference containing
3284      * the userinfo component.  Notice that the whole URI-reference including
3285      * the userinfo component counld not be gotten as a <code>String</code>.
3286      * <p>
3287      * To copy the identical <code>URI</code> object including the userinfo
3288      * component, it should be used.
3289      *
3290      * @return a clone of this instance
3291      */
3292     public synchronized Object clone() {
3293 
3294         URI instance = new URI();
3295 
3296         instance._uri = _uri;
3297         instance._scheme = _scheme;
3298         instance._opaque = _opaque;
3299         instance._authority = _authority;
3300         instance._userinfo = _userinfo;
3301         instance._host = _host;
3302         instance._port = _port;
3303         instance._path = _path;
3304         instance._query = _query;
3305         instance._fragment = _fragment;
3306         // flags
3307         instance._is_hier_part = _is_hier_part;
3308         instance._is_opaque_part = _is_opaque_part;
3309         instance._is_net_path = _is_net_path;
3310         instance._is_abs_path = _is_abs_path;
3311         instance._is_rel_path = _is_rel_path;
3312         instance._is_reg_name = _is_reg_name;
3313         instance._is_server = _is_server;
3314         instance._is_hostname = _is_hostname;
3315         instance._is_IPv4address = _is_IPv4address;
3316         instance._is_IPv6reference = _is_IPv6reference;
3317 
3318         return instance;
3319     }
3320 
3321     // ------------------------------------------------------------ Get the URI
3322 
3323     /***
3324      * It can be gotten the URI character sequence. It's raw-escaped.
3325      * For the purpose of the protocol to be transported, it will be useful.
3326      * <p>
3327      * It is clearly unwise to use a URL that contains a password which is
3328      * intended to be secret. In particular, the use of a password within
3329      * the 'userinfo' component of a URL is strongly disrecommended except
3330      * in those rare cases where the 'password' parameter is intended to be
3331      * public.
3332      * <p>
3333      * When you want to get each part of the userinfo, you need to use the
3334      * specific methods in the specific URL. It depends on the specific URL.
3335      *
3336      * @return URI character sequence
3337      */
3338     public char[] getRawURI() {
3339         return _uri;
3340     }
3341 
3342 
3343     /***
3344      * It can be gotten the URI character sequence. It's escaped.
3345      * For the purpose of the protocol to be transported, it will be useful.
3346      *
3347      * @return the URI string
3348      */
3349     public String getEscapedURI() {
3350         return (_uri == null) ? null : new String(_uri);
3351     }
3352     
3353 
3354     /***
3355      * It can be gotten the URI character sequence.
3356      *
3357      * @return the URI string
3358      * @exception IOException incomplete trailing escape pattern
3359      * Or unsupported character encoding
3360      * @see #decode
3361      */
3362     public String getURI() throws IOException {
3363         return (_uri == null) ? null : decode(_uri);
3364     }
3365 
3366 
3367     /***
3368      * Get the escaped URI string.
3369      * <p>
3370      * On the document, the URI-reference form is only used without the userinfo
3371      * component like http://jakarta.apache.org/ by the security reason.
3372      * But the URI-reference form with the userinfo component could be parsed.
3373      * <p>
3374      * In other words, this URI and any its subclasses must not expose the
3375      * URI-reference expression with the userinfo component like
3376      * http://user:password@hostport/restricted_zone.<br>
3377      * It means that the API client programmer should extract each user and
3378      * password to access manually.  Probably it will be supported in the each
3379      * subclass, however, not a whole URI-reference expression.
3380      *
3381      * @return the URI string
3382      * @see #clone()
3383      */
3384     public String toString() {
3385         return getEscapedURI();
3386     }
3387 
3388 
3389     // ------------------------------------------------------------ Inner class
3390 
3391     /*** 
3392      * A mapping to determine the (somewhat arbitrarily) preferred charset for 
3393      * a given locale.  Supports all locales recognized in JDK 1.1.
3394      * <p>
3395      * The distribution of this class is Servlets.com.    It was originally
3396      * written by Jason Hunter [jhunter at acm.org] and used by with permission.
3397      */
3398     public static class LocaleToCharsetMap {
3399 
3400         private static Hashtable map;
3401         static {
3402             map = new Hashtable();
3403             map.put("ar", "ISO-8859-6");
3404             map.put("be", "ISO-8859-5");
3405             map.put("bg", "ISO-8859-5");
3406             map.put("ca", "ISO-8859-1");
3407             map.put("cs", "ISO-8859-2");
3408             map.put("da", "ISO-8859-1");
3409             map.put("de", "ISO-8859-1");
3410             map.put("el", "ISO-8859-7");
3411             map.put("en", "ISO-8859-1");
3412             map.put("es", "ISO-8859-1");
3413             map.put("et", "ISO-8859-1");
3414             map.put("fi", "ISO-8859-1");
3415             map.put("fr", "ISO-8859-1");
3416             map.put("hr", "ISO-8859-2");
3417             map.put("hu", "ISO-8859-2");
3418             map.put("is", "ISO-8859-1");
3419             map.put("it", "ISO-8859-1");
3420             map.put("iw", "ISO-8859-8");
3421             map.put("ja", "Shift_JIS");
3422             map.put("ko", "EUC-KR");
3423             map.put("lt", "ISO-8859-2");
3424             map.put("lv", "ISO-8859-2");
3425             map.put("mk", "ISO-8859-5");
3426             map.put("nl", "ISO-8859-1");
3427             map.put("no", "ISO-8859-1");
3428             map.put("pl", "ISO-8859-2");
3429             map.put("pt", "ISO-8859-1");
3430             map.put("ro", "ISO-8859-2");
3431             map.put("ru", "ISO-8859-5");
3432             map.put("sh", "ISO-8859-5");
3433             map.put("sk", "ISO-8859-2");
3434             map.put("sl", "ISO-8859-2");
3435             map.put("sq", "ISO-8859-2");
3436             map.put("sr", "ISO-8859-5");
3437             map.put("sv", "ISO-8859-1");
3438             map.put("tr", "ISO-8859-9");
3439             map.put("uk", "ISO-8859-5");
3440             map.put("zh", "GB2312");
3441             map.put("zh_TW", "Big5");
3442         }
3443        
3444         /***
3445          * Get the preferred charset for the given locale.
3446          *
3447          * @param locale the locale
3448          * @return the preferred charset
3449          * or null if the locale is not recognized
3450          */
3451         public static String getCharset(Locale locale) {
3452             // try for an full name match (may include country)
3453             String charset = (String) map.get(locale.toString());
3454             if (charset != null) return charset;
3455            
3456             // if a full name didn't match, try just the language
3457             charset = (String) map.get(locale.getLanguage());
3458             return charset;  // may be null
3459         }
3460 
3461     }
3462 
3463 }
3464