1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67 package net.wotonomy.web;
68
69 import java.io.IOException;
70 import java.io.Serializable;
71 import java.io.UnsupportedEncodingException;
72 import java.net.URL;
73 import java.security.AccessController;
74 import java.util.BitSet;
75 import java.util.Hashtable;
76 import java.util.Locale;
77
78 import sun.security.action.GetPropertyAction;
79
80 /***
 * An implementation of the URI (Uniform Resource Identifier) syntax of RFC 2396.
 * This class supports parsing a URI reference so that it can be extended for
 * specific protocols, taking into account the character encoding of the
 * protocol used for transport and the charset of the document.
85 * <p>
86 * A URI is always in an "escaped" form, since escaping or unescaping a
87 * completed URI might change its semantics.
88 * <p>
89 * Implementers should be careful not to escape or unescape the same string
90 * more than once, since unescaping an already unescaped string might lead to
91 * misinterpreting a percent data character as another escaped character,
92 * or vice versa in the case of escaping an already escaped string.
93 * <p>
 * In order to avoid these problems, data types are used as follows:
95 * <p><blockquote><pre>
96 * URI character sequence: char
97 * octet sequence: byte
98 * original character sequence: String
99 * </pre></blockquote><p>
100 *
101 * So, a URI is a sequence of characters as an array of a char type, which
102 * is not always represented as a sequence of octets as an array of byte.
103 * <p>
104 *
105 * URI Syntactic Components
106 * <p><blockquote><pre>
107 * - In general, written as follows:
108 * Absolute URI = <scheme>:<scheme-specific-part>
109 * Generic URI = <scheme>://<authority><path>?<query>
110 *
111 * - Syntax
112 * absoluteURI = scheme ":" ( hier_part | opaque_part )
113 * hier_part = ( net_path | abs_path ) [ "?" query ]
114 * net_path = "//" authority [ abs_path ]
115 * abs_path = "/" path_segments
116 * </pre></blockquote><p>
117 *
118 * The following examples illustrate URI that are in common use.
119 * <pre>
120 * ftp://ftp.is.co.za/rfc/rfc1808.txt
121 * -- ftp scheme for File Transfer Protocol services
122 * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles
123 * -- gopher scheme for Gopher and Gopher+ Protocol services
124 * http://www.math.uio.no/faq/compression-faq/part1.html
125 * -- http scheme for Hypertext Transfer Protocol services
126 * mailto:mduerst@ifi.unizh.ch
127 * -- mailto scheme for electronic mail addresses
128 * news:comp.infosystems.www.servers.unix
129 * -- news scheme for USENET news groups and articles
130 * telnet://melvyl.ucop.edu/
131 * -- telnet scheme for interactive services via the TELNET Protocol
132 * </pre>
133 * Please, notice that there are many modifications from URL(RFC 1738) and
134 * relative URL(RFC 1808).
135 * <p>
136 * <b>The expressions for a URI</b>
137 * <p><pre>
138 * For escaped URI forms
139 * - URI(char[]) // constructor
140 * - char[] getRawXxx() // method
141 * - String getEscapedXxx() // method
142 * - String toString() // method
143 * <p>
144 * For unescaped URI forms
145 * - URI(String) // constructor
146 * - String getXXX() // method
147 * </pre><p>
148 *
149 * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
150 * @version $Revision: 905 $ $Date: 2002/03/14 15:14:01
151 */
152 class URI implements Cloneable, Comparable, Serializable {
153
154
155
156
/**
 * Creates an empty URI whose components all keep their default values.
 * Accessible only from this package and from subclasses.
 */
protected URI() {
    super();
}
159
160 /***
161 * Construct a URI as an escaped form of a character array.
162 * An URI can be placed within double-quotes or angle brackets like
163 * "http://test.com/" and <http://test.com/>
164 *
165 * @param escaped the URI character sequence
166 * @exception IOException
167 * @throws NullPointerException if <code>escaped</code> is <code>null</code>
168 */
169 public URI(char[] escaped) throws IOException {
170 parseUriReference(new String(escaped), true);
171 }
172
173
174 /***
175 * Construct a URI from the given string.
176 * <p><blockquote><pre>
177 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
178 * </pre></blockquote><p>
179 * An URI can be placed within double-quotes or angle brackets like
180 * "http://test.com/" and <http://test.com/>
181 *
182 * @param original the string to be represented to URI character sequence
183 * It is one of absoluteURI and relativeURI.
184 * @exception IOException
185 */
186 public URI(String original) throws IOException {
187 parseUriReference(original, false);
188 }
189
/**
 * Builds a URI from the string form of a {@link URL}.
 * Delegates to {@link #URI(String)} with <code>url.toString()</code>.
 *
 * @param url a valid URL
 * @throws IOException if the string form of the URL cannot be parsed
 * @since 2.0
 */
public URI(URL url) throws IOException {
    this(
        url.toString()
    );
}
200
201
/**
 * Construct an opaque absolute URI from the given components.
 * <pre>
 *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
 *   opaque_part   = uric_no_slash *uric
 * </pre>
 * Intended for URIs of the form
 * &lt;scheme&gt;:&lt;scheme-specific-part&gt;#&lt;fragment&gt;.
 *
 * @param scheme the scheme string; must not be <code>null</code>
 * @param scheme_specific_part the scheme-specific part, encoded with
 * {@link #allowed_opaque_part}
 * @param fragment the fragment string
 * @exception IOException if the scheme is missing or contains characters
 * outside the {@link #scheme} set
 */
public URI(String scheme, String scheme_specific_part, String fragment)
    throws IOException {

    // An absolute URI cannot exist without a scheme.
    if (scheme == null) {
        throw new IOException("URI: scheme required");
    }
    // Lower-case with a fixed locale: under e.g. a Turkish default locale
    // "FILE".toLowerCase() yields a dotless 'i' that fails validation.
    char[] s = scheme.toLowerCase(Locale.ENGLISH).toCharArray();
    if (validate(s, URI.scheme)) {
        _scheme = s;
    } else {
        throw new IOException("URI: incorrect scheme");
    }
    _opaque = encode(scheme_specific_part, allowed_opaque_part);

    // Mark this URI as carrying an opaque part rather than a hierarchy.
    _is_opaque_part = true;
    // NOTE(review): the fragment argument is never stored here, so it does
    // not reach setUriReference(); confirm whether that is intentional.
    setUriReference();
}
235
236
237 /***
238 * Construct a general URI from the given components.
239 * <p><blockquote><pre>
240 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
241 * absoluteURI = scheme ":" ( hier_part | opaque_part )
242 * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
243 * hier_part = ( net_path | abs_path ) [ "?" query ]
244 * </pre></blockquote><p>
245 * It's for absolute URI = <scheme>:<path>?<query>#<
246 * fragment> and relative URI = <path>?<query>#<fragment
247 * >.
248 *
249 * @param scheme the scheme string
250 * @param authority the authority string
251 * @param path the path string
252 * @param query the query string
253 * @param fragment the fragment string
254 * @exception IOException
255 */
256 public URI(String scheme, String authority, String path, String query,
257 String fragment) throws IOException {
258
259
260 StringBuffer buff = new StringBuffer();
261 if (scheme != null) {
262 buff.append(scheme);
263 buff.append(':');
264 }
265 if (authority != null) {
266 buff.append("//");
267 buff.append(authority);
268 }
269 if (path != null) {
270 if ((scheme != null || authority != null)
271 && !path.startsWith("/")) {
272 throw new IOException(
273 "URI: abs_path requested");
274 }
275 buff.append(path);
276 }
277 if (query != null) {
278 buff.append('?');
279 buff.append(query);
280 }
281 if (fragment != null) {
282 buff.append('#');
283 buff.append(fragment);
284 }
285 parseUriReference(buff.toString(), false);
286 }
287
288
/**
 * Builds a general URI from scheme, userinfo, host and port.
 * Path, query and fragment are passed on as <code>null</code>.
 *
 * @param scheme the scheme string
 * @param userinfo the userinfo string
 * @param host the host string
 * @param port the port number
 * @exception IOException if the seven-argument constructor fails
 */
public URI(String scheme, String userinfo, String host, int port)
    throws IOException {

    this(scheme, userinfo, host, port,
        null,   // path
        null,   // query
        null);  // fragment
}
303
304
/**
 * Builds a general URI from scheme, userinfo, host, port and path.
 * Query and fragment are passed on as <code>null</code>.
 *
 * @param scheme the scheme string
 * @param userinfo the userinfo string
 * @param host the host string
 * @param port the port number
 * @param path the path string
 * @exception IOException if the seven-argument constructor fails
 */
public URI(String scheme, String userinfo, String host, int port,
    String path) throws IOException {

    this(scheme, userinfo, host, port, path,
        null,   // query
        null);  // fragment
}
320
321
/**
 * Builds a general URI from scheme, userinfo, host, port, path and query.
 * The fragment is passed on as <code>null</code>.
 *
 * @param scheme the scheme string
 * @param userinfo the userinfo string
 * @param host the host string
 * @param port the port number
 * @param path the path string
 * @param query the query string
 * @exception IOException if the seven-argument constructor fails
 */
public URI(String scheme, String userinfo, String host, int port,
    String path, String query) throws IOException {

    this(scheme, userinfo, host, port, path, query,
        null);  // fragment
}
338
339
340 /***
341 * Construct a general URI from the given components.
342 *
343 * @param scheme the scheme string
344 * @param userinfo the userinfo string
345 * @param host the host string
346 * @param port the port number
347 * @param path the path string
348 * @param query the query string
349 * @param fragment the fragment string
350 * @exception IOException
351 */
352 public URI(String scheme, String userinfo, String host, int port,
353 String path, String query, String fragment) throws IOException {
354
355 this(scheme, (host == null) ? null :
356 ((userinfo != null) ? userinfo + '@' : "") + host +
357 ((port != -1) ? ":" + port : ""), path, query, fragment);
358 }
359
360
/**
 * Builds a general URI from scheme, host, path and fragment.
 * The host is passed on in the authority position and the query is
 * passed on as <code>null</code>.
 *
 * @param scheme the scheme string
 * @param host the host string
 * @param path the path string
 * @param fragment the fragment string
 * @exception IOException if the five-argument constructor fails
 */
public URI(String scheme, String host, String path, String fragment)
    throws IOException {

    this(scheme, host, path,
        null,   // query
        fragment);
}
375
376
/**
 * Resolves a relative URI string against a base URI.
 * The string is first turned into a URI via {@link #URI(String)} and then
 * handed to {@link #URI(URI, URI)}.
 *
 * @param base the base URI
 * @param relative the relative URI string
 * @exception IOException if parsing or resolution fails
 */
public URI(URI base, String relative) throws IOException {
    this(
        base,
        new URI(relative)
    );
}
387
388
/**
 * Construct a general URI by resolving a relative URI against a base URI.
 * <pre>
 *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
 * </pre>
 *
 * <strong>Examples of Resolving Relative URI References</strong>
 * <p>
 * Within an object with a well-defined base URI of
 * <pre>
 *   http://a/b/c/d;p?q
 * </pre>
 * the relative URI would be resolved as follows:
 * <pre>
 *   g:h           =  g:h
 *   g             =  http://a/b/c/g
 *   ./g           =  http://a/b/c/g
 *   g/            =  http://a/b/c/g/
 *   /g            =  http://a/g
 *   //g           =  http://g
 *   ?y            =  http://a/b/c/?y
 *   g?y           =  http://a/b/c/g?y
 *   #s            =  (current document)#s
 *   g#s           =  http://a/b/c/g#s
 *   g?y#s         =  http://a/b/c/g?y#s
 *   ;x            =  http://a/b/c/;x
 *   g;x           =  http://a/b/c/g;x
 *   g;x?y#s       =  http://a/b/c/g;x?y#s
 *   .             =  http://a/b/c/
 *   ./            =  http://a/b/c/
 *   ..            =  http://a/b/
 *   ../           =  http://a/b/
 *   ../g          =  http://a/b/g
 *   ../..         =  http://a/
 *   ../../        =  http://a/
 *   ../../g       =  http://a/g
 * </pre>
 *
 * Some URI schemes do not allow a hierarchical syntax matching the
 * &lt;hier_part&gt; syntax, and thus cannot use relative references.
 *
 * @param base the base URI; must have a scheme
 * @param relative the relative URI
 * @exception IOException if the base URI has no scheme
 */
public URI(URI base, URI relative) throws IOException {

    // Resolution needs an absolute base.
    if (base._scheme == null) {
        throw new IOException("URI: base URI required");
    }
    // Start from the base's scheme and authority; the branches below
    // override them when the relative reference supplies its own.
    this._scheme = base._scheme;
    this._authority = base._authority;

    // Opaque URIs have no hierarchy to resolve against: take the opaque
    // part and fragment from the relative reference and stop.
    if (base._is_opaque_part || relative._is_opaque_part) {
        this._is_opaque_part = relative._is_opaque_part;
        this._opaque = relative._opaque;
        this._fragment = relative._fragment;
        this.setUriReference();
        return;
    }

    if (relative._scheme != null) {
        // The reference has its own scheme: copy scheme, authority and path.
        this._scheme = relative._scheme;
        this._is_net_path = relative._is_net_path;
        this._authority = relative._authority;
        if (relative._is_server) {
            // Keep the server flag in step with the copied server fields,
            // as the relative-authority branch below already does.
            this._is_server = relative._is_server;
            this._userinfo = relative._userinfo;
            this._host = relative._host;
            this._port = relative._port;
        } else if (relative._is_reg_name) {
            this._is_reg_name = relative._is_reg_name;
        }
        this._is_abs_path = relative._is_abs_path;
        this._is_rel_path = relative._is_rel_path;
        this._path = relative._path;
    } else if (base._authority != null) {
        // No scheme in the reference: inherit the base's authority.
        this._is_net_path = base._is_net_path;
        this._authority = base._authority;
        if (base._is_server) {
            this._is_server = base._is_server;
            this._userinfo = base._userinfo;
            this._host = base._host;
            this._port = base._port;
        } else if (base._is_reg_name) {
            this._is_reg_name = base._is_reg_name;
        }
    }

    // An authority in the reference always overrides the inherited one.
    if (relative._authority != null) {
        this._is_net_path = relative._is_net_path;
        this._authority = relative._authority;
        if (relative._is_server) {
            this._is_server = relative._is_server;
            this._userinfo = relative._userinfo;
            this._host = relative._host;
            this._port = relative._port;
        } else if (relative._is_reg_name) {
            this._is_reg_name = relative._is_reg_name;
        }
        this._is_abs_path = relative._is_abs_path;
        this._is_rel_path = relative._is_rel_path;
        this._path = relative._path;
    }

    // Merge the paths when the reference is purely relative, or when it
    // repeats the base's scheme.
    if (relative._scheme == null && relative._authority == null
        || equals(base._scheme, relative._scheme)) {
        this._path = resolvePath(base._path, relative._path);
    }

    // Query and fragment are taken from the reference when it has them.
    if (relative._query != null) {
        this._query = relative._query;
    }
    if (relative._fragment != null) {
        this._fragment = relative._fragment;
    }
    this.setUriReference();
}
511
512
513
514 static final long serialVersionUID = 604752400577948726L;
515
516
517 /***
518 * This Uniform Resource Identifier (URI).
519 * The URI is always in an "escaped" form, since escaping or unescaping
520 * a completed URI might change its semantics.
521 */
522 protected char[] _uri = null;
523
524
525 /***
526 * The default charset of the protocol. RFC 2277, 2396
527 */
528 protected static String _protocolCharset = "UTF-8";
529
530
531 /***
532 * The default charset of the document. RFC 2277, 2396
533 * The platform's charset is used for the document by default.
534 */
535 protected static String _documentCharset = null;
536
537 static {
538 Locale locale = Locale.getDefault();
539 if (locale != null) {
540
541 _documentCharset = LocaleToCharsetMap.getCharset(locale);
542 } else {
543 _documentCharset = (String)AccessController.doPrivileged(
544 new GetPropertyAction("file.encoding"));
545 }
546 }
547
548 /***
549 * The scheme.
550 */
551 protected char[] _scheme = null;
552
553
554 /***
555 * The opaque.
556 */
557 protected char[] _opaque = null;
558
559
560 /***
561 * The authority.
562 */
563 protected char[] _authority = null;
564
565
566 /***
567 * The userinfo.
568 */
569 protected char[] _userinfo = null;
570
571
572 /***
573 * The host.
574 */
575 protected char[] _host = null;
576
577
578 /***
579 * The port.
580 */
581 protected int _port = -1;
582
583
584 /***
585 * The path.
586 */
587 protected char[] _path = null;
588
589
590 /***
591 * The query.
592 */
593 protected char[] _query = null;
594
595
596 /***
597 * The fragment.
598 */
599 protected char[] _fragment = null;
600
601
602 /***
603 * The root path.
604 */
605 protected static char[] rootPath = { '/' };
606
607
608
/**
 * BitSet holding only the percent character. "%" is reserved as the escape
 * indicator, so it has to be written as "%25" when used as data in a URI.
 */
protected static final BitSet percent = new BitSet(256);

static {
    final char escapeIndicator = '%';
    percent.set(escapeIndicator);
}
619
620
/**
 * BitSet of the decimal digits.
 * <pre>
 *   digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 * </pre>
 */
protected static final BitSet digit = new BitSet(256);

static {
    for (char c = '0'; c <= '9'; c++) {
        digit.set(c);
    }
}
635
636
/**
 * BitSet of the ASCII letters, lower and upper case.
 * <pre>
 *   alpha = lowalpha | upalpha
 * </pre>
 */
protected static final BitSet alpha = new BitSet(256);

static {
    // Walk the 26 letter offsets once, setting both cases per step.
    for (int offset = 0; offset < 26; offset++) {
        alpha.set('a' + offset);
        alpha.set('A' + offset);
    }
}
653
654
/**
 * BitSet of letters and digits (union of {@link #alpha} and {@link #digit}).
 * <pre>
 *   alphanum = alpha | digit
 * </pre>
 */
protected static final BitSet alphanum = new BitSet(256);

static {
    final BitSet[] parts = { alpha, digit };
    for (int i = 0; i < parts.length; i++) {
        alphanum.or(parts[i]);
    }
}
667
668
/**
 * BitSet of the hexadecimal digits.
 * <pre>
 *   hex = digit | "A" | "B" | "C" | "D" | "E" | "F" |
 *                 "a" | "b" | "c" | "d" | "e" | "f"
 * </pre>
 */
protected static final BitSet hex = new BitSet(256);

static {
    final String letters = "abcdefABCDEF";
    for (int i = 0; i < letters.length(); i++) {
        hex.set(letters.charAt(i));
    }
    hex.or(digit);
}
687
688
/**
 * BitSet of the characters that make up an escape sequence:
 * the percent sign plus the hexadecimal digits.
 * <pre>
 *   escaped = "%" hex hex
 * </pre>
 */
protected static final BitSet escaped = new BitSet(256);

static {
    final BitSet[] parts = { percent, hex };
    for (int i = 0; i < parts.length; i++) {
        escaped.or(parts[i]);
    }
}
701
702
/**
 * BitSet of the mark characters.
 * <pre>
 *   mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 * </pre>
 */
protected static final BitSet mark = new BitSet(256);

static {
    final String markChars = "-_.!~*'()";
    for (int i = 0; i < markChars.length(); i++) {
        mark.set(markChars.charAt(i));
    }
}
723
724
/**
 * BitSet of the unreserved characters: data characters that are allowed
 * in a URI and have no reserved purpose.
 * <pre>
 *   unreserved = alphanum | mark
 * </pre>
 */
protected static final BitSet unreserved = new BitSet(256);

static {
    final BitSet[] parts = { alphanum, mark };
    for (int i = 0; i < parts.length; i++) {
        unreserved.or(parts[i]);
    }
}
738
739
/**
 * BitSet of the reserved characters.
 * <pre>
 *   reserved = ";" | "/" | "?" | ":" | "@" | "&amp;" | "=" | "+" | "$" | ","
 * </pre>
 */
protected static final BitSet reserved = new BitSet(256);

static {
    final String reservedChars = ";/?:@&=+$,";
    for (int i = 0; i < reservedChars.length(); i++) {
        reserved.set(reservedChars.charAt(i));
    }
}
761
762
/**
 * BitSet of every character that may appear in a URI.
 * <pre>
 *   uric = reserved | unreserved | escaped
 * </pre>
 */
protected static final BitSet uric = new BitSet(256);

static {
    final BitSet[] parts = { reserved, unreserved, escaped };
    for (int i = 0; i < parts.length; i++) {
        uric.or(parts[i]);
    }
}


/**
 * BitSet for the fragment component; the same object as {@link #uric}.
 * <pre>
 *   fragment = *uric
 * </pre>
 */
protected static final BitSet fragment = uric;


/**
 * BitSet for the query component; the same object as {@link #uric}.
 * <pre>
 *   query = *uric
 * </pre>
 */
protected static final BitSet query = uric;
794
795
/**
 * BitSet of the path characters.
 * <pre>
 *   pchar = unreserved | escaped |
 *           ":" | "@" | "&amp;" | "=" | "+" | "$" | ","
 * </pre>
 */
protected static final BitSet pchar = new BitSet(256);

static {
    final String extras = ":@&=+$,";
    for (int i = 0; i < extras.length(); i++) {
        pchar.set(extras.charAt(i));
    }
    pchar.or(unreserved);
    pchar.or(escaped);
}


/**
 * BitSet for a path parameter; the same object as {@link #pchar}.
 * <pre>
 *   param = *pchar
 * </pre>
 */
protected static final BitSet param = pchar;
825
826
/**
 * BitSet of the characters of a path segment: the path characters plus
 * the ";" that introduces a parameter.
 * <pre>
 *   segment = *pchar *( ";" param )
 * </pre>
 */
protected static final BitSet segment = new BitSet(256);

static {
    segment.set(';');
    final BitSet[] parts = { pchar, param };
    for (int i = 0; i < parts.length; i++) {
        segment.or(parts[i]);
    }
}
840
841
/**
 * BitSet of the characters of a sequence of path segments: the segment
 * characters plus the "/" separator.
 * <pre>
 *   path_segments = segment *( "/" segment )
 * </pre>
 */
protected static final BitSet path_segments = new BitSet(256);

static {
    final char separator = '/';
    path_segments.or(segment);
    path_segments.set(separator);
}
854
855
/**
 * BitSet of the characters of an absolute path.
 * <pre>
 *   abs_path = "/" path_segments
 * </pre>
 */
protected static final BitSet abs_path = new BitSet(256);

static {
    final char leadingSlash = '/';
    abs_path.or(path_segments);
    abs_path.set(leadingSlash);
}
868
869
/**
 * BitSet of the URI characters excluding the slash, as used for the first
 * character of an opaque part.
 * <pre>
 *   uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
 *                   "&amp;" | "=" | "+" | "$" | ","
 * </pre>
 */
protected static final BitSet uric_no_slash = new BitSet(256);

static {
    uric_no_slash.or(unreserved);
    uric_no_slash.or(escaped);
    uric_no_slash.set(';');
    uric_no_slash.set('?');
    // The grammar lists ":" here; the code used to set ";" a second time
    // and left ":" out of the set.
    uric_no_slash.set(':');
    uric_no_slash.set('@');
    uric_no_slash.set('&');
    uric_no_slash.set('=');
    uric_no_slash.set('+');
    uric_no_slash.set('$');
    uric_no_slash.set(',');
}
892
893
/**
 * BitSet of the characters of an opaque part: the union of
 * {@link #uric_no_slash} and {@link #uric}.
 * <pre>
 *   opaque_part = uric_no_slash *uric
 * </pre>
 */
protected static final BitSet opaque_part = new BitSet(256);

static {
    final BitSet[] parts = { uric_no_slash, uric };
    for (int i = 0; i < parts.length; i++) {
        opaque_part.or(parts[i]);
    }
}
906
907
/**
 * BitSet of the characters of a path: the union of the absolute-path and
 * opaque-part sets.
 * <pre>
 *   path = [ abs_path | opaque_part ]
 * </pre>
 */
protected static final BitSet path = new BitSet(256);

static {
    final BitSet[] parts = { abs_path, opaque_part };
    for (int i = 0; i < parts.length; i++) {
        path.or(parts[i]);
    }
}
920
921
/**
 * BitSet for the port; the same object as {@link #digit}.
 */
protected static final BitSet port = digit;


/**
 * BitSet of the characters of an IPv4 address: the digits plus the dot.
 * <pre>
 *   IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
 * </pre>
 */
protected static final BitSet IPv4address = new BitSet(256);

static {
    final char dot = '.';
    IPv4address.set(dot);
    IPv4address.or(digit);
}
940
941
/**
 * BitSet of the characters of an IPv6 address (RFC 2373): the hex digits,
 * the colon, and the IPv4 address characters.
 * <pre>
 *   IPv6address = hexpart [ ":" IPv4address ]
 * </pre>
 */
protected static final BitSet IPv6address = new BitSet(256);

static {
    IPv6address.set(':');
    final BitSet[] parts = { hex, IPv4address };
    for (int i = 0; i < parts.length; i++) {
        IPv6address.or(parts[i]);
    }
}
955
956
/**
 * BitSet of the characters of a bracketed IPv6 reference (RFC 2732, 2373).
 * <pre>
 *   IPv6reference = "[" IPv6address "]"
 * </pre>
 */
protected static final BitSet IPv6reference = new BitSet(256);

static {
    final String brackets = "[]";
    for (int i = 0; i < brackets.length(); i++) {
        IPv6reference.set(brackets.charAt(i));
    }
    IPv6reference.or(IPv6address);
}
970
971
/**
 * BitSet of the characters of a top-level domain label: letters, digits
 * and the hyphen.
 * <pre>
 *   toplabel = alpha | alpha *( alphanum | "-" ) alphanum
 * </pre>
 */
protected static final BitSet toplabel = new BitSet(256);

static {
    final char hyphen = '-';
    toplabel.set(hyphen);
    toplabel.or(alphanum);
}


/**
 * BitSet for a domain label; the same object as {@link #toplabel}.
 * <pre>
 *   domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
 * </pre>
 */
protected static final BitSet domainlabel = toplabel;
993
994
/**
 * BitSet of the characters of a host name: the label characters plus the
 * dot that separates labels.
 * <pre>
 *   hostname = *( domainlabel "." ) toplabel [ "." ]
 * </pre>
 */
protected static final BitSet hostname = new BitSet(256);

static {
    final char labelSeparator = '.';
    hostname.set(labelSeparator);
    // domainlabel is the same object as toplabel, so one union covers both
    hostname.or(toplabel);
}
1008
1009
/**
 * BitSet of the characters of a host.
 * <pre>
 *   host = hostname | IPv4address | IPv6reference
 * </pre>
 */
protected static final BitSet host = new BitSet(256);

static {
    // IPv4address (digits and ".") is a subset of hostname, so it needs
    // no union of its own.
    final BitSet[] parts = { hostname, IPv6reference };
    for (int i = 0; i < parts.length; i++) {
        host.or(parts[i]);
    }
}
1023
1024
/**
 * BitSet of the characters of a host with an optional port.
 * <pre>
 *   hostport = host [ ":" port ]
 * </pre>
 */
protected static final BitSet hostport = new BitSet(256);

static {
    hostport.set(':');
    final BitSet[] parts = { host, port };
    for (int i = 0; i < parts.length; i++) {
        hostport.or(parts[i]);
    }
}
1038
1039
/**
 * BitSet of the characters of the userinfo component.
 * <pre>
 *   userinfo = *( unreserved | escaped |
 *                 ";" | ":" | "&amp;" | "=" | "+" | "$" | "," )
 * </pre>
 */
protected static final BitSet userinfo = new BitSet(256);

static {
    final String extras = ";:&=+$,";
    for (int i = 0; i < extras.length(); i++) {
        userinfo.set(extras.charAt(i));
    }
    userinfo.or(unreserved);
    userinfo.or(escaped);
}
1060
1061
/**
 * BitSet for the parts inside the userinfo component, such as user and
 * password: the userinfo set with ";", ":", "@", "?" and "/" removed.
 */
public static final BitSet within_userinfo = new BitSet(256);

static {
    within_userinfo.or(userinfo);
    final String excluded = ";:@?/";
    for (int i = 0; i < excluded.length(); i++) {
        within_userinfo.clear(excluded.charAt(i));
    }
}
1075
1076
/**
 * BitSet of the characters of a server-based authority.
 * <pre>
 *   server = [ [ userinfo "@" ] hostport ]
 * </pre>
 */
protected static final BitSet server = new BitSet(256);

static {
    server.set('@');
    final BitSet[] parts = { userinfo, hostport };
    for (int i = 0; i < parts.length; i++) {
        server.or(parts[i]);
    }
}
1090
1091
/**
 * BitSet of the characters of a registry-based naming authority.
 * <pre>
 *   reg_name = 1*( unreserved | escaped | "$" | "," |
 *                  ";" | ":" | "@" | "&amp;" | "=" | "+" )
 * </pre>
 */
protected static final BitSet reg_name = new BitSet(256);

static {
    final String extras = "$,;:@&=+";
    for (int i = 0; i < extras.length(); i++) {
        reg_name.set(extras.charAt(i));
    }
    reg_name.or(unreserved);
    reg_name.or(escaped);
}
1113
1114
/**
 * BitSet of the characters of the authority component.
 * <pre>
 *   authority = server | reg_name
 * </pre>
 */
protected static final BitSet authority = new BitSet(256);

static {
    final BitSet[] parts = { server, reg_name };
    for (int i = 0; i < parts.length; i++) {
        authority.or(parts[i]);
    }
}
1127
1128
/**
 * BitSet of the characters of a scheme name.
 * <pre>
 *   scheme = alpha *( alpha | digit | "+" | "-" | "." )
 * </pre>
 */
protected static final BitSet scheme = new BitSet(256);

static {
    final String extras = "+-.";
    for (int i = 0; i < extras.length(); i++) {
        scheme.set(extras.charAt(i));
    }
    scheme.or(alpha);
    scheme.or(digit);
}
1144
1145
/**
 * BitSet of the characters of the first segment of a relative path.
 * <pre>
 *   rel_segment = 1*( unreserved | escaped |
 *                     ";" | "@" | "&amp;" | "=" | "+" | "$" | "," )
 * </pre>
 */
protected static final BitSet rel_segment = new BitSet(256);

static {
    final String extras = ";@&=+$,";
    for (int i = 0; i < extras.length(); i++) {
        rel_segment.set(extras.charAt(i));
    }
    rel_segment.or(unreserved);
    rel_segment.or(escaped);
}
1166
1167
/**
 * BitSet of the characters of a relative path.
 * <pre>
 *   rel_path = rel_segment [ abs_path ]
 * </pre>
 */
protected static final BitSet rel_path = new BitSet(256);

static {
    final BitSet[] parts = { rel_segment, abs_path };
    for (int i = 0; i < parts.length; i++) {
        rel_path.or(parts[i]);
    }
}
1180
1181
/**
 * BitSet of the characters of a network path.
 * <pre>
 *   net_path = "//" authority [ abs_path ]
 * </pre>
 */
protected static final BitSet net_path = new BitSet(256);

static {
    net_path.set('/');
    final BitSet[] parts = { authority, abs_path };
    for (int i = 0; i < parts.length; i++) {
        net_path.or(parts[i]);
    }
}
1195
1196
/**
 * BitSet of the characters of the hierarchical part.
 * <pre>
 *   hier_part = ( net_path | abs_path ) [ "?" query ]
 * </pre>
 */
protected static final BitSet hier_part = new BitSet(256);

static {
    // "?" needs no explicit set: query is uric, which contains the
    // reserved characters including "?".
    final BitSet[] parts = { net_path, abs_path, query };
    for (int i = 0; i < parts.length; i++) {
        hier_part.or(parts[i]);
    }
}
1211
1212
/**
 * BitSet of the characters of a relative URI.
 * <pre>
 *   relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
 * </pre>
 */
protected static final BitSet relativeURI = new BitSet(256);

static {
    // "?" arrives with query (= uric), so it is not set separately.
    final BitSet[] parts = { net_path, abs_path, rel_path, query };
    for (int i = 0; i < parts.length; i++) {
        relativeURI.or(parts[i]);
    }
}
1228
1229
/**
 * BitSet of the characters of an absolute URI.
 * <pre>
 *   absoluteURI = scheme ":" ( hier_part | opaque_part )
 * </pre>
 */
protected static final BitSet absoluteURI = new BitSet(256);

static {
    absoluteURI.set(':');
    final BitSet[] parts = { scheme, hier_part, opaque_part };
    for (int i = 0; i < parts.length; i++) {
        absoluteURI.or(parts[i]);
    }
}
1244
1245
/**
 * BitSet of the characters of a URI reference.
 * <pre>
 *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 * </pre>
 */
protected static final BitSet URI_reference = new BitSet(256);

static {
    URI_reference.set('#');
    final BitSet[] parts = { absoluteURI, relativeURI, fragment };
    for (int i = 0; i < parts.length; i++) {
        URI_reference.or(parts[i]);
    }
}
1260
1261
1262
1263
/**
 * BitSet of the control characters: codes 0x00 through 0x1F and 0x7F.
 */
public static final BitSet control = new BitSet(256);

static {
    control.set(0x7F);
    for (int code = 0x1F; code >= 0; code--) {
        control.set(code);
    }
}
1275
/**
 * BitSet holding only the space character (0x20).
 */
public static final BitSet space = new BitSet(256);

static {
    final char blank = ' ';
    space.set(blank);
}
1284
1285
/**
 * BitSet of the delimiter characters: "&lt;", "&gt;", "#", "%" and the
 * double-quote.
 */
public static final BitSet delims = new BitSet(256);

static {
    final String delimChars = "<>#%\"";
    for (int i = 0; i < delimChars.length(); i++) {
        delims.set(delimChars.charAt(i));
    }
}
1298
1299
/**
 * BitSet of the "unwise" characters of RFC 2396.
 * <pre>
 *   unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 * </pre>
 */
public static final BitSet unwise = new BitSet(256);

static {
    unwise.set('{');
    unwise.set('}');
    unwise.set('|');
    // Backslash: the source had '//' here, which is not a valid char
    // literal; RFC 2396 lists "\" at this position.
    unwise.set('\\');
    unwise.set('^');
    unwise.set('[');
    unwise.set(']');
    unwise.set('`');
}
1315
1316
/**
 * Characters disallowed in a rel_path before escaping: every character of
 * {@link #uric} that is not in {@link #rel_path}.
 */
public static final BitSet disallowed_rel_path = new BitSet(256);

static {
    for (int c = 0; c < 256; c++) {
        if (uric.get(c) && !rel_path.get(c)) {
            disallowed_rel_path.set(c);
        }
    }
}
1326
1327
/**
 * Characters disallowed in an opaque_part before escaping: every character
 * of {@link #uric} that is not in {@link #opaque_part}.
 */
public static final BitSet disallowed_opaque_part = new BitSet(256);

static {
    for (int c = 0; c < 256; c++) {
        if (uric.get(c) && !opaque_part.get(c)) {
            disallowed_opaque_part.set(c);
        }
    }
}
1337
1338
1339
/**
 * Characters allowed for the authority component: the {@link #authority}
 * set without the percent sign.
 */
public static final BitSet allowed_authority = new BitSet(256);

static {
    allowed_authority.or(authority);
    // percent holds only '%', so this removes exactly that character
    allowed_authority.andNot(percent);
}
1349
1350
/***
 * Characters that may be left unescaped in the opaque_part: the opaque_part
 * set without the percent sign.
 */
public static final BitSet allowed_opaque_part = new BitSet(256);

static {
    final char percentSign = '%';
    allowed_opaque_part.or(opaque_part);
    allowed_opaque_part.clear(percentSign);
}
1360
1361
/***
 * Characters that may be left unescaped in a reg_name: the reg_name set
 * without the percent sign.
 */
public static final BitSet allowed_reg_name = new BitSet(256);

static {
    final char percentSign = '%';
    allowed_reg_name.or(reg_name);
    allowed_reg_name.clear(percentSign);
}
1372
1373
/***
 * Characters that may be left unescaped in the userinfo component: the
 * userinfo set without the percent sign.
 */
public static final BitSet allowed_userinfo = new BitSet(256);

static {
    final char percentSign = '%';
    allowed_userinfo.or(userinfo);
    allowed_userinfo.clear(percentSign);
}
1384
1385
/***
 * Characters that may be left unescaped within the userinfo component: the
 * within_userinfo set without the percent sign.
 */
public static final BitSet allowed_within_userinfo = new BitSet(256);

static {
    final char percentSign = '%';
    allowed_within_userinfo.or(within_userinfo);
    allowed_within_userinfo.clear(percentSign);
}
1395
1396
/***
 * Characters that may be left unescaped in the IPv6reference component.
 * The enclosing brackets '[' and ']' are excluded from the set.
 */
public static final BitSet allowed_IPv6reference = new BitSet(256);

static {
    allowed_IPv6reference.or(IPv6reference);
    // Remove the bracket pair; the two removals are independent.
    allowed_IPv6reference.clear(']');
    allowed_IPv6reference.clear('[');
}
1409
1410
/***
 * Characters that may be left unescaped in the host component: the union of
 * the hostname set and the allowed IPv6reference set (which excludes the
 * brackets '[' and ']').
 */
public static final BitSet allowed_host = new BitSet(256);

static {
    // A union, so the merge order is irrelevant.
    allowed_host.or(allowed_IPv6reference);
    allowed_host.or(hostname);
}
1421
1422
/***
 * Characters that may be left unescaped within the authority component:
 * the union of server and reg_name, minus the separators
 * ';', ':', '@', '?' and '/'.
 */
public static final BitSet allowed_within_authority = new BitSet(256);

static {
    allowed_within_authority.or(server);
    allowed_within_authority.or(reg_name);
    final String separators = ";:@?/";
    for (int i = 0; i < separators.length(); i++) {
        allowed_within_authority.clear(separators.charAt(i));
    }
}
1437
1438
/***
 * Characters that may be left unescaped in the abs_path: the abs_path set
 * with the members of the percent set removed.
 */
public static final BitSet allowed_abs_path = new BitSet(256);

static {
    final BitSet table = allowed_abs_path;
    table.or(abs_path);
    table.andNot(percent);
}
1449
1450
/***
 * Characters that may be left unescaped in the rel_path: the rel_path set
 * without the percent sign.
 */
public static final BitSet allowed_rel_path = new BitSet(256);

static {
    final char percentSign = '%';
    allowed_rel_path.or(rel_path);
    allowed_rel_path.clear(percentSign);
}
1460
1461
/***
 * Characters that may be left unescaped within the path: the abs_path set
 * minus the separators '/', ';', '=' and '?'.
 */
public static final BitSet allowed_within_path = new BitSet(256);

static {
    allowed_within_path.or(abs_path);
    final String separators = "/;=?";
    for (int i = 0; i < separators.length(); i++) {
        allowed_within_path.clear(separators.charAt(i));
    }
}
1474
1475
/***
 * Characters that may be left unescaped in the query component: the uric
 * set without the percent sign.
 */
public static final BitSet allowed_query = new BitSet(256);

static {
    final char percentSign = '%';
    allowed_query.or(uric);
    allowed_query.clear(percentSign);
}
1485
1486
/***
 * Characters that may be left unescaped within the query component: the
 * allowed_query set minus the reserved characters and minus '#'.
 */
public static final BitSet allowed_within_query = new BitSet(256);

static {
    allowed_within_query.or(allowed_query);
    // Both removals only clear bits, so their relative order is irrelevant.
    allowed_within_query.clear('#');
    allowed_within_query.andNot(reserved);
}
1497
1498
/***
 * Characters that may be left unescaped in the fragment component: the
 * uric set without the percent sign.
 */
public static final BitSet allowed_fragment = new BitSet(256);

static {
    final char percentSign = '%';
    allowed_fragment.or(uric);
    allowed_fragment.clear(percentSign);
}
1508
1509
1510
1511
1512
// Kind of scheme-specific part; maintained by parseUriReference.
protected boolean _is_hier_part, _is_opaque_part;

// Kind of path; maintained by parseUriReference.
protected boolean _is_net_path, _is_abs_path, _is_rel_path;

// Kind of authority; maintained by parseAuthority.
protected boolean _is_reg_name, _is_server;

// Kind of host; maintained by parseAuthority.
protected boolean _is_hostname, _is_IPv4address, _is_IPv6reference;
1529
1530
1531
1532 /***
1533 * Encode with the default protocol charset.
1534 *
1535 * @param original the original character sequence
1536 * @param allowed those characters that are allowed within a component
1537 * @return URI character sequence
1538 * @exception IOException null component or unsupported character encoding
1539 */
1540 protected static char[] encode(String original, BitSet allowed)
1541 throws IOException {
1542
1543 return encode(original, allowed, _protocolCharset);
1544 }
1545
1546
1547 /***
1548 * Encodes URI string.
1549 *
1550 * This is a two mapping, one from original characters to octets, and
1551 * subsequently a second from octets to URI characters:
1552 * <p><blockquote><pre>
1553 * original character sequence->octet sequence->URI character sequence
1554 * </pre></blockquote><p>
1555 *
1556 * An escaped octet is encoded as a character triplet, consisting of the
1557 * percent character "%" followed by the two hexadecimal digits
1558 * representing the octet code. For example, "%20" is the escaped
1559 * encoding for the US-ASCII space character.
1560 * <p>
1561 * Conversion from the local filesystem character set to UTF-8 will
1562 * normally involve a two step process. First convert the local character
1563 * set to the UCS; then convert the UCS to UTF-8.
1564 * The first step in the process can be performed by maintaining a mapping
1565 * table that includes the local character set code and the corresponding
1566 * UCS code.
1567 * The next step is to convert the UCS character code to the UTF-8 encoding.
1568 * <p>
1569 * Mapping between vendor codepages can be done in a very similar manner
1570 * as described above.
1571 * <p>
1572 * The only time escape encodings can allowedly be made is when a URI is
1573 * being created from its component parts. The escape and validate methods
1574 * are internally performed within this method.
1575 *
1576 * @param original the original character sequence
1577 * @param allowed those characters that are allowed within a component
1578 * @param charset the protocol charset
1579 * @return URI character sequence
1580 * @exception IOException null component or unsupported character encoding
1581 */
1582 protected static char[] encode(String original, BitSet allowed,
1583 String charset) throws IOException {
1584
1585
1586 if (original == null) {
1587 throw new IOException(
1588 }
1589
1590 if (allowed == null) {
1591 throw new IOException(
1592 "URI: null allowed characters");
1593 }
1594 byte[] octets;
1595 try {
1596 octets = original.getBytes(charset);
1597 } catch (UnsupportedEncodingException error) {
1598 throw new IOException(
1599 }
1600 StringBuffer buf = new StringBuffer(octets.length);
1601 for (int i = 0; i < octets.length; i++) {
1602 char c = (char) octets[i];
1603 if (allowed.get(c)) {
1604 buf.append(c);
1605 } else {
1606 buf.append('%');
1607 byte b = octets[i];
1608 char hexadecimal = Character.forDigit((b >> 4) & 0xF, 16);
1609 buf.append(Character.toUpperCase(hexadecimal));
1610 hexadecimal = Character.forDigit(b & 0xF, 16);
1611 buf.append(Character.toUpperCase(hexadecimal));
1612 }
1613 }
1614
1615 return buf.toString().toCharArray();
1616 }
1617
1618
1619 /***
1620 * Decode with the default protocol charset.
1621 *
1622 * @param component the URI character sequence
1623 * @return original character sequence
1624 * @exception IOException incomplete trailing escape pattern
1625 * or unsupported character encoding
1626 */
1627 protected static String decode(char[] component) throws IOException {
1628 return decode(component, _protocolCharset);
1629 }
1630
1631
1632 /***
1633 * Decodes URI encoded string.
1634 *
1635 * This is a two mapping, one from URI characters to octets, and
1636 * subsequently a second from octets to original characters:
1637 * <p><blockquote><pre>
1638 * URI character sequence->octet sequence->original character sequence
1639 * </pre></blockquote><p>
1640 *
1641 * A URI must be separated into its components before the escaped
1642 * characters within those components can be allowedly decoded.
1643 * <p>
1644 * Notice that there is a chance that URI characters that are non UTF-8
1645 * may be parsed as valid UTF-8. A recent non-scientific analysis found
1646 * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a
1647 * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0%
1648 * false reading.
1649 * <p>
1650 * The percent "%" character always has the reserved purpose of being
1651 * the escape indicator, it must be escaped as "%25" in order to be used
1652 * as data within a URI.
1653 * <p>
1654 * The unescape method is internally performed within this method.
1655 *
1656 * @param component the URI character sequence
1657 * @param charset the protocol charset
1658 * @return original character sequence
1659 * @exception IOException incomplete trailing escape pattern
1660 * or unsupported character encoding
1661 */
1662 protected static String decode(char[] component, String charset)
1663 throws IOException {
1664
1665
1666 if (component == null) return null;
1667
1668 byte[] octets;
1669 try {
1670 octets = new String(component).getBytes(charset);
1671 } catch (UnsupportedEncodingException error) {
1672 throw new IOException(
1673 "URI: not supported " + charset + " encoding");
1674 }
1675 int length = octets.length;
1676 int oi = 0;
1677 for (int ii = 0; ii < length; oi++) {
1678 byte aByte = (byte) octets[ii++];
1679 if (aByte == '%' && ii+2 <= length) {
1680 byte high = (byte) Character.digit((char) octets[ii++], 16);
1681 byte low = (byte) Character.digit((char) octets[ii++], 16);
1682 if (high == -1 || low == -1) {
1683 throw new IOException(
1684 "URI: incomplete trailing escape pattern");
1685
1686 }
1687 aByte = (byte) ((high << 4) + low);
1688 }
1689 octets[oi] = (byte) aByte;
1690 }
1691
1692 String result;
1693 try {
1694 result = new String(octets, 0, oi, charset);
1695 } catch (UnsupportedEncodingException error) {
1696 throw new IOException(
1697 "URI: not supported " + charset + " encoding");
1698 }
1699
1700 return result;
1701 }
1702
1703
1704 /***
1705 * Pre-validate the unescaped URI string within a specific component.
1706 *
1707 * @param component the component string within the component
1708 * @param disallowed those characters disallowed within the component
1709 * @return if true, it doesn't have the disallowed characters
1710 * if false, the component is undefined or an incorrect one
1711 */
1712 protected boolean prevalidate(String component, BitSet disallowed) {
1713
1714 if (component == null) {
1715 return false;
1716 }
1717 char[] target = component.toCharArray();
1718 for (int i = 0; i < target.length; i++) {
1719 if (disallowed.get(target[i])) {
1720 return false;
1721 }
1722 }
1723 return true;
1724 }
1725
1726
1727 /***
1728 * Validate the URI characters within a specific component.
1729 * The component must be performed after escape encoding. Or it doesn't
1730 * include escaped characters.
1731 *
1732 * @param component the characters sequence within the component
1733 * @param generous those characters that are allowed within a component
1734 * @return if true, it's the correct URI character sequence
1735 */
1736 protected boolean validate(char[] component, BitSet generous) {
1737
1738 return validate(component, 0, -1, generous);
1739 }
1740
1741
1742 /***
1743 * Validate the URI characters within a specific component.
1744 * The component must be performed after escape encoding. Or it doesn't
1745 * include escaped characters.
1746 * <p>
1747 * It's not that much strict, generous. The strict validation might be
1748 * performed before being called this method.
1749 *
1750 * @param component the characters sequence within the component
1751 * @param soffset the starting offset of the given component
1752 * @param eoffset the ending offset of the given component
1753 * if -1, it means the length of the component
1754 * @param generous those characters that are allowed within a component
1755 * @return if true, it's the correct URI character sequence
1756 * @throws NullPointerException null component
1757 */
1758 protected boolean validate(char[] component, int soffset, int eoffset,
1759 BitSet generous) {
1760
1761 if (eoffset == -1) {
1762 eoffset = component.length -1;
1763 }
1764 for (int i = soffset; i <= eoffset; i++) {
1765 if (!generous.get(component[i])) return false;
1766 }
1767 return true;
1768 }
1769
1770
1771 /***
1772 * In order to avoid any possilbity of conflict with non-ASCII characters,
1773 * Parse a URI reference as a <code>String</code> with the character
1774 * encoding of the local system or the document.
1775 * <p>
1776 * The following line is the regular expression for breaking-down a URI
1777 * reference into its components.
1778 * <p><blockquote><pre>
1779 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1780 * 12 3 4 5 6 7 8 9
1781 * </pre></blockquote><p>
1782 * For example, matching the above expression to
1783 * http://jakarta.apache.org/ietf/uri/#Related
1784 * results in the following subexpression matches:
1785 * <p><blockquote><pre>
1786 * $1 = http:
1787 * scheme = $2 = http
1788 * $3 = //jakarta.apache.org
1789 * authority = $4 = jakarta.apache.org
1790 * path = $5 = /ietf/uri/
1791 * $6 = <undefined>
1792 * query = $7 = <undefined>
1793 * $8 = #Related
1794 * fragment = $9 = Related
1795 * </pre></blockquote><p>
1796 *
1797 * @param original the original character sequence
1798 * @param escaped <code>true</code> if <code>original</code> is escaped
1799 * @return the original character sequence
1800 * @exception IOException
1801 */
1802 protected void parseUriReference(String original, boolean escaped)
1803 throws IOException {
1804
1805
1806 if (original == null || original.length() == 0) {
1807 throw new IOException("URI-Reference required");
1808 }
1809
1810 /*** @
1811 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1812 */
1813 String tmp = original.trim();
1814
1815 /***
1816 * The length of the string sequence of characters.
1817 * It may not be equal to the length of the byte array.
1818 */
1819 int length = tmp.length();
1820
1821 /***
1822 * Remove the delimiters like angle brackets around an URI.
1823 */
1824 char[] firstDelimiter = { tmp.charAt(0) };
1825 if (validate(firstDelimiter, delims)) {
1826 if (length >= 2) {
1827 char[] lastDelimiter = { tmp.charAt(length - 1) };
1828 if (validate(lastDelimiter, delims)) {
1829 tmp = tmp.substring(1, length - 1);
1830 length = length - 2;
1831 }
1832 }
1833 }
1834
1835 /***
1836 * The starting index
1837 */
1838 int from = 0;
1839
1840 /***
1841 * The test flag whether the URI is started from the path component.
1842 */
1843 boolean isStartedFromPath = false;
1844 int atColon = tmp.indexOf(':');
1845 int atSlash = tmp.indexOf('/');
1846 if (atColon < 0 || (atSlash >= 0 && atSlash < atColon)) {
1847 isStartedFromPath = true;
1848 }
1849
1850 /***
1851 * <p><blockquote><pre>
1852 * @@@@@@@@
1853 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1854 * </pre></blockquote><p>
1855 */
1856 int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
1857 if (at == -1) at = 0;
1858
1859 /***
1860 * Parse the scheme.
1861 * <p><blockquote><pre>
1862 * scheme = $2 = http
1863 * @
1864 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1865 * </pre></blockquote><p>
1866 */
1867 if (at < length && tmp.charAt(at) == ':') {
1868 char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
1869 if (validate(target, scheme)) {
1870 _scheme = target;
1871 } else {
1872 throw new IOException("incorrect scheme");
1873 }
1874 from = ++at;
1875 }
1876
1877 /***
1878 * Parse the authority component.
1879 * <p><blockquote><pre>
1880 * authority = $4 = jakarta.apache.org
1881 * @@
1882 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1883 * </pre></blockquote><p>
1884 */
1885
1886 _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
1887 if (0 <= at && at < length && tmp.charAt(at) == '/') {
1888
1889 _is_hier_part = true;
1890 if (at + 2 < length && tmp.charAt(at + 1) == '/') {
1891
1892 int next = indexFirstOf(tmp, "/?#", at + 2);
1893 if (next == -1) {
1894 next = (tmp.substring(at + 2).length() == 0) ? at + 2 :
1895 tmp.length();
1896 }
1897 parseAuthority(tmp.substring(at + 2, next), escaped);
1898 from = at = next;
1899
1900 _is_net_path = true;
1901 }
1902 if (from == at) {
1903
1904 _is_abs_path = true;
1905 }
1906 }
1907
1908 /***
1909 * Parse the path component.
1910 * <p><blockquote><pre>
1911 * path = $5 = /ietf/uri/
1912 * @@@@@@
1913 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1914 * </pre></blockquote><p>
1915 */
1916 if (from < length) {
1917
1918 int next = indexFirstOf(tmp, "?#", from);
1919 if (next == -1) {
1920 next = tmp.length();
1921 }
1922 if (!_is_abs_path) {
1923 if (!escaped && prevalidate(tmp.substring(from, next),
1924 disallowed_rel_path) || escaped &&
1925 validate(tmp.substring(from, next).toCharArray(),
1926 rel_path)) {
1927
1928 _is_rel_path = true;
1929 } else if (!escaped && prevalidate(tmp.substring(from, next),
1930 disallowed_opaque_part) || escaped &&
1931 validate(tmp.substring(from, next).toCharArray(),
1932 opaque_part)) {
1933
1934 _is_opaque_part = true;
1935 } else {
1936
1937 _path = null;
1938 }
1939 }
1940 setPath(tmp.substring(from, next));
1941 at = next;
1942 }
1943
1944 /***
1945 * Parse the query component.
1946 * <p><blockquote><pre>
1947 * query = $7 = <undefined>
1948 * @@@@@@@@@
1949 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1950 * </pre></blockquote><p>
1951 */
1952 if (0 <= at && at+1 < length && tmp.charAt(at) == '?') {
1953 int next = tmp.indexOf('#', at + 1);
1954 if (next == -1) {
1955 next = tmp.length();
1956 }
1957 _query = (escaped) ? tmp.substring(at + 1, next).toCharArray() :
1958 encode(tmp.substring(at + 1, next), allowed_query);
1959 at = next;
1960 }
1961
1962 /***
1963 * Parse the fragment component.
1964 * <p><blockquote><pre>
1965 * fragment = $9 = Related
1966 * @@@@@@@@
1967 * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1968 * </pre></blockquote><p>
1969 */
1970 if (0 <= at && at+1 < length && tmp.charAt(at) == '#') {
1971 _fragment = (escaped) ? tmp.substring(at + 1).toCharArray() :
1972 encode(tmp.substring(at + 1), allowed_fragment);
1973 }
1974
1975
1976 setUriReference();
1977 }
1978
1979
1980 /***
1981 * Get the earlier index that to be searched for the first occurrance in
1982 * one of any of the given string.
1983 *
1984 * @param s the string to be indexed
1985 * @param delims the delimiters used to index
1986 * @return the earlier index if there are delimiters
1987 */
1988 protected int indexFirstOf(String s, String delims) {
1989 return indexFirstOf(s, delims, -1);
1990 }
1991
1992
1993 /***
1994 * Get the earlier index that to be searched for the first occurrance in
1995 * one of any of the given string.
1996 *
1997 * @param s the string to be indexed
1998 * @param delims the delimiters used to index
1999 * @param offset the from index
2000 * @return the earlier index if there are delimiters
2001 */
2002 protected int indexFirstOf(String s, String delims, int offset) {
2003 if (s == null || s.length() == 0) {
2004 return -1;
2005 }
2006 if (delims == null || delims.length() == 0) {
2007 return -1;
2008 }
2009
2010 if (offset < 0) {
2011 offset = 0;
2012 } else if (offset > s.length()) {
2013 return -1;
2014 }
2015
2016 int min = s.length();
2017 char[] delim = delims.toCharArray();
2018 for (int i = 0; i < delim.length; i++) {
2019 int at = s.indexOf(delim[i], offset);
2020 if (at >= 0 && at < min) {
2021 min = at;
2022 }
2023 }
2024 return (min == s.length()) ? -1 : min;
2025 }
2026
2027
2028 /***
2029 * Get the earlier index that to be searched for the first occurrance in
2030 * one of any of the given array.
2031 *
2032 * @param s the character array to be indexed
2033 * @param delim the delimiter used to index
2034 * @return the ealier index if there are a delimiter
2035 */
2036 protected int indexFirstOf(char[] s, char delim) {
2037 return indexFirstOf(s, delim, 0);
2038 }
2039
2040
2041 /***
2042 * Get the earlier index that to be searched for the first occurrance in
2043 * one of any of the given array.
2044 *
2045 * @param s the character array to be indexed
2046 * @param delim the delimiter used to index
2047 * @return the ealier index if there is a delimiter
2048 */
2049 protected int indexFirstOf(char[] s, char delim, int offset) {
2050 if (s == null || s.length == 0) {
2051 return -1;
2052 }
2053
2054 if (offset < 0) {
2055 offset = 0;
2056 } else if (offset > s.length) {
2057 return -1;
2058 }
2059 for (int i = offset; i < s.length; i++) {
2060 if (s[i] == delim) {
2061 return i;
2062 }
2063 }
2064 return -1;
2065 }
2066
2067
2068 /***
2069 * Parse the authority component.
2070 *
2071 * @param original the original character sequence of authority component
2072 * @param escaped <code>true</code> if <code>original</code> is escaped
2073 * @exception IOException
2074 */
2075 protected void parseAuthority(String original, boolean escaped)
2076 throws IOException {
2077
2078
2079 _is_reg_name = _is_server =
2080 _is_hostname = _is_IPv4address = _is_IPv6reference = false;
2081
2082 boolean has_port = true;
2083 int from = 0;
2084 int next = original.indexOf('@');
2085 if (next != -1) {
2086
2087 _userinfo = (escaped) ? original.substring(0, next).toCharArray() :
2088 encode(original.substring(0, next), allowed_userinfo);
2089 from = next + 1;
2090 }
2091 next = original.indexOf('[', from);
2092 if (next >= from) {
2093 next = original.indexOf(']', from);
2094 if (next == -1) {
2095 throw new IOException(
2096 } else {
2097 next++;
2098 }
2099
2100 _host = (escaped) ? original.substring(from, next).toCharArray() :
2101 encode(original.substring(from, next), allowed_IPv6reference);
2102
2103 _is_IPv6reference = true;
2104 } else {
2105 next = original.indexOf(':', from);
2106 if (next == -1) {
2107 next = original.length();
2108 has_port = false;
2109 }
2110
2111 _host = original.substring(from, next).toCharArray();
2112 if (validate(_host, IPv4address)) {
2113
2114 _is_IPv4address = true;
2115 } else if (validate(_host, hostname)) {
2116
2117 _is_hostname = true;
2118 } else {
2119
2120 _is_reg_name = true;
2121 }
2122 }
2123 if (_is_reg_name) {
2124
2125 _is_server = _is_hostname = _is_IPv4address =
2126 _is_IPv6reference = false;
2127
2128 _authority = (escaped) ? original.toString().toCharArray() :
2129 encode(original.toString(), allowed_reg_name);
2130 } else {
2131 if (original.length()-1 > next && has_port &&
2132 original.charAt(next) == ':') {
2133 from = next + 1;
2134 try {
2135 _port = Integer.parseInt(original.substring(from));
2136 } catch (NumberFormatException error) {
2137 throw new IOException(
2138 "URI: invalid port number");
2139 }
2140 }
2141
2142 StringBuffer buf = new StringBuffer();
2143 if (_userinfo != null) {
2144 buf.append(_userinfo);
2145 buf.append('@');
2146 }
2147 if (_host != null) {
2148 buf.append(_host);
2149 if (_port != -1) {
2150 buf.append(':');
2151 buf.append(_port);
2152 }
2153 }
2154 _authority = buf.toString().toCharArray();
2155
2156 _is_server = true;
2157 }
2158 }
2159
2160
2161 /***
2162 * Once it's parsed successfully, set this URI.
2163 *
2164 * @see #getRawURI
2165 */
2166 protected void setUriReference() {
2167
2168 StringBuffer buf = new StringBuffer();
2169
2170 if (_scheme != null) {
2171 buf.append(_scheme);
2172 buf.append(':');
2173 }
2174 if (_is_net_path) {
2175 buf.append("//");
2176 if (_authority != null) {
2177 if (_userinfo != null) {
2178 if (_host != null) {
2179 buf.append(_host);
2180 if (_port != -1) {
2181 buf.append(':');
2182 buf.append(_port);
2183 }
2184 }
2185 } else {
2186 buf.append(_authority);
2187 }
2188 }
2189 }
2190 if (_opaque != null && _is_opaque_part) {
2191 buf.append(_opaque);
2192 } else if (_path != null) {
2193
2194 if (_path.length != 0) {
2195 buf.append(_path);
2196 }
2197 }
2198 if (_query != null) {
2199 buf.append('?');
2200 buf.append(_query);
2201 }
2202 if (_fragment != null) {
2203 buf.append('#');
2204 buf.append(_fragment);
2205 }
2206
2207 _uri = buf.toString().toCharArray();
2208 }
2209
2210
2211
2212
2213 /***
2214 * Tell whether or not this URI is absolute.
2215 *
2216 * @return true iif this URI is absoluteURI
2217 */
2218 public boolean isAbsoluteURI() {
2219 return (_scheme != null);
2220 }
2221
2222
2223 /***
2224 * Tell whether or not this URI is relative.
2225 *
2226 * @return true iif this URI is relativeURI
2227 */
2228 public boolean isRelativeURI() {
2229 return (_scheme == null);
2230 }
2231
2232
2233 /***
2234 * Tell whether or not the absoluteURI of this URI is hier_part.
2235 *
2236 * @return true iif the absoluteURI is hier_part
2237 */
2238 public boolean isHierPart() {
2239 return _is_hier_part;
2240 }
2241
2242
2243 /***
2244 * Tell whether or not the absoluteURI of this URI is opaque_part.
2245 *
2246 * @return true iif the absoluteURI is opaque_part
2247 */
2248 public boolean isOpaquePart() {
2249 return _is_opaque_part;
2250 }
2251
2252
2253 /***
2254 * Tell whether or not the relativeURI or heir_part of this URI is net_path.
2255 * It's the same function as the has_authority() method.
2256 *
2257 * @return true iif the relativeURI or heir_part is net_path
2258 * @see #hasAuthority
2259 */
2260 public boolean isNetPath() {
2261 return _is_net_path || (_authority != null);
2262 }
2263
2264
2265 /***
2266 * Tell whether or not the relativeURI or hier_part of this URI is abs_path.
2267 *
2268 * @return true iif the relativeURI or hier_part is abs_path
2269 */
2270 public boolean isAbsPath() {
2271 return _is_abs_path;
2272 }
2273
2274
2275 /***
2276 * Tell whether or not the relativeURI of this URI is rel_path.
2277 *
2278 * @return true iif the relativeURI is rel_path
2279 */
2280 public boolean isRelPath() {
2281 return _is_rel_path;
2282 }
2283
2284
2285 /***
2286 * Tell whether or not this URI has authority.
2287 * It's the same function as the is_net_path() method.
2288 *
2289 * @return true iif this URI has authority
2290 * @see #isNetPath
2291 */
2292 public boolean hasAuthority() {
2293 return (_authority != null) || _is_net_path;
2294 }
2295
2296 /***
2297 * Tell whether or not the authority component of this URI is reg_name.
2298 *
2299 * @return true iif the authority component is reg_name
2300 */
2301 public boolean isRegName() {
2302 return _is_reg_name;
2303 }
2304
2305
2306 /***
2307 * Tell whether or not the authority component of this URI is server.
2308 *
2309 * @return true iif the authority component is server
2310 */
2311 public boolean isServer() {
2312 return _is_server;
2313 }
2314
2315
2316 /***
2317 * Tell whether or not this URI has userinfo.
2318 *
2319 * @return true iif this URI has userinfo
2320 */
2321 public boolean hasUserinfo() {
2322 return (_userinfo != null);
2323 }
2324
2325
2326 /***
2327 * Tell whether or not the host part of this URI is hostname.
2328 *
2329 * @return true iif the host part is hostname
2330 */
2331 public boolean isHostname() {
2332 return _is_hostname;
2333 }
2334
2335
2336 /***
2337 * Tell whether or not the host part of this URI is IPv4address.
2338 *
2339 * @return true iif the host part is IPv4address
2340 */
2341 public boolean isIPv4address() {
2342 return _is_IPv4address;
2343 }
2344
2345
2346 /***
2347 * Tell whether or not the host part of this URI is IPv6reference.
2348 *
2349 * @return true iif the host part is IPv6reference
2350 */
2351 public boolean isIPv6reference() {
2352 return _is_IPv6reference;
2353 }
2354
2355
2356 /***
2357 * Tell whether or not this URI has query.
2358 *
2359 * @return true iif this URI has query
2360 */
2361 public boolean hasQuery() {
2362 return (_query != null);
2363 }
2364
2365
2366 /***
2367 * Tell whether or not this URI has fragment.
2368 *
2369 * @return true iif this URI has fragment
2370 */
2371 public boolean hasFragment() {
2372 return (_fragment != null);
2373 }
2374
2375
2376
2377
2378
2379 /***
2380 * Set the default charset of the protocol.
2381 * <p>
2382 * The character set used to store files SHALL remain a local decision and
2383 * MAY depend on the capability of local operating systems. Prior to the
2384 * exchange of URIs they SHOULD be converted into a ISO/IEC 10646 format
2385 * and UTF-8 encoded. This approach, while allowing international exchange
2386 * of URIs, will still allow backward compatibility with older systems
2387 * because the code set positions for ASCII characters are identical to the
2388 * one byte sequence in UTF-8.
2389 * <p>
2390 * An individual URI scheme may require a single charset, define a default
2391 * charset, or provide a way to indicate the charset used.
2392 *
2393 * @param charset the default charset for each protocol
2394 */
2395 public static void setProtocolCharset(String charset) {
2396 _protocolCharset = charset;
2397 }
2398
2399
2400 /***
2401 * Get the default charset of the protocol.
2402 * <p>
2403 * An individual URI scheme may require a single charset, define a default
2404 * charset, or provide a way to indicate the charset used.
2405 * <p>
2406 * To work globally either requires support of a number of character sets
2407 * and to be able to convert between them, or the use of a single preferred
2408 * character set.
2409 * For support of global compatibility it is STRONGLY RECOMMENDED that
2410 * clients and servers use UTF-8 encoding when exchanging URIs.
2411 *
2412 * @return the charset string
2413 */
2414 public static String getProtocolCharset() {
2415 return _protocolCharset;
2416 }
2417
2418
2419 /***
2420 * Set the default charset of the document.
2421 * <p>
2422 * Notice that it will be possible to contain mixed characters (e.g.
2423 * ftp://host/KoreanNamespace/ChineseResource). To handle the Bi-directional
2424 * display of these character sets, the protocol charset could be simply
2425 * used again. Because it's not yet implemented that the insertion of BIDI
2426 * control characters at different points during composition is extracted.
2427 *
2428 * @param charset the default charset for the document
2429 */
2430 public static void setDocumentCharset(String charset) {
2431 _documentCharset = charset;
2432 }
2433
2434
2435 /***
2436 * Get the default charset of the document.
2437 *
2438 * @return the charset string
2439 */
2440 public static String getDocumentCharset() {
2441 return _documentCharset;
2442 }
2443
2444
2445
2446 /***
2447 * Get the scheme.
2448 *
2449 * @return the scheme
2450 */
2451 public char[] getRawScheme() {
2452 return _scheme;
2453 }
2454
2455
2456 /***
2457 * Get the scheme.
2458 *
2459 * @return the scheme
2460 * null if undefined scheme
2461 */
2462 public String getScheme() {
2463 return (_scheme == null) ? null : new String(_scheme);
2464 }
2465
2466
2467
2468 /***
2469 * Set the authority. It can be one type of server, hostport, hostname,
2470 * IPv4address, IPv6reference and reg_name.
2471 * <p><blockquote><pre>
2472 * authority = server | reg_name
2473 * </pre></blockquote><p>
2474 *
2475 * @param escapedAuthority the raw escaped authority
2476 * @exception IOException
2477 * @throws NullPointerException null authority
2478 */
2479 public void setRawAuthority(char[] escapedAuthority) throws IOException {
2480 parseAuthority(new String(escapedAuthority), true);
2481 setUriReference();
2482 }
2483
2484
2485 /***
2486 * Set the authority. It can be one type of server, hostport, hostname,
2487 * IPv4address, IPv6reference and reg_name.
2488 * Note that there is no setAuthority method by the escape encoding reason.
2489 *
2490 * @param escapedAuthority the escaped authority string
2491 * @exception IOException
2492 */
2493 public void setEscapedAuthority(String escapedAuthority)
2494 throws IOException {
2495
2496 parseAuthority(escapedAuthority, true);
2497 setUriReference();
2498 }
2499
2500
/**
 * Returns the raw-escaped authority.
 * <p>
 * The internal array itself is returned, not a copy.
 *
 * @return the raw-escaped authority, or null if none is set
 */
public char[] getRawAuthority() {
    return _authority;
}
2509
2510
2511 /***
2512 * Get the escaped authority.
2513 *
2514 * @return the escaped authority
2515 */
2516 public String getEscapedAuthority() {
2517 return (_authority == null) ? null : new String(_authority);
2518 }
2519
2520
2521 /***
2522 * Get the authority.
2523 *
2524 * @return the authority
2525 * @exception IOException
2526 * @see #decode
2527 */
2528 public String getAuthority() throws IOException {
2529 return (_authority == null) ? null : decode(_authority);
2530 }
2531
2532
2533
/**
 * Returns the raw-escaped userinfo.
 * <p>
 * The internal array itself is returned, not a copy.
 *
 * @return the raw-escaped userinfo, or null if none is set
 * @see #getAuthority
 */
public char[] getRawUserinfo() {
    return _userinfo;
}
2543
2544
2545 /***
2546 * Get the escaped userinfo.
2547 *
2548 * @return the escaped userinfo
2549 * @see #getAuthority
2550 */
2551 public String getEscapedUserinfo() {
2552 return (_userinfo == null) ? null : new String(_userinfo);
2553 }
2554
2555
2556 /***
2557 * Get the userinfo.
2558 *
2559 * @return the userinfo
2560 * @exception IOException
2561 * @see #decode
2562 * @see #getAuthority
2563 */
2564 public String getUserinfo() throws IOException {
2565 return (_userinfo == null) ? null : decode(_userinfo);
2566 }
2567
2568
2569
/**
 * Returns the raw host.
 * <p><blockquote><pre>
 * host = hostname | IPv4address | IPv6reference
 * </pre></blockquote><p>
 * The internal array itself is returned, not a copy.
 *
 * @return the host characters, or null if no host is set
 * @see #getAuthority
 */
public char[] getRawHost() {
    return _host;
}
2582
2583
2584 /***
2585 * Get the host.
2586 * <p><blockquote><pre>
2587 * host = hostname | IPv4address | IPv6reference
2588 * </pre></blockquote><p>
2589 *
2590 * @return the host
2591 * @exception IOException
2592 * @see #decode
2593 * @see #getAuthority
2594 */
2595 public String getHost() throws IOException {
2596 return decode(_host);
2597 }
2598
2599
2600
/**
 * Returns the port as stored in this URI.
 * <p>
 * To obtain a scheme-specific default port, use the protocol-specific
 * class extended from the URI class; this method only reports the
 * stored value.
 *
 * @return the port; per the original documentation, -1 means the default
 * port for the scheme applies or the server-based naming authority is
 * not supported in the specific URI
 */
public int getPort() {
    return _port;
}
2613
2614
2615
2616 /***
2617 * Set the path. The method couldn't be used by API programmers.
2618 *
2619 * @param path the path string
2620 * @exception IOException set incorrectly or fragment only
2621 * @see #encode
2622 */
2623 protected void setPath(String path) throws IOException {
2624
2625
2626 if (_is_net_path || _is_abs_path) {
2627 _path = encode(path, allowed_abs_path);
2628 } else if (_is_rel_path) {
2629 StringBuffer buff = new StringBuffer(path.length());
2630 int at = path.indexOf('/');
2631 if (at > 0) {
2632 buff.append(encode(path.substring(0, at), allowed_rel_path));
2633 buff.append(encode(path.substring(at), allowed_abs_path));
2634 } else {
2635 buff.append(encode(path, allowed_rel_path));
2636 }
2637 _path = buff.toString().toCharArray();
2638 } else if (_is_opaque_part) {
2639 _opaque = encode(path, allowed_opaque_part);
2640 } else {
2641 throw new IOException(
2642 }
2643 }
2644
2645
2646 /***
2647 * Resolve the base and relative path.
2648 *
2649 * @param base_path a character array of the base_path
2650 * @param rel_path a character array of the rel_path
2651 * @return the resolved path
2652 */
2653 protected char[] resolvePath(char[] base_path, char[] rel_path) {
2654
2655
2656 String base = (base_path == null) ? "" : new String(base_path);
2657 int at = base.lastIndexOf('/');
2658 if (at != -1) {
2659 base_path = base.substring(0, at + 1).toCharArray();
2660 }
2661
2662 if (rel_path == null || rel_path.length == 0) {
2663 return normalize(base_path);
2664 } else if (rel_path[0] == '/') {
2665 return rel_path;
2666 } else {
2667 StringBuffer buff = new StringBuffer(base.length() +
2668 rel_path.length);
2669 if (at != -1) {
2670 buff.append(base.substring(0, at + 1));
2671 buff.append(rel_path);
2672 }
2673 return normalize(buff.toString().toCharArray());
2674 }
2675 }
2676
2677
2678 /***
2679 * Get the raw-escaped current hierarchy level in the given path.
2680 * If the last namespace is a collection, the slash mark ('/') should be
2681 * ended with at the last character of the path string.
2682 *
2683 * @param path the path
2684 * @return the current hierarchy level
2685 * @exception IOException no hierarchy level
2686 */
2687 protected char[] getRawCurrentHierPath(char[] path) throws IOException {
2688
2689 if (_is_opaque_part) {
2690 throw new IOException(
2691 }
2692 if (path == null) {
2693 throw new IOException(
2694 }
2695 String buff = new String(path);
2696 int first = buff.indexOf('/');
2697 int last = buff.lastIndexOf('/');
2698 if (last == 0) {
2699 return rootPath;
2700 } else if (first != last && last != -1) {
2701 return buff.substring(0, last).toCharArray();
2702 }
2703
2704 return path;
2705 }
2706
2707
2708 /***
2709 * Get the raw-escaped current hierarchy level.
2710 *
2711 * @return the raw-escaped current hierarchy level
2712 * @exception IOException no hierarchy level
2713 */
2714 public char[] getRawCurrentHierPath() throws IOException {
2715 return (_path == null) ? null : getRawCurrentHierPath(_path);
2716 }
2717
2718
2719 /***
2720 * Get the escaped current hierarchy level.
2721 *
2722 * @return the escaped current hierarchy level
2723 * @exception IOException no hierarchy level
2724 */
2725 public String getEscapedCurrentHierPath() throws IOException {
2726 char[] path = getRawCurrentHierPath();
2727 return (path == null) ? null : new String(path);
2728 }
2729
2730
2731 /***
2732 * Get the current hierarchy level.
2733 *
2734 * @return the current hierarchy level
2735 * @exception IOException
2736 * @see #decode
2737 */
2738 public String getCurrentHierPath() throws IOException {
2739 char[] path = getRawCurrentHierPath();
2740 return (path == null) ? null : decode(path);
2741 }
2742
2743
2744 /***
2745 * Get the level above the this hierarchy level.
2746 *
2747 * @return the raw above hierarchy level
2748 * @exception IOException
2749 */
2750 public char[] getRawAboveHierPath() throws IOException {
2751 char[] path = getRawCurrentHierPath();
2752 return (path == null) ? null : getRawCurrentHierPath(path);
2753 }
2754
2755
2756 /***
2757 * Get the level above the this hierarchy level.
2758 *
2759 * @return the raw above hierarchy level
2760 * @exception IOException
2761 */
2762 public String getEscapedAboveHierPath() throws IOException {
2763 char[] path = getRawAboveHierPath();
2764 return (path == null) ? null : new String(path);
2765 }
2766
2767
2768 /***
2769 * Get the level above the this hierarchy level.
2770 *
2771 * @return the above hierarchy level
2772 * @exception IOException
2773 * @see #decode
2774 */
2775 public String getAboveHierPath() throws IOException {
2776 char[] path = getRawAboveHierPath();
2777 return (path == null) ? null : decode(path);
2778 }
2779
2780
2781 /***
2782 * Get the raw-escaped path.
2783 * <p><blockquote><pre>
2784 * path = [ abs_path | opaque_part ]
2785 * </pre></blockquote><p>
2786 *
2787 * @return the raw-escaped path
2788 */
2789 public char[] getRawPath() {
2790 return _is_opaque_part ? _opaque : _path;
2791 }
2792
2793
2794 /***
2795 * Get the escaped path.
2796 * <p><blockquote><pre>
2797 * path = [ abs_path | opaque_part ]
2798 * abs_path = "/" path_segments
2799 * opaque_part = uric_no_slash *uric
2800 * </pre></blockquote><p>
2801 *
2802 * @return the escaped path string
2803 */
2804 public String getEscapedPath() {
2805 char[] path = getRawPath();
2806 return (path == null) ? null : new String(path);
2807 }
2808
2809
2810 /***
2811 * Get the path.
2812 * <p><blockquote><pre>
2813 * path = [ abs_path | opaque_part ]
2814 * </pre></blockquote><p>
2815 * @return the path string
2816 * @exception IOException
2817 * @see #decode
2818 */
2819 public String getPath() throws IOException {
2820 char[] path = getRawPath();
2821 return (path == null) ? null : decode(path);
2822 }
2823
2824
2825 /***
2826 * Get the raw-escaped basename of the path.
2827 *
2828 * @return the raw-escaped basename
2829 */
2830 public char[] getRawName() {
2831 if (_path == null) return null;
2832
2833 int at = 0;
2834 for (int i = _path.length - 1; i >= 0; i--) {
2835 if (_path[i] == '/') {
2836 at = i + 1;
2837 break;
2838 }
2839 }
2840 int len = _path.length - at;
2841 char[] basename = new char[len];
2842 System.arraycopy(_path, at, basename, 0, len);
2843 return basename;
2844 }
2845
2846
2847 /***
2848 * Get the escaped basename of the path.
2849 *
2850 * @return the escaped basename string
2851 */
2852 public String getEscapedName() {
2853 char[] basename = getRawName();
2854 return (basename == null) ? null : new String(basename);
2855 }
2856
2857
2858 /***
2859 * Get the basename of the path.
2860 *
2861 * @return the basename string
2862 * @exception IOException incomplete trailing escape pattern
2863 * Or unsupported character encoding
2864 * @see #decode
2865 */
2866 public String getName() throws IOException {
2867 char[] basename = getRawName();
2868 return (basename == null) ? null : decode(getRawName());
2869 }
2870
2871
2872
2873 /***
2874 * Get the raw-escaped path and query.
2875 *
2876 * @return the raw-escaped path and query
2877 */
2878 public char[] getRawPathQuery() {
2879
2880 if (_path == null && _query == null) {
2881 return null;
2882 }
2883 StringBuffer buff = new StringBuffer();
2884 if (_path != null) {
2885 buff.append(_path);
2886 }
2887 if (_query != null) {
2888 buff.append('?');
2889 buff.append(_query);
2890 }
2891 return buff.toString().toCharArray();
2892 }
2893
2894
2895 /***
2896 * Get the escaped query.
2897 *
2898 * @return the escaped path and query string
2899 */
2900 public String getEscapedPathQuery() {
2901 char[] rawPathQuery = getRawPathQuery();
2902 return (rawPathQuery == null) ? null : new String(rawPathQuery);
2903 }
2904
2905
2906 /***
2907 * Get the path and query.
2908 *
2909 * @return the path and query string.
2910 * @exception IOException incomplete trailing escape pattern
2911 * Or unsupported character encoding
2912 * @see #decode
2913 */
2914 public String getPathQuery() throws IOException {
2915 char[] rawPathQuery = getRawPathQuery();
2916 return (rawPathQuery == null) ? null : decode(rawPathQuery);
2917 }
2918
2919
2920
2921 /***
2922 * Set the raw-escaped query.
2923 *
2924 * @param escapedQuery the raw-escaped query
2925 * @exception IOException escaped query not valid
2926 * @throws NullPointerException null query
2927 */
2928 public void setRawQuery(char[] escapedQuery) throws IOException {
2929 if (!validate(escapedQuery, query))
2930 throw new IOException(
2931 "URI: escaped query not valid");
2932 _query = escapedQuery;
2933 setUriReference();
2934 }
2935
2936
2937 /***
2938 * Set the escaped query string.
2939 *
2940 * @param escapedQuery the escaped query string
2941 * @exception IOException escaped query not valid
2942 * @throws NullPointerException null query
2943 */
2944 public void setEscapedQuery(String escapedQuery) throws IOException {
2945 setRawQuery(escapedQuery.toCharArray());
2946 }
2947
2948
2949 /***
2950 * Set the query.
2951 * When a query string is not misunderstood the reserved special characters
2952 * ("&", "=", "+", ",", and "$") within a query component, it is
2953 * recommended to use in encoding the whole query with this method.
2954 *
2955 * @param query the query string.
2956 * @exception IOException incomplete trailing escape pattern
2957 * Or unsupported character encoding
2958 * @throws NullPointerException null query
2959 * @see #encode
2960 */
2961 public void setQuery(String query) throws IOException {
2962 setRawQuery(encode(query, allowed_query));
2963 }
2964
2965
/**
 * Returns the raw-escaped query.
 * <p>
 * The internal array itself is returned, not a copy.
 *
 * @return the raw-escaped query, or null if no query is set
 */
public char[] getRawQuery() {
    return _query;
}
2974
2975
2976 /***
2977 * Get the escaped query.
2978 *
2979 * @return the escaped query string
2980 */
2981 public String getEscapedQuery() {
2982 return (_query == null) ? null : new String(_query);
2983 }
2984
2985
2986 /***
2987 * Get the query.
2988 *
2989 * @return the query string.
2990 * @exception IOException incomplete trailing escape pattern
2991 * Or unsupported character encoding
2992 * @see #decode
2993 */
2994 public String getQuery() throws IOException {
2995 return (_query == null) ? null : decode(_query);
2996 }
2997
2998
2999
3000 /***
3001 * Set the raw-escaped fragment.
3002 *
3003 * @param escapedFragment the raw-escaped fragment
3004 * @exception IOException escaped fragment not valid
3005 * @throws NullPointerException null fragment
3006 */
3007 public void setRawFragment(char[] escapedFragment) throws IOException {
3008 if (!validate(escapedFragment, fragment))
3009 throw new IOException(
3010 "URI: escaped fragment not valid");
3011 _fragment = escapedFragment;
3012 setUriReference();
3013 }
3014
3015
3016 /***
3017 * Set the escaped fragment string.
3018 *
3019 * @param escapedFragment the escaped fragment string
3020 * @exception IOException escaped fragment not valid
3021 * @throws NullPointerException null fragment
3022 */
3023 public void setEscapedFragment(String escapedFragment) throws IOException {
3024 char[] fragmentSequence = escapedFragment.toCharArray();
3025 if (!validate(fragmentSequence, fragment))
3026 throw new IOException(
3027 "URI: escaped fragment not valid");
3028 _fragment = fragmentSequence;
3029 setUriReference();
3030 }
3031
3032
3033 /***
3034 * Set the fragment.
3035 *
3036 * @param the fragment string.
3037 * @exception IOException
3038 * Or unsupported character encoding
3039 * @throws NullPointerException null fragment
3040 */
3041 public void setFragment(String fragment) throws IOException {
3042 _fragment = encode(fragment, allowed_fragment);
3043 setUriReference();
3044 }
3045
3046
/**
 * Returns the raw-escaped fragment.
 * <p>
 * The optional fragment identifier is not part of a URI, but is often
 * used in conjunction with one.
 * <p>
 * The format and interpretation of fragment identifiers depend on the
 * media type [RFC2046] of the retrieval result.
 * <p>
 * A fragment identifier is only meaningful when a URI reference is
 * intended for retrieval and the result of that retrieval is a document
 * for which the identified fragment is consistently defined.
 * <p>
 * The internal array itself is returned, not a copy.
 *
 * @return the raw-escaped fragment, or null if no fragment is set
 */
public char[] getRawFragment() {
    return _fragment;
}
3065
3066
3067 /***
3068 * Get the escaped fragment.
3069 *
3070 * @return the escaped fragment string
3071 */
3072 public String getEscapedFragment() {
3073 return (_fragment == null) ? null : new String(_fragment);
3074 }
3075
3076
3077 /***
3078 * Get the fragment.
3079 *
3080 * @return the fragment string
3081 * @exception IOException incomplete trailing escape pattern
3082 * Or unsupported character encoding
3083 * @see #decode
3084 */
3085 public String getFragment() throws IOException {
3086 return (_fragment == null) ? null : decode(_fragment);
3087 }
3088
3089
3090
3091 /***
3092 * Normalize the given hier path part.
3093 *
3094 * @param path the path to normalize
3095 * @return the normalized path
3096 */
3097 protected char[] normalize(char[] path) {
3098
3099 if (path == null) return null;
3100
3101 String normalized = new String(path);
3102 boolean endsWithSlash = true;
3103
3104 if (!normalized.endsWith("/")) {
3105 normalized += '/';
3106 endsWithSlash = false;
3107 }
3108 if (normalized.endsWith("/./") || normalized.endsWith("/../")) {
3109 endsWithSlash = true;
3110 }
3111
3112 while (true) {
3113 int at = normalized.indexOf("/./");
3114 if (at == -1) {
3115 break;
3116 }
3117 normalized = normalized.substring(0, at) +
3118 normalized.substring(at + 2);
3119 }
3120
3121 while (true) {
3122 int at = normalized.indexOf("/../");
3123 if (at == -1) {
3124 break;
3125 }
3126 if (at == 0) {
3127 normalized = "/";
3128 break;
3129 }
3130 int backward = normalized.lastIndexOf('/', at - 1);
3131 if (backward == -1) {
3132
3133 normalized = normalized.substring(at + 4);
3134 } else {
3135 normalized = normalized.substring(0, backward) +
3136 normalized.substring(at + 3);
3137 }
3138 }
3139
3140 while (true) {
3141 int at = normalized.indexOf("//");
3142 if (at == -1) {
3143 break;
3144 }
3145 normalized = normalized.substring(0, at) +
3146 normalized.substring(at + 1);
3147 }
3148 if (!endsWithSlash && normalized.endsWith("/")) {
3149 normalized = normalized.substring(0, normalized.length()-1);
3150 } else if (endsWithSlash && !normalized.endsWith("/")) {
3151 normalized = normalized + "/";
3152 }
3153
3154 return normalized.toCharArray();
3155 }
3156
3157
/**
 * Normalizes the path part of this URI in place by replacing the stored
 * path with the result of {@link #normalize(char[])}.
 */
public void normalize() {
    _path = normalize(_path);
}
3164
3165
3166 /***
3167 * Test if the first array is equal to the second array.
3168 *
3169 * @param first the first character array
3170 * @param second the second character array
3171 * @return true if they're equal
3172 */
3173 protected boolean equals(char[] first, char[] second) {
3174
3175 if (first == null && second == null) {
3176 return true;
3177 }
3178 if (first == null || second == null) {
3179 return false;
3180 }
3181 if (first.length != second.length) {
3182 return false;
3183 }
3184 for (int i = 0; i < first.length; i++) {
3185 if (first[i] != second[i]) {
3186 return false;
3187 }
3188 }
3189 return true;
3190 }
3191
3192
3193 /***
3194 * Test an object if this URI is equal to another.
3195 *
3196 * @param obj an object to compare
3197 * @return true if two URI objects are equal
3198 */
3199 public boolean equals(Object obj) {
3200
3201
3202 if (obj == this) {
3203 return true;
3204 }
3205 if (!(obj instanceof URI)) {
3206 return false;
3207 }
3208 URI another = (URI) obj;
3209
3210 if (!equals(_scheme, another._scheme)) {
3211 return false;
3212 }
3213
3214 if (!equals(_opaque, another._opaque)) {
3215 return false;
3216 }
3217
3218
3219 if (!equals(_authority, another._authority)) {
3220 return false;
3221 }
3222
3223 if (!equals(_path, another._path)) {
3224 return false;
3225 }
3226
3227 if (!equals(_query, another._query)) {
3228 return false;
3229 }
3230
3231 if (!equals(_fragment, another._fragment)) {
3232 return false;
3233 }
3234 return true;
3235 }
3236
3237
3238
/**
 * Writes the content of this URI using the stream's default
 * serialization of the object's fields.
 * <p>
 * NOTE(review): Java serialization only invokes a <code>writeObject</code>
 * hook that is declared <code>private</code>; since this one is
 * <code>protected</code> it is presumably not called automatically -
 * confirm whether that is intended.
 *
 * @param oos the object-output stream
 * @throws IOException if raised by the stream
 */
protected void writeObject(java.io.ObjectOutputStream oos)
    throws IOException {

    oos.defaultWriteObject();
}
3249
3250
/**
 * Reads a URI using the stream's default deserialization of the
 * object's fields.
 * <p>
 * NOTE(review): Java serialization only invokes a <code>readObject</code>
 * hook that is declared <code>private</code>; since this one is
 * <code>protected</code> it is presumably not called automatically -
 * confirm whether that is intended.
 *
 * @param ois the object-input stream
 * @throws ClassNotFoundException if raised by the stream
 * @throws IOException if raised by the stream
 */
protected void readObject(java.io.ObjectInputStream ois)
    throws ClassNotFoundException, IOException {

    ois.defaultReadObject();
}
3261
3262
3263
3264 /***
3265 * Compare this URI to another object.
3266 *
3267 * @param obj the object to be compared.
3268 * @return 0, if it's same,
3269 * -1, if failed, first being compared with in the authority component
3270 * @exception ClassCastException not URI argument
3271 * @throws NullPointerException null object
3272 */
3273 public int compareTo(Object obj) {
3274
3275 URI another = (URI) obj;
3276 if (!equals(_authority, another.getRawAuthority())) return -1;
3277 return toString().compareTo(another.toString());
3278 }
3279
3280
3281
3282 /***
3283 * Create and return a copy of this object, the URI-reference containing
3284 * the userinfo component. Notice that the whole URI-reference including
3285 * the userinfo component counld not be gotten as a <code>String</code>.
3286 * <p>
3287 * To copy the identical <code>URI</code> object including the userinfo
3288 * component, it should be used.
3289 *
3290 * @return a clone of this instance
3291 */
3292 public synchronized Object clone() {
3293
3294 URI instance = new URI();
3295
3296 instance._uri = _uri;
3297 instance._scheme = _scheme;
3298 instance._opaque = _opaque;
3299 instance._authority = _authority;
3300 instance._userinfo = _userinfo;
3301 instance._host = _host;
3302 instance._port = _port;
3303 instance._path = _path;
3304 instance._query = _query;
3305 instance._fragment = _fragment;
3306
3307 instance._is_hier_part = _is_hier_part;
3308 instance._is_opaque_part = _is_opaque_part;
3309 instance._is_net_path = _is_net_path;
3310 instance._is_abs_path = _is_abs_path;
3311 instance._is_rel_path = _is_rel_path;
3312 instance._is_reg_name = _is_reg_name;
3313 instance._is_server = _is_server;
3314 instance._is_hostname = _is_hostname;
3315 instance._is_IPv4address = _is_IPv4address;
3316 instance._is_IPv6reference = _is_IPv6reference;
3317
3318 return instance;
3319 }
3320
3321
3322
/**
 * Returns the raw-escaped URI character sequence, which is the form
 * useful for the protocol to be transported.
 * <p>
 * It is clearly unwise to use a URL that contains a password which is
 * intended to be secret. In particular, the use of a password within
 * the 'userinfo' component of a URL is strongly discouraged except
 * in those rare cases where the 'password' parameter is intended to be
 * public.
 * <p>
 * To get the individual parts of the userinfo, use the specific methods
 * of the specific URL class; they depend on the specific URL.
 * <p>
 * The internal array itself is returned, not a copy.
 *
 * @return URI character sequence, or null if it is not set
 */
public char[] getRawURI() {
    return _uri;
}
3341
3342
3343 /***
3344 * It can be gotten the URI character sequence. It's escaped.
3345 * For the purpose of the protocol to be transported, it will be useful.
3346 *
3347 * @return the URI string
3348 */
3349 public String getEscapedURI() {
3350 return (_uri == null) ? null : new String(_uri);
3351 }
3352
3353
3354 /***
3355 * It can be gotten the URI character sequence.
3356 *
3357 * @return the URI string
3358 * @exception IOException incomplete trailing escape pattern
3359 * Or unsupported character encoding
3360 * @see #decode
3361 */
3362 public String getURI() throws IOException {
3363 return (_uri == null) ? null : decode(_uri);
3364 }
3365
3366
/**
 * Returns the escaped URI string; this simply delegates to
 * {@link #getEscapedURI()} and may therefore return null.
 * <p>
 * In documents, the URI-reference form is used only without the userinfo
 * component (like http://jakarta.apache.org/) for security reasons,
 * although a URI-reference with a userinfo component can be parsed.
 * <p>
 * In other words, this URI and any of its subclasses must not expose the
 * URI-reference expression with the userinfo component, such as
 * http://user:password@hostport/restricted_zone.<br>
 * The API client programmer is expected to extract the user and password
 * manually. Subclasses will probably support that, but not as a whole
 * URI-reference expression.
 *
 * @return the URI string
 * @see #clone()
 */
public String toString() {
    return getEscapedURI();
}
3387
3388
3389
3390
3391 /***
3392 * A mapping to determine the (somewhat arbitrarily) preferred charset for
3393 * a given locale. Supports all locales recognized in JDK 1.1.
3394 * <p>
3395 * The distribution of this class is Servlets.com. It was originally
3396 * written by Jason Hunter [jhunter at acm.org] and used by with permission.
3397 */
3398 public static class LocaleToCharsetMap {
3399
3400 private static Hashtable map;
3401 static {
3402 map = new Hashtable();
3403 map.put("ar", "ISO-8859-6");
3404 map.put("be", "ISO-8859-5");
3405 map.put("bg", "ISO-8859-5");
3406 map.put("ca", "ISO-8859-1");
3407 map.put("cs", "ISO-8859-2");
3408 map.put("da", "ISO-8859-1");
3409 map.put("de", "ISO-8859-1");
3410 map.put("el", "ISO-8859-7");
3411 map.put("en", "ISO-8859-1");
3412 map.put("es", "ISO-8859-1");
3413 map.put("et", "ISO-8859-1");
3414 map.put("fi", "ISO-8859-1");
3415 map.put("fr", "ISO-8859-1");
3416 map.put("hr", "ISO-8859-2");
3417 map.put("hu", "ISO-8859-2");
3418 map.put("is", "ISO-8859-1");
3419 map.put("it", "ISO-8859-1");
3420 map.put("iw", "ISO-8859-8");
3421 map.put("ja", "Shift_JIS");
3422 map.put("ko", "EUC-KR");
3423 map.put("lt", "ISO-8859-2");
3424 map.put("lv", "ISO-8859-2");
3425 map.put("mk", "ISO-8859-5");
3426 map.put("nl", "ISO-8859-1");
3427 map.put("no", "ISO-8859-1");
3428 map.put("pl", "ISO-8859-2");
3429 map.put("pt", "ISO-8859-1");
3430 map.put("ro", "ISO-8859-2");
3431 map.put("ru", "ISO-8859-5");
3432 map.put("sh", "ISO-8859-5");
3433 map.put("sk", "ISO-8859-2");
3434 map.put("sl", "ISO-8859-2");
3435 map.put("sq", "ISO-8859-2");
3436 map.put("sr", "ISO-8859-5");
3437 map.put("sv", "ISO-8859-1");
3438 map.put("tr", "ISO-8859-9");
3439 map.put("uk", "ISO-8859-5");
3440 map.put("zh", "GB2312");
3441 map.put("zh_TW", "Big5");
3442 }
3443
3444 /***
3445 * Get the preferred charset for the given locale.
3446 *
3447 * @param locale the locale
3448 * @return the preferred charset
3449 * or null if the locale is not recognized
3450 */
3451 public static String getCharset(Locale locale) {
3452
3453 String charset = (String) map.get(locale.toString());
3454 if (charset != null) return charset;
3455
3456
3457 charset = (String) map.get(locale.getLanguage());
3458 return charset;
3459 }
3460
3461 }
3462
3463 }
3464