Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||||||
URI |
|
| 2.8932038834951457;2.893 |
1 | /* |
|
2 | * $Header$ |
|
3 | * $Revision: 905 $ |
|
4 | * $Date: 2006-02-19 01:44:03 +0000 (Sun, 19 Feb 2006) $ |
|
5 | * |
|
6 | * ==================================================================== |
|
7 | * |
|
8 | * The Apache Software License, Version 1.1 |
|
9 | * |
|
10 | * Copyright (c) 2002 the Apache Software Foundation. All rights |
|
11 | * reserved. |
|
12 | * |
|
13 | * Redistribution and use in source and binary forms, with or without |
|
14 | * modification, are permitted provided that the following conditions |
|
15 | * are met: |
|
16 | * |
|
17 | * 1. Redistributions of source code must retain the above copyright |
|
18 | * notice, this list of conditions and the following disclaimer. |
|
19 | * |
|
20 | * 2. Redistributions in binary form must reproduce the above copyright |
|
21 | * notice, this list of conditions and the following disclaimer in |
|
22 | * the documentation and/or other materials provided with the |
|
23 | * distribution. |
|
24 | * |
|
25 | * 3. The end-user documentation included with the redistribution, if |
|
26 | * any, must include the following acknowlegement: |
|
27 | * "This product includes software developed by the |
|
28 | * Apache Software Foundation (http://www.apache.org/)." |
|
29 | * Alternately, this acknowlegement may appear in the software itself, |
|
30 | * if and wherever such third-party acknowlegements normally appear. |
|
31 | * |
|
32 | * 4. The names "The Jakarta Project", "HttpClient", and "Apache Software |
|
33 | * Foundation" must not be used to endorse or promote products derived |
|
34 | * from this software without prior written permission. For written |
|
35 | * permission, please contact apache@apache.org. |
|
36 | * |
|
37 | * 5. Products derived from this software may not be called "Apache" |
|
38 | * nor may "Apache" appear in their names without prior written |
|
39 | * permission of the Apache Group. |
|
40 | * |
|
41 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED |
|
42 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
|
43 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
|
44 | * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR |
|
45 | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
46 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
47 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF |
|
48 | * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
|
49 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
|
50 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT |
|
51 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
|
52 | * SUCH DAMAGE. |
|
53 | * ==================================================================== |
|
54 | * |
|
55 | * This software consists of voluntary contributions made by many |
|
56 | * individuals on behalf of the Apache Software Foundation. For more |
|
57 | * information on the Apache Software Foundation, please see |
|
58 | * <http://www.apache.org/>. |
|
59 | * |
|
60 | * [Additional notices, if required by prior licensing conditions] |
|
61 | * |
|
62 | */ |
|
63 | ||
64 | // excellent class borrowed from Apache Commons project: |
|
65 | //package org.apache.commons.httpclient; |
|
66 | ||
67 | package net.wotonomy.web; |
|
68 | ||
69 | import java.io.IOException; |
|
70 | import java.io.Serializable; |
|
71 | import java.io.UnsupportedEncodingException; |
|
72 | import java.net.URL; |
|
73 | import java.security.AccessController; |
|
74 | import java.util.BitSet; |
|
75 | import java.util.Hashtable; |
|
76 | import java.util.Locale; |
|
77 | ||
78 | import sun.security.action.GetPropertyAction; |
|
79 | ||
80 | /** |
|
81 | * The interface for the URI(Uniform Resource Identifiers) version of RFC 2396. |
|
82 | * This class supports parsing a URI reference so that it can be extended for |
|
83 | * specific protocols, taking into account the character encoding of the |
|
84 | * protocol being transported and the charset of the document. |
|
85 | * <p> |
|
86 | * A URI is always in an "escaped" form, since escaping or unescaping a |
|
87 | * completed URI might change its semantics. |
|
88 | * <p> |
|
89 | * Implementers should be careful not to escape or unescape the same string |
|
90 | * more than once, since unescaping an already unescaped string might lead to |
|
91 | * misinterpreting a percent data character as another escaped character, |
|
92 | * or vice versa in the case of escaping an already escaped string. |
|
93 | * <p> |
|
94 | * In order to avoid these problems, data types are used as follows: |
|
95 | * <p><blockquote><pre> |
|
96 | * URI character sequence: char |
|
97 | * octet sequence: byte |
|
98 | * original character sequence: String |
|
99 | * </pre></blockquote><p> |
|
100 | * |
|
101 | * So, a URI is a sequence of characters as an array of a char type, which |
|
102 | * is not always represented as a sequence of octets as an array of byte. |
|
103 | * <p> |
|
104 | * |
|
105 | * URI Syntactic Components |
|
106 | * <p><blockquote><pre> |
|
107 | * - In general, written as follows: |
|
108 | * Absolute URI = <scheme>:<scheme-specific-part> |
|
109 | * Generic URI = <scheme>://<authority><path>?<query> |
|
110 | * |
|
111 | * - Syntax |
|
112 | * absoluteURI = scheme ":" ( hier_part | opaque_part ) |
|
113 | * hier_part = ( net_path | abs_path ) [ "?" query ] |
|
114 | * net_path = "//" authority [ abs_path ] |
|
115 | * abs_path = "/" path_segments |
|
116 | * </pre></blockquote><p> |
|
117 | * |
|
118 | * The following examples illustrate URI that are in common use. |
|
119 | * <pre> |
|
120 | * ftp://ftp.is.co.za/rfc/rfc1808.txt |
|
121 | * -- ftp scheme for File Transfer Protocol services |
|
122 | * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles |
|
123 | * -- gopher scheme for Gopher and Gopher+ Protocol services |
|
124 | * http://www.math.uio.no/faq/compression-faq/part1.html |
|
125 | * -- http scheme for Hypertext Transfer Protocol services |
|
126 | * mailto:mduerst@ifi.unizh.ch |
|
127 | * -- mailto scheme for electronic mail addresses |
|
128 | * news:comp.infosystems.www.servers.unix |
|
129 | * -- news scheme for USENET news groups and articles |
|
130 | * telnet://melvyl.ucop.edu/ |
|
131 | * -- telnet scheme for interactive services via the TELNET Protocol |
|
132 | * </pre> |
|
133 | * Please, notice that there are many modifications from URL(RFC 1738) and |
|
134 | * relative URL(RFC 1808). |
|
135 | * <p> |
|
136 | * <b>The expressions for a URI</b> |
|
137 | * <p><pre> |
|
138 | * For escaped URI forms |
|
139 | * - URI(char[]) // constructor |
|
140 | * - char[] getRawXxx() // method |
|
141 | * - String getEscapedXxx() // method |
|
142 | * - String toString() // method |
|
143 | * <p> |
|
144 | * For unescaped URI forms |
|
145 | * - URI(String) // constructor |
|
146 | * - String getXXX() // method |
|
147 | * </pre><p> |
|
148 | * |
|
149 | * @author <a href="mailto:jericho@apache.org">Sung-Gu</a> |
|
150 | * @version $Revision: 905 $ $Date: 2002/03/14 15:14:01 |
|
151 | */ |
|
152 | 0 | class URI implements Cloneable, Comparable, Serializable { |
153 | ||
154 | ||
155 | // ----------------------------------------------------------- Constructors |
|
156 | ||
protected URI() {
    // Nothing to parse: every field keeps its declared initial value.
    super();
}
159 | ||
160 | /** |
|
161 | * Construct a URI as an escaped form of a character array. |
|
162 | * A URI can be placed within double quotes or angle brackets, like |
|
163 | * "http://test.com/" and <http://test.com/> |
|
164 | * |
|
165 | * @param escaped the URI character sequence |
|
166 | * @exception IOException |
|
167 | * @throws NullPointerException if <code>escaped</code> is <code>null</code> |
|
168 | */ |
|
169 | 0 | public URI(char[] escaped) throws IOException { |
170 | 0 | parseUriReference(new String(escaped), true); |
171 | 0 | } |
172 | ||
173 | ||
174 | /** |
|
175 | * Construct a URI from the given string. |
|
176 | * <p><blockquote><pre> |
|
177 | * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] |
|
178 | * </pre></blockquote><p> |
|
179 | * A URI can be placed within double quotes or angle brackets, like |
|
180 | * "http://test.com/" and <http://test.com/> |
|
181 | * |
|
182 | * @param original the string to be represented to URI character sequence |
|
183 | * It is one of absoluteURI and relativeURI. |
|
184 | * @exception IOException |
|
185 | */ |
|
186 | 0 | public URI(String original) throws IOException { |
187 | 0 | parseUriReference(original, false); |
188 | 0 | } |
189 | ||
190 | /** |
|
191 | * Construct a URI from a URL. |
|
192 | * |
|
193 | * @param url a valid URL. |
|
194 | * @throws IOException |
|
195 | * @since 2.0 |
|
196 | */ |
|
public URI(URL url) throws IOException {
    // URL.toString() is defined as toExternalForm(); call it directly and
    // let the String constructor parse the result.
    this(url.toExternalForm());
}
200 | ||
201 | ||
202 | /** |
|
203 | * Construct a general URI from the given components. |
|
204 | * <p><blockquote><pre> |
|
205 | * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] |
|
206 | * absoluteURI = scheme ":" ( hier_part | opaque_part ) |
|
207 | * opaque_part = uric_no_slash *uric |
|
208 | * </pre></blockquote><p> |
|
209 | * It's for absolute URI = <scheme>:<scheme-specific-part># |
|
210 | * <fragment>. |
|
211 | * |
|
212 | * @param scheme the scheme string |
|
213 | * @param scheme_specific_part scheme_specific_part |
|
214 | * @param fragment the fragment string |
|
215 | * @exception IOException |
|
216 | */ |
|
public URI(String scheme, String scheme_specific_part, String fragment)
    throws IOException {

    // validate and construct the URI character sequence
    if (scheme == null) {
        throw new IOException(/*IOException.PARSING,*/ "URI: scheme required");
    }

    // Scheme names are plain ASCII. Lower-case them with a fixed locale so
    // the result does not depend on the JVM default locale (under Turkish
    // rules "FILE" would be mapped to a dotless i and fail validation).
    char[] s = scheme.toLowerCase(Locale.ENGLISH).toCharArray();
    if (!validate(s, URI.scheme)) {
        throw new IOException(/*IOException.PARSING,*/ "URI: incorrect scheme");
    }
    _scheme = s; // is_absoluteURI

    _opaque = encode(scheme_specific_part, allowed_opaque_part);
    // Set flag
    _is_opaque_part = true;

    // The fragment argument used to be dropped silently. Keep it so that
    // <scheme>:<scheme-specific-part>#<fragment> is actually representable.
    // NOTE(review): stored verbatim, not run through encode(); confirm
    // whether it should be escaped like the opaque part.
    if (fragment != null) {
        _fragment = fragment.toCharArray();
    }
    setUriReference();
}
235 | ||
236 | ||
237 | /** |
|
238 | * Construct a general URI from the given components. |
|
239 | * <p><blockquote><pre> |
|
240 | * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] |
|
241 | * absoluteURI = scheme ":" ( hier_part | opaque_part ) |
|
242 | * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] |
|
243 | * hier_part = ( net_path | abs_path ) [ "?" query ] |
|
244 | * </pre></blockquote><p> |
|
245 | * It's for absolute URI = <scheme>:<path>?<query>#< |
|
246 | * fragment> and relative URI = <path>?<query>#<fragment |
|
247 | * >. |
|
248 | * |
|
249 | * @param scheme the scheme string |
|
250 | * @param authority the authority string |
|
251 | * @param path the path string |
|
252 | * @param query the query string |
|
253 | * @param fragment the fragment string |
|
254 | * @exception IOException |
|
255 | */ |
|
256 | 0 | public URI(String scheme, String authority, String path, String query, |
257 | 0 | String fragment) throws IOException { |
258 | ||
259 | // validate and contruct the URI character sequence |
|
260 | 0 | StringBuffer buff = new StringBuffer(); |
261 | 0 | if (scheme != null) { |
262 | 0 | buff.append(scheme); |
263 | 0 | buff.append(':'); |
264 | } |
|
265 | 0 | if (authority != null) { |
266 | 0 | buff.append("//"); |
267 | 0 | buff.append(authority); |
268 | } |
|
269 | 0 | if (path != null) { // accept empty path |
270 | 0 | if ((scheme != null || authority != null) |
271 | 0 | && !path.startsWith("/")) { |
272 | 0 | throw new IOException(/*IOException.PARSING*,*/ |
273 | 0 | "URI: abs_path requested"); |
274 | } |
|
275 | 0 | buff.append(path); |
276 | } |
|
277 | 0 | if (query != null) { |
278 | 0 | buff.append('?'); |
279 | 0 | buff.append(query); |
280 | } |
|
281 | 0 | if (fragment != null) { |
282 | 0 | buff.append('#'); |
283 | 0 | buff.append(fragment); |
284 | } |
|
285 | 0 | parseUriReference(buff.toString(), false); |
286 | 0 | } |
287 | ||
288 | ||
289 | /** |
|
290 | * Construct a general URI from the given components. |
|
291 | * |
|
292 | * @param scheme the scheme string |
|
293 | * @param userinfo the userinfo string |
|
294 | * @param host the host string |
|
295 | * @param port the port number |
|
296 | * @exception IOException |
|
297 | */ |
|
public URI(String scheme, String userinfo, String host, int port)
    throws IOException {

    // Delegates to the seven-argument form with no path, query or fragment.
    this(scheme, userinfo, host, port,
        (String) null, (String) null, (String) null);
}
303 | ||
304 | ||
305 | /** |
|
306 | * Construct a general URI from the given components. |
|
307 | * |
|
308 | * @param scheme the scheme string |
|
309 | * @param userinfo the userinfo string |
|
310 | * @param host the host string |
|
311 | * @param port the port number |
|
312 | * @param path the path string |
|
313 | * @exception IOException |
|
314 | */ |
|
public URI(String scheme, String userinfo, String host, int port,
    String path) throws IOException {

    // Delegates to the seven-argument form with no query and no fragment.
    this(scheme, userinfo, host, port, path, (String) null, (String) null);
}
320 | ||
321 | ||
322 | /** |
|
323 | * Construct a general URI from the given components. |
|
324 | * |
|
325 | * @param scheme the scheme string |
|
326 | * @param userinfo the userinfo string |
|
327 | * @param host the host string |
|
328 | * @param port the port number |
|
329 | * @param path the path string |
|
330 | * @param query the query string |
|
331 | * @exception IOException |
|
332 | */ |
|
public URI(String scheme, String userinfo, String host, int port,
    String path, String query) throws IOException {

    // Delegates to the seven-argument form with no fragment.
    this(scheme, userinfo, host, port, path, query, (String) null);
}
338 | ||
339 | ||
340 | /** |
|
341 | * Construct a general URI from the given components. |
|
342 | * |
|
343 | * @param scheme the scheme string |
|
344 | * @param userinfo the userinfo string |
|
345 | * @param host the host string |
|
346 | * @param port the port number |
|
347 | * @param path the path string |
|
348 | * @param query the query string |
|
349 | * @param fragment the fragment string |
|
350 | * @exception IOException |
|
351 | */ |
|
352 | public URI(String scheme, String userinfo, String host, int port, |
|
353 | String path, String query, String fragment) throws IOException { |
|
354 | ||
355 | 0 | this(scheme, (host == null) ? null : |
356 | 0 | ((userinfo != null) ? userinfo + '@' : "") + host + |
357 | 0 | ((port != -1) ? ":" + port : ""), path, query, fragment); |
358 | 0 | } |
359 | ||
360 | ||
361 | /** |
|
362 | * Construct a general URI from the given components. |
|
363 | * |
|
364 | * @param scheme the scheme string |
|
365 | * @param host the host string |
|
366 | * @param path the path string |
|
367 | * @param fragment the fragment string |
|
368 | * @exception IOException |
|
369 | */ |
|
public URI(String scheme, String host, String path, String fragment)
    throws IOException {

    // The host is passed in the authority position of the five-string
    // constructor; the query is left out.
    this(scheme, host, path, (String) null, fragment);
}
375 | ||
376 | ||
377 | /** |
|
378 | * Construct a general URI with the given relative URI string. |
|
379 | * |
|
380 | * @param base the base URI |
|
381 | * @param relative the relative URI string |
|
382 | * @exception IOException |
|
383 | */ |
|
public URI(URI base, String relative) throws IOException {
    // Parse the reference text into a URI of its own first, then resolve
    // that URI against the base.
    this(base, new URI(relative));
}
387 | ||
388 | ||
389 | /** |
|
390 | * Construct a general URI with the given relative URI. |
|
391 | * <p><blockquote><pre> |
|
392 | * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] |
|
393 | * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] |
|
394 | * </pre></blockquote><p> |
|
395 | * Resolving Relative References to Absolute Form. |
|
396 | * |
|
397 | * <strong>Examples of Resolving Relative URI References</strong> |
|
398 | * |
|
399 | * Within an object with a well-defined base URI of |
|
400 | * <p><blockquote><pre> |
|
401 | * http://a/b/c/d;p?q |
|
402 | * </pre></blockquote><p> |
|
403 | * the relative URI would be resolved as follows: |
|
404 | * |
|
405 | * Normal Examples |
|
406 | * |
|
407 | * <p><blockquote><pre> |
|
408 | * g:h = g:h |
|
409 | * g = http://a/b/c/g |
|
410 | * ./g = http://a/b/c/g |
|
411 | * g/ = http://a/b/c/g/ |
|
412 | * /g = http://a/g |
|
413 | * //g = http://g |
|
414 | * ?y = http://a/b/c/?y |
|
415 | * g?y = http://a/b/c/g?y |
|
416 | * #s = (current document)#s |
|
417 | * g#s = http://a/b/c/g#s |
|
418 | * g?y#s = http://a/b/c/g?y#s |
|
419 | * ;x = http://a/b/c/;x |
|
420 | * g;x = http://a/b/c/g;x |
|
421 | * g;x?y#s = http://a/b/c/g;x?y#s |
|
422 | * . = http://a/b/c/ |
|
423 | * ./ = http://a/b/c/ |
|
424 | * .. = http://a/b/ |
|
425 | * ../ = http://a/b/ |
|
426 | * ../g = http://a/b/g |
|
427 | * ../.. = http://a/ |
|
428 | * ../../ = http://a/ |
|
429 | * ../../g = http://a/g |
|
430 | * </pre></blockquote><p> |
|
431 | * |
|
432 | * Some URI schemes do not allow a hierarchical syntax matching the |
|
433 | * <hier_part> syntax, and thus cannot use relative references. |
|
434 | * |
|
435 | * @param base the base URI |
|
436 | * @param relative the relative URI |
|
437 | * @exception IOException |
|
438 | */ |
|
public URI(URI base, URI relative) throws IOException {

    // Resolution needs a base that carries a scheme.
    if (base._scheme == null) {
        throw new IOException(/* IOException.PARSING,*/ "URI: base URI required");
    }

    // Start from the base's scheme and authority. (The former second
    // "base._scheme != null" test could never be false at this point.)
    this._scheme = base._scheme;
    this._authority = base._authority;

    // If either side is opaque, only the relative part's opaque text and
    // fragment are taken over; no path resolution is attempted.
    if (base._is_opaque_part || relative._is_opaque_part) {
        this._is_opaque_part = relative._is_opaque_part;
        this._opaque = relative._opaque;
        this._fragment = relative._fragment;
        this.setUriReference();
        return;
    }

    if (relative._scheme != null) {
        // The reference has its own scheme: take its scheme, authority and path.
        this._scheme = relative._scheme;
        copyAuthorityFrom(relative);
        copyPathFrom(relative);
    } else if (base._authority != null) {
        // No scheme on the reference: inherit the base's authority.
        copyAuthorityFrom(base);
    }

    // An authority on the reference overrides whatever was chosen above.
    if (relative._authority != null) {
        copyAuthorityFrom(relative);
        copyPathFrom(relative);
    }

    // resolve the path
    if ((relative._scheme == null && relative._authority == null)
        || equals(base._scheme, relative._scheme)) {
        this._path = resolvePath(base._path, relative._path);
    }

    // Query and fragment are never inherited from the base.
    if (relative._query != null) {
        this._query = relative._query;
    }
    if (relative._fragment != null) {
        this._fragment = relative._fragment;
    }
    this.setUriReference();
}

/**
 * Copies the net-path flag, the authority and its server / reg_name details
 * from <code>source</code>. The <code>_is_server</code> flag is copied together
 * with userinfo, host and port; previously only one of the three call sites
 * did so, which could leave host and port set while the flag stayed false.
 */
private void copyAuthorityFrom(URI source) {
    this._is_net_path = source._is_net_path;
    this._authority = source._authority;
    if (source._is_server) {
        this._is_server = true;
        this._userinfo = source._userinfo;
        this._host = source._host;
        this._port = source._port;
    } else if (source._is_reg_name) {
        this._is_reg_name = true;
    }
}

/** Copies the path and its absolute / relative flags from <code>source</code>. */
private void copyPathFrom(URI source) {
    this._is_abs_path = source._is_abs_path;
    this._is_rel_path = source._is_rel_path;
    this._path = source._path;
}
511 | ||
512 | // --------------------------------------------------- Instance Variables |
|
513 | ||
514 | static final long serialVersionUID = 604752400577948726L; |
|
515 | ||
516 | ||
517 | /** |
|
518 | * This Uniform Resource Identifier (URI). |
|
519 | * The URI is always in an "escaped" form, since escaping or unescaping |
|
520 | * a completed URI might change its semantics. |
|
521 | */ |
|
522 | 0 | protected char[] _uri = null; |
523 | ||
524 | ||
525 | /** |
|
526 | * The default charset of the protocol. RFC 2277, 2396 |
|
527 | */ |
|
528 | 0 | protected static String _protocolCharset = "UTF-8"; |
529 | ||
530 | ||
531 | /** |
|
532 | * The default charset of the document. RFC 2277, 2396 |
|
533 | * The platform's charset is used for the document by default. |
|
534 | */ |
|
535 | 0 | protected static String _documentCharset = null; |
536 | // Static initializer for _documentCharset |
|
537 | static { |
|
538 | 0 | Locale locale = Locale.getDefault(); |
539 | 0 | if (locale != null) { |
540 | // in order to support backward compatiblity |
|
541 | 0 | _documentCharset = LocaleToCharsetMap.getCharset(locale); |
542 | 0 | } else { |
543 | 0 | _documentCharset = (String)AccessController.doPrivileged( |
544 | 0 | new GetPropertyAction("file.encoding")); |
545 | } |
|
546 | } |
|
547 | ||
548 | /** |
|
549 | * The scheme. |
|
550 | */ |
|
551 | 0 | protected char[] _scheme = null; |
552 | ||
553 | ||
554 | /** |
|
555 | * The opaque. |
|
556 | */ |
|
557 | 0 | protected char[] _opaque = null; |
558 | ||
559 | ||
560 | /** |
|
561 | * The authority. |
|
562 | */ |
|
563 | 0 | protected char[] _authority = null; |
564 | ||
565 | ||
566 | /** |
|
567 | * The userinfo. |
|
568 | */ |
|
569 | 0 | protected char[] _userinfo = null; |
570 | ||
571 | ||
572 | /** |
|
573 | * The host. |
|
574 | */ |
|
575 | 0 | protected char[] _host = null; |
576 | ||
577 | ||
578 | /** |
|
579 | * The port. |
|
580 | */ |
|
581 | 0 | protected int _port = -1; |
582 | ||
583 | ||
584 | /** |
|
585 | * The path. |
|
586 | */ |
|
587 | 0 | protected char[] _path = null; |
588 | ||
589 | ||
590 | /** |
|
591 | * The query. |
|
592 | */ |
|
593 | 0 | protected char[] _query = null; |
594 | ||
595 | ||
596 | /** |
|
597 | * The fragment. |
|
598 | */ |
|
599 | 0 | protected char[] _fragment = null; |
600 | ||
601 | ||
602 | /** |
|
603 | * The root path. |
|
604 | */ |
|
605 | 0 | protected static char[] rootPath = { '/' }; |
606 | ||
607 | // ---------------------- Generous characters for each component validation |
|
608 | ||
609 | /** |
|
610 | * The percent "%" character always has the reserved purpose of being the |
|
611 | * escape indicator, it must be escaped as "%25" in order to be used as |
|
612 | * data within a URI. |
|
613 | */ |
|
protected static final BitSet percent = new BitSet(256);
static {
    // only the escape indicator itself belongs to this set
    final char escapeIndicator = '%';
    percent.set(escapeIndicator);
}
|
619 | ||
620 | ||
621 | /** |
|
622 | * BitSet for digit. |
|
623 | * <p><blockquote><pre> |
|
624 | * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | |
|
625 | * "8" | "9" |
|
626 | * </pre></blockquote><p> |
|
627 | */ |
|
protected static final BitSet digit = new BitSet(256);
static {
    // '0'..'9' inclusive; the upper bound of a range set is exclusive
    digit.set('0', '9' + 1);
}
|
635 | ||
636 | ||
637 | /** |
|
638 | * BitSet for alpha. |
|
639 | * <p><blockquote><pre> |
|
640 | * alpha = lowalpha | upalpha |
|
641 | * </pre></blockquote><p> |
|
642 | */ |
|
protected static final BitSet alpha = new BitSet(256);
static {
    // lowalpha and upalpha, each as one inclusive range
    alpha.set('a', 'z' + 1);
    alpha.set('A', 'Z' + 1);
}
|
653 | ||
654 | ||
655 | /** |
|
656 | * BitSet for alphanum (join of alpha & digit). |
|
657 | * <p><blockquote><pre> |
|
658 | * alphanum = alpha | digit |
|
659 | * </pre></blockquote><p> |
|
660 | */ |
|
protected static final BitSet alphanum = new BitSet(256);
static {
    // union of the two sets declared above; the order of the unions is irrelevant
    alphanum.or(digit);
    alphanum.or(alpha);
}
|
667 | ||
668 | ||
669 | /** |
|
670 | * BitSet for hex. |
|
671 | * <p><blockquote><pre> |
|
672 | * hex = digit | "A" | "B" | "C" | "D" | "E" | "F" | |
|
673 | * "a" | "b" | "c" | "d" | "e" | "f" |
|
674 | * </pre></blockquote><p> |
|
675 | */ |
|
protected static final BitSet hex = new BitSet(256);
static {
    // decimal digits plus a-f in both cases
    hex.set('A', 'F' + 1);
    hex.set('a', 'f' + 1);
    hex.or(digit);
}
|
687 | ||
688 | ||
689 | /** |
|
690 | * BitSet for escaped. |
|
691 | * <p><blockquote><pre> |
|
692 | * escaped = "%" hex hex |
|
693 | * </pre></blockquote><p> |
|
694 | */ |
|
protected static final BitSet escaped = new BitSet(256);
static {
    // every character that may occur in a "%" hex hex triplet
    escaped.or(hex);
    escaped.or(percent);
}
|
701 | ||
702 | ||
703 | /** |
|
704 | * BitSet for mark. |
|
705 | * <p><blockquote><pre> |
|
706 | * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | |
|
707 | * "(" | ")" |
|
708 | * </pre></blockquote><p> |
|
709 | */ |
|
protected static final BitSet mark = new BitSet(256);
static {
    // the nine mark characters of the grammar, in the order they are listed
    final String markChars = "-_.!~*'()";
    for (int i = 0; i < markChars.length(); i++) {
        mark.set(markChars.charAt(i));
    }
}
|
723 | ||
724 | ||
725 | /** |
|
726 | * Data characters that are allowed in a URI but do not have a reserved |
|
727 | * purpose are called unreserved. |
|
728 | * <p><blockquote><pre> |
|
729 | * unreserved = alphanum | mark |
|
730 | * </pre></blockquote><p> |
|
731 | */ |
|
protected static final BitSet unreserved = new BitSet(256);
static {
    // unreserved = alphanum | mark
    unreserved.or(mark);
    unreserved.or(alphanum);
}
|
738 | ||
739 | ||
740 | /** |
|
741 | * BitSet for reserved. |
|
742 | * <p><blockquote><pre> |
|
743 | * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | |
|
744 | * "$" | "," |
|
745 | * </pre></blockquote><p> |
|
746 | */ |
|
protected static final BitSet reserved = new BitSet(256);
static {
    // the ten reserved characters, in grammar order
    final String reservedChars = ";/?:@&=+$,";
    for (int i = 0; i < reservedChars.length(); i++) {
        reserved.set(reservedChars.charAt(i));
    }
}
|
761 | ||
762 | ||
763 | /** |
|
764 | * BitSet for uric. |
|
765 | * <p><blockquote><pre> |
|
766 | * uric = reserved | unreserved | escaped |
|
767 | * </pre></blockquote><p> |
|
768 | */ |
|
protected static final BitSet uric = new BitSet(256);
static {
    // uric = reserved | unreserved | escaped
    uric.or(escaped);
    uric.or(unreserved);
    uric.or(reserved);
}


/**
 * BitSet for fragment. This is the very same object as {@link #uric}, not
 * a copy.
 * <p><blockquote><pre>
 * fragment      = *uric
 * </pre></blockquote><p>
 */
protected static final BitSet fragment = uric;


/**
 * BitSet for query. This is the very same object as {@link #uric}, not a
 * copy.
 * <p><blockquote><pre>
 * query         = *uric
 * </pre></blockquote><p>
 */
protected static final BitSet query = uric;
794 | ||
795 | ||
796 | /** |
|
797 | * BitSet for pchar. |
|
798 | * <p><blockquote><pre> |
|
799 | * pchar = unreserved | escaped | |
|
800 | * ":" | "@" | "&" | "=" | "+" | "$" | "," |
|
801 | * </pre></blockquote><p> |
|
802 | */ |
|
protected static final BitSet pchar = new BitSet(256);
static {
    // the reserved characters that are still permitted inside a path segment
    final String extraChars = ":@&=+$,";
    for (int i = 0; i < extraChars.length(); i++) {
        pchar.set(extraChars.charAt(i));
    }
    pchar.or(escaped);
    pchar.or(unreserved);
}


/**
 * BitSet for param. This is the very same object as {@link #pchar}, not a
 * copy.
 * <p><blockquote><pre>
 * param         = *pchar
 * </pre></blockquote><p>
 */
protected static final BitSet param = pchar;
825 | ||
826 | ||
827 | /** |
|
828 | * BitSet for segment. |
|
829 | * <p><blockquote><pre> |
|
830 | * segment = *pchar *( ";" param ) |
|
831 | * </pre></blockquote><p> |
|
832 | */ |
|
protected static final BitSet segment = new BitSet(256);
static {
    // segment = *pchar *( ";" param ): the parameter separator plus both
    // character classes
    segment.set(';');
    segment.or(param);
    segment.or(pchar);
}
|
840 | ||
841 | ||
842 | /** |
|
843 | * BitSet for path segments. |
|
844 | * <p><blockquote><pre> |
|
845 | * path_segments = segment *( "/" segment ) |
|
846 | * </pre></blockquote><p> |
|
847 | */ |
|
protected static final BitSet path_segments = new BitSet(256);
static {
    // every segment character plus the separator between segments
    path_segments.or(segment);
    path_segments.set('/');
}
|
854 | ||
855 | ||
856 | /** |
|
857 | * URI absolute path. |
|
858 | * <p><blockquote><pre> |
|
859 | * abs_path = "/" path_segments |
|
860 | * </pre><blockquote><p> |
|
861 | */ |
|
protected static final BitSet abs_path = new BitSet(256);
static {
    // abs_path = "/" path_segments
    abs_path.or(path_segments);
    abs_path.set('/');
}
|
868 | ||
869 | ||
870 | /** |
|
871 | * URI bitset for encoding typical non-slash characters. |
|
872 | * <p><blockquote><pre> |
|
873 | * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" | |
|
874 | * "&" | "=" | "+" | "$" | "," |
|
875 | * </pre></blockquote><p> |
|
876 | */ |
|
protected static final BitSet uric_no_slash = new BitSet(256);
// Static initializer for uric_no_slash
static {
    uric_no_slash.or(unreserved);
    uric_no_slash.or(escaped);
    // Every reserved character except "/". The earlier code set ';' twice
    // and never set ':', although ':' is part of the grammar:
    //   uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
    //                   "&" | "=" | "+" | "$" | ","
    uric_no_slash.set(';');
    uric_no_slash.set('?');
    uric_no_slash.set(':');
    uric_no_slash.set('@');
    uric_no_slash.set('&');
    uric_no_slash.set('=');
    uric_no_slash.set('+');
    uric_no_slash.set('$');
    uric_no_slash.set(',');
}
|
892 | ||
893 | ||
894 | /** |
|
895 | * URI bitset that combines uric_no_slash and uric. |
|
896 | * <p><blockquote><pre> |
|
897 | * opaque_part = uric_no_slash *uric |
|
898 | * </pre></blockquote><p> |
|
899 | */ |
|
protected static final BitSet opaque_part = new BitSet(256);
static {
    // opaque_part = uric_no_slash *uric, flattened into one character set
    opaque_part.or(uric);
    opaque_part.or(uric_no_slash);
}
|
906 | ||
907 | ||
908 | /** |
|
909 | * URI bitset that combines absolute path and opaque part. |
|
910 | * <p><blockquote><pre> |
|
911 | * path = [ abs_path | opaque_part ] |
|
912 | * </pre></blockquote><p> |
|
913 | */ |
|
914 | 0 | protected static final BitSet path = new BitSet(256); |
915 | // Static initializer for path |
|
916 | static { |
|
917 | 0 | path.or(abs_path); |
918 | 0 | path.or(opaque_part); |
919 | } |
|
920 | ||
921 | ||
922 | /** |
|
923 | * Port, a logical alias for digit. |
|
924 | */ |
|
925 | 0 | protected static final BitSet port = digit; |
926 | ||
927 | ||
928 | /** |
|
929 | * Bitset that combines digit and dot for IPv4address. |
|
930 | * <p><blockquote><pre> |
|
931 | * IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit |
|
932 | * </pre></blockquote><p> |
|
933 | */ |
|
934 | 0 | protected static final BitSet IPv4address = new BitSet(256); |
935 | // Static initializer for IPv4address |
|
936 | static { |
|
937 | 0 | IPv4address.or(digit); |
938 | 0 | IPv4address.set('.'); |
939 | } |
|
940 | ||
941 | ||
942 | /** |
|
943 | * RFC 2373. |
|
944 | * <p><blockquote><pre> |
|
945 | * IPv6address = hexpart [ ":" IPv4address ] |
|
946 | * </pre></blockquote><p> |
|
947 | */ |
|
948 | 0 | protected static final BitSet IPv6address = new BitSet(256); |
949 | // Static initializer for IPv6address reference |
|
950 | static { |
|
951 | 0 | IPv6address.or(hex); // hexpart |
952 | 0 | IPv6address.set(':'); |
953 | 0 | IPv6address.or(IPv4address); |
954 | } |
|
955 | ||
956 | ||
957 | /** |
|
958 | * RFC 2732, 2373. |
|
959 | * <p><blockquote><pre> |
|
960 | * IPv6reference = "[" IPv6address "]" |
|
961 | * </pre></blockquote><p> |
|
962 | */ |
|
963 | 0 | protected static final BitSet IPv6reference = new BitSet(256); |
964 | // Static initializer for IPv6reference |
|
965 | static { |
|
966 | 0 | IPv6reference.set('['); |
967 | 0 | IPv6reference.or(IPv6address); |
968 | 0 | IPv6reference.set(']'); |
969 | } |
|
970 | ||
971 | ||
972 | /** |
|
973 | * BitSet for toplabel. |
|
974 | * <p><blockquote><pre> |
|
975 | * toplabel = alpha | alpha *( alphanum | "-" ) alphanum |
|
976 | * </pre></blockquote><p> |
|
977 | */ |
|
978 | 0 | protected static final BitSet toplabel = new BitSet(256); |
979 | // Static initializer for toplabel |
|
980 | static { |
|
981 | 0 | toplabel.or(alphanum); |
982 | 0 | toplabel.set('-'); |
983 | } |
|
984 | ||
985 | ||
986 | /** |
|
987 | * BitSet for domainlabel. |
|
988 | * <p><blockquote><pre> |
|
989 | * domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum |
|
990 | * </pre></blockquote><p> |
|
991 | */ |
|
992 | 0 | protected static final BitSet domainlabel = toplabel; |
993 | ||
994 | ||
995 | /** |
|
996 | * BitSet for hostname. |
|
997 | * <p><blockquote><pre> |
|
998 | * hostname = *( domainlabel "." ) toplabel [ "." ] |
|
999 | * </pre></blockquote><p> |
|
1000 | */ |
|
1001 | 0 | protected static final BitSet hostname = new BitSet(256); |
1002 | // Static initializer for hostname |
|
1003 | static { |
|
1004 | 0 | hostname.or(toplabel); |
1005 | // hostname.or(domainlabel); |
|
1006 | 0 | hostname.set('.'); |
1007 | } |
|
1008 | ||
1009 | ||
1010 | /** |
|
1011 | * BitSet for host. |
|
1012 | * <p><blockquote><pre> |
|
1013 | * host = hostname | IPv4address | IPv6reference |
|
1014 | * </pre></blockquote><p> |
|
1015 | */ |
|
1016 | 0 | protected static final BitSet host = new BitSet(256); |
1017 | // Static initializer for host |
|
1018 | static { |
|
1019 | 0 | host.or(hostname); |
1020 | // host.or(IPv4address); |
|
1021 | 0 | host.or(IPv6reference); // IPv4address |
1022 | } |
|
1023 | ||
1024 | ||
1025 | /** |
|
1026 | * BitSet for hostport. |
|
1027 | * <p><blockquote><pre> |
|
1028 | * hostport = host [ ":" port ] |
|
1029 | * </pre></blockquote><p> |
|
1030 | */ |
|
1031 | 0 | protected static final BitSet hostport = new BitSet(256); |
1032 | // Static initializer for hostport |
|
1033 | static { |
|
1034 | 0 | hostport.or(host); |
1035 | 0 | hostport.set(':'); |
1036 | 0 | hostport.or(port); |
1037 | } |
|
1038 | ||
1039 | ||
1040 | /** |
|
1041 | * Bitset for userinfo. |
|
1042 | * <p><blockquote><pre> |
|
1043 | * userinfo = *( unreserved | escaped | |
|
1044 | * ";" | ":" | "&" | "=" | "+" | "$" | "," ) |
|
1045 | * </pre></blockquote><p> |
|
1046 | */ |
|
1047 | 0 | protected static final BitSet userinfo = new BitSet(256); |
1048 | // Static initializer for userinfo |
|
1049 | static { |
|
1050 | 0 | userinfo.or(unreserved); |
1051 | 0 | userinfo.or(escaped); |
1052 | 0 | userinfo.set(';'); |
1053 | 0 | userinfo.set(':'); |
1054 | 0 | userinfo.set('&'); |
1055 | 0 | userinfo.set('='); |
1056 | 0 | userinfo.set('+'); |
1057 | 0 | userinfo.set('$'); |
1058 | 0 | userinfo.set(','); |
1059 | } |
|
1060 | ||
1061 | ||
1062 | /** |
|
1063 | * BitSet for within the userinfo component like user and password. |
|
1064 | */ |
|
1065 | 0 | public static final BitSet within_userinfo = new BitSet(256); |
1066 | // Static initializer for within_userinfo |
|
1067 | static { |
|
1068 | 0 | within_userinfo.or(userinfo); |
1069 | 0 | within_userinfo.clear(';'); // reserved within authority |
1070 | 0 | within_userinfo.clear(':'); |
1071 | 0 | within_userinfo.clear('@'); |
1072 | 0 | within_userinfo.clear('?'); |
1073 | 0 | within_userinfo.clear('/'); |
1074 | } |
|
1075 | ||
1076 | ||
1077 | /** |
|
1078 | * Bitset for server. |
|
1079 | * <p><blockquote><pre> |
|
1080 | * server = [ [ userinfo "@" ] hostport ] |
|
1081 | * </pre></blockquote><p> |
|
1082 | */ |
|
1083 | 0 | protected static final BitSet server = new BitSet(256); |
1084 | // Static initializer for server |
|
1085 | static { |
|
1086 | 0 | server.or(userinfo); |
1087 | 0 | server.set('@'); |
1088 | 0 | server.or(hostport); |
1089 | } |
|
1090 | ||
1091 | ||
1092 | /** |
|
1093 | * BitSet for reg_name. |
|
1094 | * <p><blockquote><pre> |
|
1095 | * reg_name = 1*( unreserved | escaped | "$" | "," | |
|
1096 | * ";" | ":" | "@" | "&" | "=" | "+" ) |
|
1097 | * </pre></blockquote><p> |
|
1098 | */ |
|
1099 | 0 | protected static final BitSet reg_name = new BitSet(256); |
1100 | // Static initializer for reg_name |
|
1101 | static { |
|
1102 | 0 | reg_name.or(unreserved); |
1103 | 0 | reg_name.or(escaped); |
1104 | 0 | reg_name.set('$'); |
1105 | 0 | reg_name.set(','); |
1106 | 0 | reg_name.set(';'); |
1107 | 0 | reg_name.set(':'); |
1108 | 0 | reg_name.set('@'); |
1109 | 0 | reg_name.set('&'); |
1110 | 0 | reg_name.set('='); |
1111 | 0 | reg_name.set('+'); |
1112 | } |
|
1113 | ||
1114 | ||
1115 | /** |
|
1116 | * BitSet for authority. |
|
1117 | * <p><blockquote><pre> |
|
1118 | * authority = server | reg_name |
|
1119 | * </pre></blockquote><p> |
|
1120 | */ |
|
1121 | 0 | protected static final BitSet authority = new BitSet(256); |
1122 | // Static initializer for authority |
|
1123 | static { |
|
1124 | 0 | authority.or(server); |
1125 | 0 | authority.or(reg_name); |
1126 | } |
|
1127 | ||
1128 | ||
1129 | /** |
|
1130 | * BitSet for scheme. |
|
1131 | * <p><blockquote><pre> |
|
1132 | * scheme = alpha *( alpha | digit | "+" | "-" | "." ) |
|
1133 | * </pre></blockquote><p> |
|
1134 | */ |
|
1135 | 0 | protected static final BitSet scheme = new BitSet(256); |
1136 | // Static initializer for scheme |
|
1137 | static { |
|
1138 | 0 | scheme.or(alpha); |
1139 | 0 | scheme.or(digit); |
1140 | 0 | scheme.set('+'); |
1141 | 0 | scheme.set('-'); |
1142 | 0 | scheme.set('.'); |
1143 | } |
|
1144 | ||
1145 | ||
1146 | /** |
|
1147 | * BitSet for rel_segment. |
|
1148 | * <p><blockquote><pre> |
|
1149 | * rel_segment = 1*( unreserved | escaped | |
|
1150 | * ";" | "@" | "&" | "=" | "+" | "$" | "," ) |
|
1151 | * </pre></blockquote><p> |
|
1152 | */ |
|
1153 | 0 | protected static final BitSet rel_segment = new BitSet(256); |
1154 | // Static initializer for rel_segment |
|
1155 | static { |
|
1156 | 0 | rel_segment.or(unreserved); |
1157 | 0 | rel_segment.or(escaped); |
1158 | 0 | rel_segment.set(';'); |
1159 | 0 | rel_segment.set('@'); |
1160 | 0 | rel_segment.set('&'); |
1161 | 0 | rel_segment.set('='); |
1162 | 0 | rel_segment.set('+'); |
1163 | 0 | rel_segment.set('$'); |
1164 | 0 | rel_segment.set(','); |
1165 | } |
|
1166 | ||
1167 | ||
1168 | /** |
|
1169 | * BitSet for rel_path. |
|
1170 | * <p><blockquote><pre> |
|
1171 | * rel_path = rel_segment [ abs_path ] |
|
1172 | * </pre></blockquote><p> |
|
1173 | */ |
|
1174 | 0 | protected static final BitSet rel_path = new BitSet(256); |
1175 | // Static initializer for rel_path |
|
1176 | static { |
|
1177 | 0 | rel_path.or(rel_segment); |
1178 | 0 | rel_path.or(abs_path); |
1179 | } |
|
1180 | ||
1181 | ||
1182 | /** |
|
1183 | * BitSet for net_path. |
|
1184 | * <p><blockquote><pre> |
|
1185 | * net_path = "//" authority [ abs_path ] |
|
1186 | * </pre></blockquote><p> |
|
1187 | */ |
|
1188 | 0 | protected static final BitSet net_path = new BitSet(256); |
1189 | // Static initializer for net_path |
|
1190 | static { |
|
1191 | 0 | net_path.set('/'); |
1192 | 0 | net_path.or(authority); |
1193 | 0 | net_path.or(abs_path); |
1194 | } |
|
1195 | ||
1196 | ||
1197 | /** |
|
1198 | * BitSet for hier_part. |
|
1199 | * <p><blockquote><pre> |
|
1200 | * hier_part = ( net_path | abs_path ) [ "?" query ] |
|
1201 | * </pre></blockquote><p> |
|
1202 | */ |
|
1203 | 0 | protected static final BitSet hier_part = new BitSet(256); |
1204 | // Static initializer for hier_part |
|
1205 | static { |
|
1206 | 0 | hier_part.or(net_path); |
1207 | 0 | hier_part.or(abs_path); |
1208 | // hier_part.set('?'); already included |
|
1209 | 0 | hier_part.or(query); |
1210 | } |
|
1211 | ||
1212 | ||
1213 | /** |
|
1214 | * BitSet for relativeURI. |
|
1215 | * <p><blockquote><pre> |
|
1216 | * relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] |
|
1217 | * </pre></blockquote><p> |
|
1218 | */ |
|
1219 | 0 | protected static final BitSet relativeURI = new BitSet(256); |
1220 | // Static initializer for relativeURI |
|
1221 | static { |
|
1222 | 0 | relativeURI.or(net_path); |
1223 | 0 | relativeURI.or(abs_path); |
1224 | 0 | relativeURI.or(rel_path); |
1225 | // relativeURI.set('?'); already included |
|
1226 | 0 | relativeURI.or(query); |
1227 | } |
|
1228 | ||
1229 | ||
1230 | /** |
|
1231 | * BitSet for absoluteURI. |
|
1232 | * <p><blockquote><pre> |
|
1233 | * absoluteURI = scheme ":" ( hier_part | opaque_part ) |
|
1234 | * </pre></blockquote><p> |
|
1235 | */ |
|
1236 | 0 | protected static final BitSet absoluteURI = new BitSet(256); |
1237 | // Static initializer for absoluteURI |
|
1238 | static { |
|
1239 | 0 | absoluteURI.or(scheme); |
1240 | 0 | absoluteURI.set(':'); |
1241 | 0 | absoluteURI.or(hier_part); |
1242 | 0 | absoluteURI.or(opaque_part); |
1243 | } |
|
1244 | ||
1245 | ||
1246 | /** |
|
1247 | * BitSet for URI-reference. |
|
1248 | * <p><blockquote><pre> |
|
1249 | * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] |
|
1250 | * </pre></blockquote><p> |
|
1251 | */ |
|
1252 | 0 | protected static final BitSet URI_reference = new BitSet(256); |
1253 | // Static initializer for URI_reference |
|
1254 | static { |
|
1255 | 0 | URI_reference.or(absoluteURI); |
1256 | 0 | URI_reference.or(relativeURI); |
1257 | 0 | URI_reference.set('#'); |
1258 | 0 | URI_reference.or(fragment); |
1259 | } |
|
1260 | ||
1261 | // ---------------------------- Characters disallowed within the URI syntax |
|
1262 | // Excluded US-ASCII Characters are like control, space, delims and unwise |
|
1263 | ||
1264 | /** |
|
1265 | * BitSet for control. |
|
1266 | */ |
|
1267 | 0 | public static final BitSet control = new BitSet(256); |
1268 | // Static initializer for control |
|
1269 | static { |
|
1270 | 0 | for (int i = 0; i <= 0x1F; i++) { |
1271 | 0 | control.set(i); |
1272 | } |
|
1273 | 0 | control.set(0x7F); |
1274 | } |
|
1275 | ||
1276 | /** |
|
1277 | * BitSet for space. |
|
1278 | */ |
|
1279 | 0 | public static final BitSet space = new BitSet(256); |
1280 | // Static initializer for space |
|
1281 | static { |
|
1282 | 0 | space.set(0x20); |
1283 | } |
|
1284 | ||
1285 | ||
1286 | /** |
|
1287 | * BitSet for delims. |
|
1288 | */ |
|
1289 | 0 | public static final BitSet delims = new BitSet(256); |
1290 | // Static initializer for delims |
|
1291 | static { |
|
1292 | 0 | delims.set('<'); |
1293 | 0 | delims.set('>'); |
1294 | 0 | delims.set('#'); |
1295 | 0 | delims.set('%'); |
1296 | 0 | delims.set('"'); |
1297 | } |
|
1298 | ||
1299 | ||
1300 | /** |
|
1301 | * BitSet for unwise. |
|
1302 | */ |
|
1303 | 0 | public static final BitSet unwise = new BitSet(256); |
1304 | // Static initializer for unwise |
|
1305 | static { |
|
1306 | 0 | unwise.set('{'); |
1307 | 0 | unwise.set('}'); |
1308 | 0 | unwise.set('|'); |
1309 | 0 | unwise.set('\\'); |
1310 | 0 | unwise.set('^'); |
1311 | 0 | unwise.set('['); |
1312 | 0 | unwise.set(']'); |
1313 | 0 | unwise.set('`'); |
1314 | } |
|
1315 | ||
1316 | ||
1317 | /** |
|
1318 | * Disallowed rel_path before escaping. |
|
1319 | */ |
|
1320 | 0 | public static final BitSet disallowed_rel_path = new BitSet(256); |
1321 | // Static initializer for disallowed_rel_path |
|
1322 | static { |
|
1323 | 0 | disallowed_rel_path.or(uric); |
1324 | 0 | disallowed_rel_path.andNot(rel_path); |
1325 | } |
|
1326 | ||
1327 | ||
1328 | /** |
|
1329 | * Disallowed opaque_part before escaping. |
|
1330 | */ |
|
1331 | 0 | public static final BitSet disallowed_opaque_part = new BitSet(256); |
1332 | // Static initializer for disallowed_opaque_part |
|
1333 | static { |
|
1334 | 0 | disallowed_opaque_part.or(uric); |
1335 | 0 | disallowed_opaque_part.andNot(opaque_part); |
1336 | } |
|
1337 | ||
1338 | // ----------------------- Characters allowed within and for each component |
|
1339 | ||
1340 | /** |
|
1341 | * Those characters that are allowed for the authority component. |
|
1342 | */ |
|
1343 | 0 | public static final BitSet allowed_authority = new BitSet(256); |
1344 | // Static initializer for allowed_authority |
|
1345 | static { |
|
1346 | 0 | allowed_authority.or(authority); |
1347 | 0 | allowed_authority.clear('%'); |
1348 | } |
|
1349 | ||
1350 | ||
1351 | /** |
|
1352 | * Those characters that are allowed for the opaque_part. |
|
1353 | */ |
|
1354 | 0 | public static final BitSet allowed_opaque_part = new BitSet(256); |
1355 | // Static initializer for allowed_opaque_part |
|
1356 | static { |
|
1357 | 0 | allowed_opaque_part.or(opaque_part); |
1358 | 0 | allowed_opaque_part.clear('%'); |
1359 | } |
|
1360 | ||
1361 | ||
1362 | /** |
|
1363 | * Those characters that are allowed for the reg_name. |
|
1364 | */ |
|
1365 | 0 | public static final BitSet allowed_reg_name = new BitSet(256); |
1366 | // Static initializer for allowed_reg_name |
|
1367 | static { |
|
1368 | 0 | allowed_reg_name.or(reg_name); |
1369 | // allowed_reg_name.andNot(percent); |
|
1370 | 0 | allowed_reg_name.clear('%'); |
1371 | } |
|
1372 | ||
1373 | ||
1374 | /** |
|
1375 | * Those characters that are allowed for the userinfo component. |
|
1376 | */ |
|
1377 | 0 | public static final BitSet allowed_userinfo = new BitSet(256); |
1378 | // Static initializer for allowed_userinfo |
|
1379 | static { |
|
1380 | 0 | allowed_userinfo.or(userinfo); |
1381 | // allowed_userinfo.andNot(percent); |
|
1382 | 0 | allowed_userinfo.clear('%'); |
1383 | } |
|
1384 | ||
1385 | ||
1386 | /** |
|
1387 | * Those characters that are allowed for within the userinfo component. |
|
1388 | */ |
|
1389 | 0 | public static final BitSet allowed_within_userinfo = new BitSet(256); |
1390 | // Static initializer for allowed_within_userinfo |
|
1391 | static { |
|
1392 | 0 | allowed_within_userinfo.or(within_userinfo); |
1393 | 0 | allowed_within_userinfo.clear('%'); |
1394 | } |
|
1395 | ||
1396 | ||
1397 | /** |
|
1398 | * Those characters that are allowed for the IPv6reference component. |
|
1399 | * The characters '[', ']' in IPv6reference should be excluded. |
|
1400 | */ |
|
1401 | 0 | public static final BitSet allowed_IPv6reference = new BitSet(256); |
1402 | // Static initializer for allowed_IPv6reference |
|
1403 | static { |
|
1404 | 0 | allowed_IPv6reference.or(IPv6reference); |
1405 | // allowed_IPv6reference.andNot(unwise); |
|
1406 | 0 | allowed_IPv6reference.clear('['); |
1407 | 0 | allowed_IPv6reference.clear(']'); |
1408 | } |
|
1409 | ||
1410 | ||
1411 | /** |
|
1412 | * Those characters that are allowed for the host component. |
|
1413 | * The characters '[', ']' in IPv6reference should be excluded. |
|
1414 | */ |
|
1415 | 0 | public static final BitSet allowed_host = new BitSet(256); |
1416 | // Static initializer for allowed_host |
|
1417 | static { |
|
1418 | 0 | allowed_host.or(hostname); |
1419 | 0 | allowed_host.or(allowed_IPv6reference); |
1420 | } |
|
1421 | ||
1422 | ||
1423 | /** |
|
1424 | * Those characters that are allowed within the authority component. |
|
1425 | */ |
|
1426 | 0 | public static final BitSet allowed_within_authority = new BitSet(256); |
1427 | // Static initializer for allowed_within_authority |
|
1428 | static { |
|
1429 | 0 | allowed_within_authority.or(server); |
1430 | 0 | allowed_within_authority.or(reg_name); |
1431 | 0 | allowed_within_authority.clear(';'); |
1432 | 0 | allowed_within_authority.clear(':'); |
1433 | 0 | allowed_within_authority.clear('@'); |
1434 | 0 | allowed_within_authority.clear('?'); |
1435 | 0 | allowed_within_authority.clear('/'); |
1436 | } |
|
1437 | ||
1438 | ||
1439 | /** |
|
1440 | * Those characters that are allowed for the abs_path. |
|
1441 | */ |
|
1442 | 0 | public static final BitSet allowed_abs_path = new BitSet(256); |
1443 | // Static initializer for allowed_abs_path |
|
1444 | static { |
|
1445 | 0 | allowed_abs_path.or(abs_path); |
1446 | // allowed_abs_path.set('/'); // already included |
|
1447 | 0 | allowed_abs_path.andNot(percent); |
1448 | } |
|
1449 | ||
1450 | ||
1451 | /** |
|
1452 | * Those characters that are allowed for the rel_path. |
|
1453 | */ |
|
1454 | 0 | public static final BitSet allowed_rel_path = new BitSet(256); |
1455 | // Static initializer for allowed_rel_path |
|
1456 | static { |
|
1457 | 0 | allowed_rel_path.or(rel_path); |
1458 | 0 | allowed_rel_path.clear('%'); |
1459 | } |
|
1460 | ||
1461 | ||
1462 | /** |
|
1463 | * Those characters that are allowed within the path. |
|
1464 | */ |
|
1465 | 0 | public static final BitSet allowed_within_path = new BitSet(256); |
1466 | // Static initializer for allowed_within_path |
|
1467 | static { |
|
1468 | 0 | allowed_within_path.or(abs_path); |
1469 | 0 | allowed_within_path.clear('/'); |
1470 | 0 | allowed_within_path.clear(';'); |
1471 | 0 | allowed_within_path.clear('='); |
1472 | 0 | allowed_within_path.clear('?'); |
1473 | } |
|
1474 | ||
1475 | ||
1476 | /** |
|
1477 | * Those characters that are allowed for the query component. |
|
1478 | */ |
|
1479 | 0 | public static final BitSet allowed_query = new BitSet(256); |
1480 | // Static initializer for allowed_query |
|
1481 | static { |
|
1482 | 0 | allowed_query.or(uric); |
1483 | 0 | allowed_query.clear('%'); |
1484 | } |
|
1485 | ||
1486 | ||
1487 | /** |
|
1488 | * Those characters that are allowed within the query component. |
|
1489 | */ |
|
1490 | 0 | public static final BitSet allowed_within_query = new BitSet(256); |
1491 | // Static initializer for allowed_within_query |
|
1492 | static { |
|
1493 | 0 | allowed_within_query.or(allowed_query); |
1494 | 0 | allowed_within_query.andNot(reserved); // excluded 'reserved' |
1495 | 0 | allowed_within_query.clear('#'); // avoid conflict with the fragment |
1496 | } |
|
1497 | ||
1498 | ||
1499 | /** |
|
1500 | * Those characters that are allowed for the fragment component. |
|
1501 | */ |
|
1502 | 0 | public static final BitSet allowed_fragment = new BitSet(256); |
1503 | // Static initializer for allowed_fragment |
|
1504 | static { |
|
1505 | 0 | allowed_fragment.or(uric); |
1506 | 0 | allowed_fragment.clear('%'); |
1507 | 0 | } |
1508 | ||
1509 | // ------------------------------------------- Flags for this URI-reference |
|
1510 | ||
1511 | // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] |
|
1512 | // absoluteURI = scheme ":" ( hier_part | opaque_part ) |
|
1513 | protected boolean _is_hier_part; |
|
1514 | protected boolean _is_opaque_part; |
|
1515 | // relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ] |
|
1516 | // hier_part = ( net_path | abs_path ) [ "?" query ] |
|
1517 | protected boolean _is_net_path; |
|
1518 | protected boolean _is_abs_path; |
|
1519 | protected boolean _is_rel_path; |
|
1520 | // net_path = "//" authority [ abs_path ] |
|
1521 | // authority = server | reg_name |
|
1522 | protected boolean _is_reg_name; |
|
1523 | protected boolean _is_server; // = _has_server |
|
1524 | // server = [ [ userinfo "@" ] hostport ] |
|
1525 | // host = hostname | IPv4address | IPv6reference |
|
1526 | protected boolean _is_hostname; |
|
1527 | protected boolean _is_IPv4address; |
|
1528 | protected boolean _is_IPv6reference; |
|
1529 | ||
1530 | // ------------------------------------------ Character and escape encoding |
|
1531 | ||
1532 | /** |
|
1533 | * Encode with the default protocol charset. |
|
1534 | * |
|
1535 | * @param original the original character sequence |
|
1536 | * @param allowed those characters that are allowed within a component |
|
1537 | * @return URI character sequence |
|
1538 | * @exception IOException null component or unsupported character encoding |
|
1539 | */ |
|
1540 | protected static char[] encode(String original, BitSet allowed) |
|
1541 | throws IOException { |
|
1542 | ||
1543 | 0 | return encode(original, allowed, _protocolCharset); |
1544 | } |
|
1545 | ||
1546 | ||
1547 | /** |
|
1548 | * Encodes URI string. |
|
1549 | * |
|
1550 | * This is a two-step mapping, one from original characters to octets, and |
|
1551 | * subsequently a second from octets to URI characters: |
|
1552 | * <p><blockquote><pre> |
|
1553 | * original character sequence->octet sequence->URI character sequence |
|
1554 | * </pre></blockquote><p> |
|
1555 | * |
|
1556 | * An escaped octet is encoded as a character triplet, consisting of the |
|
1557 | * percent character "%" followed by the two hexadecimal digits |
|
1558 | * representing the octet code. For example, "%20" is the escaped |
|
1559 | * encoding for the US-ASCII space character. |
|
1560 | * <p> |
|
1561 | * Conversion from the local filesystem character set to UTF-8 will |
|
1562 | * normally involve a two step process. First convert the local character |
|
1563 | * set to the UCS; then convert the UCS to UTF-8. |
|
1564 | * The first step in the process can be performed by maintaining a mapping |
|
1565 | * table that includes the local character set code and the corresponding |
|
1566 | * UCS code. |
|
1567 | * The next step is to convert the UCS character code to the UTF-8 encoding. |
|
1568 | * <p> |
|
1569 | * Mapping between vendor codepages can be done in a very similar manner |
|
1570 | * as described above. |
|
1571 | * <p> |
|
1572 | * The only time escape encodings can safely be made is when a URI is |
|
1573 | * being created from its component parts. The escape and validate methods |
|
1574 | * are internally performed within this method. |
|
1575 | * |
|
1576 | * @param original the original character sequence |
|
1577 | * @param allowed those characters that are allowed within a component |
|
1578 | * @param charset the protocol charset |
|
1579 | * @return URI character sequence |
|
1580 | * @exception IOException null component or unsupported character encoding |
|
1581 | */ |
|
1582 | protected static char[] encode(String original, BitSet allowed, |
|
1583 | String charset) throws IOException { |
|
1584 | ||
1585 | // encode original to uri characters. |
|
1586 | 0 | if (original == null) { |
1587 | 0 | throw new IOException(/*IOException.PARSING,*/ "URI: null"); |
1588 | } |
|
1589 | // escape octet to uri characters. |
|
1590 | 0 | if (allowed == null) { |
1591 | 0 | throw new IOException(/*IOException.PARSING,*/ |
1592 | 0 | "URI: null allowed characters"); |
1593 | } |
|
1594 | byte[] octets; |
|
1595 | try { |
|
1596 | 0 | octets = original.getBytes(charset); |
1597 | 0 | } catch (UnsupportedEncodingException error) { |
1598 | 0 | throw new IOException(/*IOException.UNSUPPORTED_ENCODING,*/ "Unsupported Encoding: " + charset); |
1599 | 0 | } |
1600 | 0 | StringBuffer buf = new StringBuffer(octets.length); |
1601 | 0 | for (int i = 0; i < octets.length; i++) { |
1602 | 0 | char c = (char) octets[i]; |
1603 | 0 | if (allowed.get(c)) { |
1604 | 0 | buf.append(c); |
1605 | 0 | } else { |
1606 | 0 | buf.append('%'); |
1607 | 0 | byte b = octets[i]; // use the original byte value |
1608 | 0 | char hexadecimal = Character.forDigit((b >> 4) & 0xF, 16); |
1609 | 0 | buf.append(Character.toUpperCase(hexadecimal)); // high |
1610 | 0 | hexadecimal = Character.forDigit(b & 0xF, 16); |
1611 | 0 | buf.append(Character.toUpperCase(hexadecimal)); // low |
1612 | } |
|
1613 | } |
|
1614 | ||
1615 | 0 | return buf.toString().toCharArray(); |
1616 | } |
|
1617 | ||
1618 | ||
1619 | /** |
|
1620 | * Decode with the default protocol charset. |
|
1621 | * |
|
1622 | * @param component the URI character sequence |
|
1623 | * @return original character sequence |
|
1624 | * @exception IOException incomplete trailing escape pattern |
|
1625 | * or unsupported character encoding |
|
1626 | */ |
|
1627 | protected static String decode(char[] component) throws IOException { |
|
1628 | 0 | return decode(component, _protocolCharset); |
1629 | } |
|
1630 | ||
1631 | ||
1632 | /** |
|
1633 | * Decodes URI encoded string. |
|
1634 | * |
|
1635 | * This is a two-step mapping, one from URI characters to octets, and |
|
1636 | * subsequently a second from octets to original characters: |
|
1637 | * <p><blockquote><pre> |
|
1638 | * URI character sequence->octet sequence->original character sequence |
|
1639 | * </pre></blockquote><p> |
|
1640 | * |
|
1641 | * A URI must be separated into its components before the escaped |
|
1642 | * characters within those components can be safely decoded. |
|
1643 | * <p> |
|
1644 | * Notice that there is a chance that URI characters that are non UTF-8 |
|
1645 | * may be parsed as valid UTF-8. A recent non-scientific analysis found |
|
1646 | * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a |
|
1647 | * 0.0005% false reading; other encodings such as ASCII or KOI-8 have a 0% |
|
1648 | * false reading. |
|
1649 | * <p> |
|
1650 | * The percent "%" character always has the reserved purpose of being |
|
1651 | * the escape indicator, it must be escaped as "%25" in order to be used |
|
1652 | * as data within a URI. |
|
1653 | * <p> |
|
1654 | * The unescape method is internally performed within this method. |
|
1655 | * |
|
1656 | * @param component the URI character sequence |
|
1657 | * @param charset the protocol charset |
|
1658 | * @return original character sequence |
|
1659 | * @exception IOException incomplete trailing escape pattern |
|
1660 | * or unsupported character encoding |
|
1661 | */ |
|
1662 | protected static String decode(char[] component, String charset) |
|
1663 | throws IOException { |
|
1664 | ||
1665 | // unescape uri characters to octets |
|
1666 | 0 | if (component == null) return null; |
1667 | ||
1668 | byte[] octets; |
|
1669 | try { |
|
1670 | 0 | octets = new String(component).getBytes(charset); |
1671 | 0 | } catch (UnsupportedEncodingException error) { |
1672 | 0 | throw new IOException(/* IOException.UNSUPPORTED_ENCODING, */ |
1673 | 0 | "URI: not supported " + charset + " encoding"); |
1674 | 0 | } |
1675 | 0 | int length = octets.length; |
1676 | 0 | int oi = 0; // output index |
1677 | 0 | for (int ii = 0; ii < length; oi++) { |
1678 | 0 | byte aByte = (byte) octets[ii++]; |
1679 | 0 | if (aByte == '%' && ii+2 <= length) { |
1680 | 0 | byte high = (byte) Character.digit((char) octets[ii++], 16); |
1681 | 0 | byte low = (byte) Character.digit((char) octets[ii++], 16); |
1682 | 0 | if (high == -1 || low == -1) { |
1683 | 0 | throw new IOException(/* IOException.ESCAPING, */ |
1684 | 0 | "URI: incomplete trailing escape pattern"); |
1685 | ||
1686 | } |
|
1687 | 0 | aByte = (byte) ((high << 4) + low); |
1688 | } |
|
1689 | 0 | octets[oi] = (byte) aByte; |
1690 | } |
|
1691 | ||
1692 | String result; |
|
1693 | try { |
|
1694 | 0 | result = new String(octets, 0, oi, charset); |
1695 | 0 | } catch (UnsupportedEncodingException error) { |
1696 | 0 | throw new IOException(/* IOException.UNSUPPORTED_ENCODING, */ |
1697 | 0 | "URI: not supported " + charset + " encoding"); |
1698 | 0 | } |
1699 | ||
1700 | 0 | return result; |
1701 | } |
|
1702 | ||
1703 | ||
1704 | /** |
|
1705 | * Pre-validate the unescaped URI string within a specific component. |
|
1706 | * |
|
1707 | * @param component the component string within the component |
|
1708 | * @param disallowed those characters disallowed within the component |
|
1709 | * @return if true, it doesn't have the disallowed characters |
|
1710 | * if false, the component is undefined or an incorrect one |
|
1711 | */ |
|
1712 | protected boolean prevalidate(String component, BitSet disallowed) { |
|
1713 | // prevalidate the given component by disallowed characters |
|
1714 | 0 | if (component == null) { |
1715 | 0 | return false; // undefined |
1716 | } |
|
1717 | 0 | char[] target = component.toCharArray(); |
1718 | 0 | for (int i = 0; i < target.length; i++) { |
1719 | 0 | if (disallowed.get(target[i])) { |
1720 | 0 | return false; |
1721 | } |
|
1722 | } |
|
1723 | 0 | return true; |
1724 | } |
|
1725 | ||
1726 | ||
1727 | /** |
|
1728 | * Validate the URI characters within a specific component. |
|
1729 | * The validation must be performed after escape encoding, or on a |

1730 | * component that doesn't include escaped characters. |
|
1731 | * |
|
1732 | * @param component the characters sequence within the component |
|
1733 | * @param generous those characters that are allowed within a component |
|
1734 | * @return if true, it's the correct URI character sequence |
|
1735 | */ |
|
1736 | protected boolean validate(char[] component, BitSet generous) { |
|
1737 | // validate each component by generous characters |
|
1738 | 0 | return validate(component, 0, -1, generous); |
1739 | } |
|
1740 | ||
1741 | ||
1742 | /** |
|
1743 | * Validate the URI characters within a specific component. |
|
1744 | * The validation must be performed after escape encoding, or on a |

1745 | * component that doesn't include escaped characters. |
|
1746 | * <p> |
|
1747 | * It is not very strict; it is generous. Strict validation might be |

1748 | * performed before this method is called. |
|
1749 | * |
|
1750 | * @param component the characters sequence within the component |
|
1751 | * @param soffset the starting offset of the given component |
|
1752 | * @param eoffset the ending offset of the given component |
|
1753 | * if -1, it means the length of the component |
|
1754 | * @param generous those characters that are allowed within a component |
|
1755 | * @return if true, it's the correct URI character sequence |
|
1756 | * @throws NullPointerException null component |
|
1757 | */ |
|
1758 | protected boolean validate(char[] component, int soffset, int eoffset, |
|
1759 | BitSet generous) { |
|
1760 | // validate each component by generous characters |
|
1761 | 0 | if (eoffset == -1) { |
1762 | 0 | eoffset = component.length -1; |
1763 | } |
|
1764 | 0 | for (int i = soffset; i <= eoffset; i++) { |
1765 | 0 | if (!generous.get(component[i])) return false; |
1766 | } |
|
1767 | 0 | return true; |
1768 | } |
|
1769 | ||
1770 | ||
1771 | /** |
|
1772 | * In order to avoid any possibility of conflict with non-ASCII characters, |
|
1773 | * Parse a URI reference as a <code>String</code> with the character |
|
1774 | * encoding of the local system or the document. |
|
1775 | * <p> |
|
1776 | * The following line is the regular expression for breaking-down a URI |
|
1777 | * reference into its components. |
|
1778 | * <p><blockquote><pre> |
|
1779 | * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? |
|
1780 | * 12 3 4 5 6 7 8 9 |
|
1781 | * </pre></blockquote><p> |
|
1782 | * For example, matching the above expression to |
|
1783 | * http://jakarta.apache.org/ietf/uri/#Related |
|
1784 | * results in the following subexpression matches: |
|
1785 | * <p><blockquote><pre> |
|
1786 | * $1 = http: |
|
1787 | * scheme = $2 = http |
|
1788 | * $3 = //jakarta.apache.org |
|
1789 | * authority = $4 = jakarta.apache.org |
|
1790 | * path = $5 = /ietf/uri/ |
|
1791 | * $6 = <undefined> |
|
1792 | * query = $7 = <undefined> |
|
1793 | * $8 = #Related |
|
1794 | * fragment = $9 = Related |
|
1795 | * </pre></blockquote><p> |
|
1796 | * |
|
1797 | * @param original the original character sequence |
|
1798 | * @param escaped <code>true</code> if <code>original</code> is escaped |
|
1799 | * @return the original character sequence |
|
1800 | * @exception IOException |
|
1801 | */ |
|
1802 | protected void parseUriReference(String original, boolean escaped) |
|
1803 | throws IOException { |
|
1804 | ||
1805 | // validate and contruct the URI character sequence |
|
1806 | 0 | if (original == null || original.length() == 0) { |
1807 | 0 | throw new IOException("URI-Reference required"); |
1808 | } |
|
1809 | ||
1810 | /** @ |
|
1811 | * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? |
|
1812 | */ |
|
1813 | 0 | String tmp = original.trim(); |
1814 | ||
1815 | /** |
|
1816 | * The length of the string sequence of characters. |
|
1817 | * It may not be equal to the length of the byte array. |
|
1818 | */ |
|
1819 | 0 | int length = tmp.length(); |
1820 | ||
1821 | /** |
|
1822 | * Remove the delimiters like angle brackets around an URI. |
|
1823 | */ |
|
1824 | 0 | char[] firstDelimiter = { tmp.charAt(0) }; |
1825 | 0 | if (validate(firstDelimiter, delims)) { |
1826 | 0 | if (length >= 2) { |
1827 | 0 | char[] lastDelimiter = { tmp.charAt(length - 1) }; |
1828 | 0 | if (validate(lastDelimiter, delims)) { |
1829 | 0 | tmp = tmp.substring(1, length - 1); |
1830 | 0 | length = length - 2; |
1831 | } |
|
1832 | } |
|
1833 | } |
|
1834 | ||
1835 | /** |
|
1836 | * The starting index |
|
1837 | */ |
|
1838 | 0 | int from = 0; |
1839 | ||
1840 | /** |
|
1841 | * The test flag whether the URI is started from the path component. |
|
1842 | */ |
|
1843 | 0 | boolean isStartedFromPath = false; |
1844 | 0 | int atColon = tmp.indexOf(':'); |
1845 | 0 | int atSlash = tmp.indexOf('/'); |
1846 | 0 | if (atColon < 0 || (atSlash >= 0 && atSlash < atColon)) { |
1847 | 0 | isStartedFromPath = true; |
1848 | } |
|
1849 | ||
1850 | /** |
|
1851 | * <p><blockquote><pre> |
|
1852 | * @@@@@@@@ |
|
1853 | * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? |
|
1854 | * </pre></blockquote><p> |
|
1855 | */ |
|
1856 | 0 | int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from); |
1857 | 0 | if (at == -1) at = 0; |
1858 | ||
1859 | /** |
|
1860 | * Parse the scheme. |
|
1861 | * <p><blockquote><pre> |
|
1862 | * scheme = $2 = http |
|
1863 | * @ |
|
1864 | * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? |
|
1865 | * </pre></blockquote><p> |
|
1866 | */ |
|
1867 | 0 | if (at < length && tmp.charAt(at) == ':') { |
1868 | 0 | char[] target = tmp.substring(0, at).toLowerCase().toCharArray(); |
1869 | 0 | if (validate(target, scheme)) { |
1870 | 0 | _scheme = target; |
1871 | 0 | } else { |
1872 | 0 | throw new IOException("incorrect scheme"); |
1873 | } |
|
1874 | 0 | from = ++at; |
1875 | } |
|
1876 | ||
1877 | /** |
|
1878 | * Parse the authority component. |
|
1879 | * <p><blockquote><pre> |
|
1880 | * authority = $4 = jakarta.apache.org |
|
1881 | * @@ |
|
1882 | * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? |
|
1883 | * </pre></blockquote><p> |
|
1884 | */ |
|
1885 | // Reset flags |
|
1886 | 0 | _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false; |
1887 | 0 | if (0 <= at && at < length && tmp.charAt(at) == '/') { |
1888 | // Set flag |
|
1889 | 0 | _is_hier_part = true; |
1890 | 0 | if (at + 2 < length && tmp.charAt(at + 1) == '/') { |
1891 | // the temporary index to start the search from |
|
1892 | 0 | int next = indexFirstOf(tmp, "/?#", at + 2); |
1893 | 0 | if (next == -1) { |
1894 | 0 | next = (tmp.substring(at + 2).length() == 0) ? at + 2 : |
1895 | 0 | tmp.length(); |
1896 | } |
|
1897 | 0 | parseAuthority(tmp.substring(at + 2, next), escaped); |
1898 | 0 | from = at = next; |
1899 | // Set flag |
|
1900 | 0 | _is_net_path = true; |
1901 | } |
|
1902 | 0 | if (from == at) { |
1903 | // Set flag |
|
1904 | 0 | _is_abs_path = true; |
1905 | } |
|
1906 | } |
|
1907 | ||
1908 | /** |
|
1909 | * Parse the path component. |
|
1910 | * <p><blockquote><pre> |
|
1911 | * path = $5 = /ietf/uri/ |
|
1912 | * @@@@@@ |
|
1913 | * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? |
|
1914 | * </pre></blockquote><p> |
|
1915 | */ |
|
1916 | 0 | if (from < length) { |
1917 | // rel_path = rel_segment [ abs_path ] |
|
1918 | 0 | int next = indexFirstOf(tmp, "?#", from); |
1919 | 0 | if (next == -1) { |
1920 | 0 | next = tmp.length(); |
1921 | } |
|
1922 | 0 | if (!_is_abs_path) { |
1923 | 0 | if (!escaped && prevalidate(tmp.substring(from, next), |
1924 | 0 | disallowed_rel_path) || escaped && |
1925 | 0 | validate(tmp.substring(from, next).toCharArray(), |
1926 | 0 | rel_path)) { |
1927 | // Set flag |
|
1928 | 0 | _is_rel_path = true; |
1929 | 0 | } else if (!escaped && prevalidate(tmp.substring(from, next), |
1930 | 0 | disallowed_opaque_part) || escaped && |
1931 | 0 | validate(tmp.substring(from, next).toCharArray(), |
1932 | 0 | opaque_part)) { |
1933 | // Set flag |
|
1934 | 0 | _is_opaque_part = true; |
1935 | 0 | } else { |
1936 | // the path component may be empty |
|
1937 | 0 | _path = null; |
1938 | } |
|
1939 | } |
|
1940 | 0 | setPath(tmp.substring(from, next)); |
1941 | 0 | at = next; |
1942 | } |
|
1943 | ||
1944 | /** |
|
1945 | * Parse the query component. |
|
1946 | * <p><blockquote><pre> |
|
1947 | * query = $7 = <undefined> |
|
1948 | * @@@@@@@@@ |
|
1949 | * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? |
|
1950 | * </pre></blockquote><p> |
|
1951 | */ |
|
1952 | 0 | if (0 <= at && at+1 < length && tmp.charAt(at) == '?') { |
1953 | 0 | int next = tmp.indexOf('#', at + 1); |
1954 | 0 | if (next == -1) { |
1955 | 0 | next = tmp.length(); |
1956 | } |
|
1957 | 0 | _query = (escaped) ? tmp.substring(at + 1, next).toCharArray() : |
1958 | 0 | encode(tmp.substring(at + 1, next), allowed_query); |
1959 | 0 | at = next; |
1960 | } |
|
1961 | ||
1962 | /** |
|
1963 | * Parse the fragment component. |
|
1964 | * <p><blockquote><pre> |
|
1965 | * fragment = $9 = Related |
|
1966 | * @@@@@@@@ |
|
1967 | * ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? |
|
1968 | * </pre></blockquote><p> |
|
1969 | */ |
|
1970 | 0 | if (0 <= at && at+1 < length && tmp.charAt(at) == '#') { |
1971 | 0 | _fragment = (escaped) ? tmp.substring(at + 1).toCharArray() : |
1972 | 0 | encode(tmp.substring(at + 1), allowed_fragment); |
1973 | } |
|
1974 | ||
1975 | // set this URI. |
|
1976 | 0 | setUriReference(); |
1977 | 0 | } |
1978 | ||
1979 | ||
1980 | /** |
|
1981 | * Get the earlier index that to be searched for the first occurrance in |
|
1982 | * one of any of the given string. |
|
1983 | * |
|
1984 | * @param s the string to be indexed |
|
1985 | * @param delims the delimiters used to index |
|
1986 | * @return the earlier index if there are delimiters |
|
1987 | */ |
|
1988 | protected int indexFirstOf(String s, String delims) { |
|
1989 | 0 | return indexFirstOf(s, delims, -1); |
1990 | } |
|
1991 | ||
1992 | ||
1993 | /** |
|
1994 | * Get the earlier index that to be searched for the first occurrance in |
|
1995 | * one of any of the given string. |
|
1996 | * |
|
1997 | * @param s the string to be indexed |
|
1998 | * @param delims the delimiters used to index |
|
1999 | * @param offset the from index |
|
2000 | * @return the earlier index if there are delimiters |
|
2001 | */ |
|
2002 | protected int indexFirstOf(String s, String delims, int offset) { |
|
2003 | 0 | if (s == null || s.length() == 0) { |
2004 | 0 | return -1; |
2005 | } |
|
2006 | 0 | if (delims == null || delims.length() == 0) { |
2007 | 0 | return -1; |
2008 | } |
|
2009 | // check boundaries |
|
2010 | 0 | if (offset < 0) { |
2011 | 0 | offset = 0; |
2012 | 0 | } else if (offset > s.length()) { |
2013 | 0 | return -1; |
2014 | } |
|
2015 | // s is never null |
|
2016 | 0 | int min = s.length(); |
2017 | 0 | char[] delim = delims.toCharArray(); |
2018 | 0 | for (int i = 0; i < delim.length; i++) { |
2019 | 0 | int at = s.indexOf(delim[i], offset); |
2020 | 0 | if (at >= 0 && at < min) { |
2021 | 0 | min = at; |
2022 | } |
|
2023 | } |
|
2024 | 0 | return (min == s.length()) ? -1 : min; |
2025 | } |
|
2026 | ||
2027 | ||
2028 | /** |
|
2029 | * Get the earlier index that to be searched for the first occurrance in |
|
2030 | * one of any of the given array. |
|
2031 | * |
|
2032 | * @param s the character array to be indexed |
|
2033 | * @param delim the delimiter used to index |
|
2034 | * @return the ealier index if there are a delimiter |
|
2035 | */ |
|
2036 | protected int indexFirstOf(char[] s, char delim) { |
|
2037 | 0 | return indexFirstOf(s, delim, 0); |
2038 | } |
|
2039 | ||
2040 | ||
2041 | /** |
|
2042 | * Get the earlier index that to be searched for the first occurrance in |
|
2043 | * one of any of the given array. |
|
2044 | * |
|
2045 | * @param s the character array to be indexed |
|
2046 | * @param delim the delimiter used to index |
|
2047 | * @return the ealier index if there is a delimiter |
|
2048 | */ |
|
2049 | protected int indexFirstOf(char[] s, char delim, int offset) { |
|
2050 | 0 | if (s == null || s.length == 0) { |
2051 | 0 | return -1; |
2052 | } |
|
2053 | // check boundaries |
|
2054 | 0 | if (offset < 0) { |
2055 | 0 | offset = 0; |
2056 | 0 | } else if (offset > s.length) { |
2057 | 0 | return -1; |
2058 | } |
|
2059 | 0 | for (int i = offset; i < s.length; i++) { |
2060 | 0 | if (s[i] == delim) { |
2061 | 0 | return i; |
2062 | } |
|
2063 | } |
|
2064 | 0 | return -1; |
2065 | } |
|
2066 | ||
2067 | ||
2068 | /** |
|
2069 | * Parse the authority component. |
|
2070 | * |
|
2071 | * @param original the original character sequence of authority component |
|
2072 | * @param escaped <code>true</code> if <code>original</code> is escaped |
|
2073 | * @exception IOException |
|
2074 | */ |
|
2075 | protected void parseAuthority(String original, boolean escaped) |
|
2076 | throws IOException { |
|
2077 | ||
2078 | // Reset flags |
|
2079 | 0 | _is_reg_name = _is_server = |
2080 | 0 | _is_hostname = _is_IPv4address = _is_IPv6reference = false; |
2081 | ||
2082 | 0 | boolean has_port = true; |
2083 | 0 | int from = 0; |
2084 | 0 | int next = original.indexOf('@'); |
2085 | 0 | if (next != -1) { // neither -1 and 0 |
2086 | // each protocol extented from URI supports the specific userinfo |
|
2087 | 0 | _userinfo = (escaped) ? original.substring(0, next).toCharArray() : |
2088 | 0 | encode(original.substring(0, next), allowed_userinfo); |
2089 | 0 | from = next + 1; |
2090 | } |
|
2091 | 0 | next = original.indexOf('[', from); |
2092 | 0 | if (next >= from) { |
2093 | 0 | next = original.indexOf(']', from); |
2094 | 0 | if (next == -1) { |
2095 | 0 | throw new IOException(/* IOException.PARSING,*/ "URI: IPv6reference"); |
2096 | } else { |
|
2097 | 0 | next++; |
2098 | } |
|
2099 | // In IPv6reference, '[', ']' should be excluded |
|
2100 | 0 | _host = (escaped) ? original.substring(from, next).toCharArray() : |
2101 | 0 | encode(original.substring(from, next), allowed_IPv6reference); |
2102 | // Set flag |
|
2103 | 0 | _is_IPv6reference = true; |
2104 | 0 | } else { // only for !_is_IPv6reference |
2105 | 0 | next = original.indexOf(':', from); |
2106 | 0 | if (next == -1) { |
2107 | 0 | next = original.length(); |
2108 | 0 | has_port = false; |
2109 | } |
|
2110 | // REMINDME: it doesn't need the pre-validation |
|
2111 | 0 | _host = original.substring(from, next).toCharArray(); |
2112 | 0 | if (validate(_host, IPv4address)) { |
2113 | // Set flag |
|
2114 | 0 | _is_IPv4address = true; |
2115 | 0 | } else if (validate(_host, hostname)) { |
2116 | // Set flag |
|
2117 | 0 | _is_hostname = true; |
2118 | 0 | } else { |
2119 | // Set flag |
|
2120 | 0 | _is_reg_name = true; |
2121 | } |
|
2122 | } |
|
2123 | 0 | if (_is_reg_name) { |
2124 | // Reset flags for a server-based naming authority |
|
2125 | 0 | _is_server = _is_hostname = _is_IPv4address = |
2126 | 0 | _is_IPv6reference = false; |
2127 | // set a registry-based naming authority |
|
2128 | 0 | _authority = (escaped) ? original.toString().toCharArray() : |
2129 | 0 | encode(original.toString(), allowed_reg_name); |
2130 | 0 | } else { |
2131 | 0 | if (original.length()-1 > next && has_port && |
2132 | 0 | original.charAt(next) == ':') { // not empty |
2133 | 0 | from = next + 1; |
2134 | try { |
|
2135 | 0 | _port = Integer.parseInt(original.substring(from)); |
2136 | 0 | } catch (NumberFormatException error) { |
2137 | 0 | throw new IOException(/*IOException.PARSING, */ |
2138 | 0 | "URI: invalid port number"); |
2139 | 0 | } |
2140 | } |
|
2141 | // set a server-based naming authority |
|
2142 | 0 | StringBuffer buf = new StringBuffer(); |
2143 | 0 | if (_userinfo != null) { // has_userinfo |
2144 | 0 | buf.append(_userinfo); |
2145 | 0 | buf.append('@'); |
2146 | } |
|
2147 | 0 | if (_host != null) { |
2148 | 0 | buf.append(_host); |
2149 | 0 | if (_port != -1) { |
2150 | 0 | buf.append(':'); |
2151 | 0 | buf.append(_port); |
2152 | } |
|
2153 | } |
|
2154 | 0 | _authority = buf.toString().toCharArray(); |
2155 | // Set flag |
|
2156 | 0 | _is_server = true; |
2157 | } |
|
2158 | 0 | } |
2159 | ||
2160 | ||
2161 | /** |
|
2162 | * Once it's parsed successfully, set this URI. |
|
2163 | * |
|
2164 | * @see #getRawURI |
|
2165 | */ |
|
2166 | protected void setUriReference() { |
|
2167 | // set _uri |
|
2168 | 0 | StringBuffer buf = new StringBuffer(); |
2169 | // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))? |
|
2170 | 0 | if (_scheme != null) { |
2171 | 0 | buf.append(_scheme); |
2172 | 0 | buf.append(':'); |
2173 | } |
|
2174 | 0 | if (_is_net_path) { |
2175 | 0 | buf.append("//"); |
2176 | 0 | if (_authority != null) { // has_authority |
2177 | 0 | if (_userinfo != null) { // by default, remove userinfo part |
2178 | 0 | if (_host != null) { |
2179 | 0 | buf.append(_host); |
2180 | 0 | if (_port != -1) { |
2181 | 0 | buf.append(':'); |
2182 | 0 | buf.append(_port); |
2183 | 0 | } |
2184 | } |
|
2185 | } else { |
|
2186 | 0 | buf.append(_authority); |
2187 | } |
|
2188 | } |
|
2189 | } |
|
2190 | 0 | if (_opaque != null && _is_opaque_part) { |
2191 | 0 | buf.append(_opaque); |
2192 | 0 | } else if (_path != null) { |
2193 | // _is_hier_part or _is_relativeURI |
|
2194 | 0 | if (_path.length != 0) { |
2195 | 0 | buf.append(_path); |
2196 | } |
|
2197 | } |
|
2198 | 0 | if (_query != null) { // has_query |
2199 | 0 | buf.append('?'); |
2200 | 0 | buf.append(_query); |
2201 | } |
|
2202 | 0 | if (_fragment != null) { // has_fragment |
2203 | 0 | buf.append('#'); |
2204 | 0 | buf.append(_fragment); |
2205 | } |
|
2206 | ||
2207 | 0 | _uri = buf.toString().toCharArray(); |
2208 | 0 | } |
2209 | ||
2210 | // ----------------------------------------------------------- Test methods |
|
2211 | ||
2212 | ||
2213 | /** |
|
2214 | * Tell whether or not this URI is absolute. |
|
2215 | * |
|
2216 | * @return true iif this URI is absoluteURI |
|
2217 | */ |
|
2218 | public boolean isAbsoluteURI() { |
|
2219 | 0 | return (_scheme != null); |
2220 | } |
|
2221 | ||
2222 | ||
2223 | /** |
|
2224 | * Tell whether or not this URI is relative. |
|
2225 | * |
|
2226 | * @return true iif this URI is relativeURI |
|
2227 | */ |
|
2228 | public boolean isRelativeURI() { |
|
2229 | 0 | return (_scheme == null); |
2230 | } |
|
2231 | ||
2232 | ||
2233 | /** |
|
2234 | * Tell whether or not the absoluteURI of this URI is hier_part. |
|
2235 | * |
|
2236 | * @return true iif the absoluteURI is hier_part |
|
2237 | */ |
|
2238 | public boolean isHierPart() { |
|
2239 | 0 | return _is_hier_part; |
2240 | } |
|
2241 | ||
2242 | ||
2243 | /** |
|
2244 | * Tell whether or not the absoluteURI of this URI is opaque_part. |
|
2245 | * |
|
2246 | * @return true iif the absoluteURI is opaque_part |
|
2247 | */ |
|
2248 | public boolean isOpaquePart() { |
|
2249 | 0 | return _is_opaque_part; |
2250 | } |
|
2251 | ||
2252 | ||
2253 | /** |
|
2254 | * Tell whether or not the relativeURI or heir_part of this URI is net_path. |
|
2255 | * It's the same function as the has_authority() method. |
|
2256 | * |
|
2257 | * @return true iif the relativeURI or heir_part is net_path |
|
2258 | * @see #hasAuthority |
|
2259 | */ |
|
2260 | public boolean isNetPath() { |
|
2261 | 0 | return _is_net_path || (_authority != null); |
2262 | } |
|
2263 | ||
2264 | ||
2265 | /** |
|
2266 | * Tell whether or not the relativeURI or hier_part of this URI is abs_path. |
|
2267 | * |
|
2268 | * @return true iif the relativeURI or hier_part is abs_path |
|
2269 | */ |
|
2270 | public boolean isAbsPath() { |
|
2271 | 0 | return _is_abs_path; |
2272 | } |
|
2273 | ||
2274 | ||
2275 | /** |
|
2276 | * Tell whether or not the relativeURI of this URI is rel_path. |
|
2277 | * |
|
2278 | * @return true iif the relativeURI is rel_path |
|
2279 | */ |
|
2280 | public boolean isRelPath() { |
|
2281 | 0 | return _is_rel_path; |
2282 | } |
|
2283 | ||
2284 | ||
2285 | /** |
|
2286 | * Tell whether or not this URI has authority. |
|
2287 | * It's the same function as the is_net_path() method. |
|
2288 | * |
|
2289 | * @return true iif this URI has authority |
|
2290 | * @see #isNetPath |
|
2291 | */ |
|
2292 | public boolean hasAuthority() { |
|
2293 | 0 | return (_authority != null) || _is_net_path; |
2294 | } |
|
2295 | ||
2296 | /** |
|
2297 | * Tell whether or not the authority component of this URI is reg_name. |
|
2298 | * |
|
2299 | * @return true iif the authority component is reg_name |
|
2300 | */ |
|
2301 | public boolean isRegName() { |
|
2302 | 0 | return _is_reg_name; |
2303 | } |
|
2304 | ||
2305 | ||
2306 | /** |
|
2307 | * Tell whether or not the authority component of this URI is server. |
|
2308 | * |
|
2309 | * @return true iif the authority component is server |
|
2310 | */ |
|
2311 | public boolean isServer() { |
|
2312 | 0 | return _is_server; |
2313 | } |
|
2314 | ||
2315 | ||
2316 | /** |
|
2317 | * Tell whether or not this URI has userinfo. |
|
2318 | * |
|
2319 | * @return true iif this URI has userinfo |
|
2320 | */ |
|
2321 | public boolean hasUserinfo() { |
|
2322 | 0 | return (_userinfo != null); |
2323 | } |
|
2324 | ||
2325 | ||
2326 | /** |
|
2327 | * Tell whether or not the host part of this URI is hostname. |
|
2328 | * |
|
2329 | * @return true iif the host part is hostname |
|
2330 | */ |
|
2331 | public boolean isHostname() { |
|
2332 | 0 | return _is_hostname; |
2333 | } |
|
2334 | ||
2335 | ||
2336 | /** |
|
2337 | * Tell whether or not the host part of this URI is IPv4address. |
|
2338 | * |
|
2339 | * @return true iif the host part is IPv4address |
|
2340 | */ |
|
2341 | public boolean isIPv4address() { |
|
2342 | 0 | return _is_IPv4address; |
2343 | } |
|
2344 | ||
2345 | ||
2346 | /** |
|
2347 | * Tell whether or not the host part of this URI is IPv6reference. |
|
2348 | * |
|
2349 | * @return true iif the host part is IPv6reference |
|
2350 | */ |
|
2351 | public boolean isIPv6reference() { |
|
2352 | 0 | return _is_IPv6reference; |
2353 | } |
|
2354 | ||
2355 | ||
2356 | /** |
|
2357 | * Tell whether or not this URI has query. |
|
2358 | * |
|
2359 | * @return true iif this URI has query |
|
2360 | */ |
|
2361 | public boolean hasQuery() { |
|
2362 | 0 | return (_query != null); |
2363 | } |
|
2364 | ||
2365 | ||
2366 | /** |
|
2367 | * Tell whether or not this URI has fragment. |
|
2368 | * |
|
2369 | * @return true iif this URI has fragment |
|
2370 | */ |
|
2371 | public boolean hasFragment() { |
|
2372 | 0 | return (_fragment != null); |
2373 | } |
|
2374 | ||
2375 | ||
2376 | // ---------------------------------------------------------------- Charset |
|
2377 | ||
2378 | ||
2379 | /** |
|
2380 | * Set the default charset of the protocol. |
|
2381 | * <p> |
|
2382 | * The character set used to store files SHALL remain a local decision and |
|
2383 | * MAY depend on the capability of local operating systems. Prior to the |
|
2384 | * exchange of URIs they SHOULD be converted into a ISO/IEC 10646 format |
|
2385 | * and UTF-8 encoded. This approach, while allowing international exchange |
|
2386 | * of URIs, will still allow backward compatibility with older systems |
|
2387 | * because the code set positions for ASCII characters are identical to the |
|
2388 | * one byte sequence in UTF-8. |
|
2389 | * <p> |
|
2390 | * An individual URI scheme may require a single charset, define a default |
|
2391 | * charset, or provide a way to indicate the charset used. |
|
2392 | * |
|
2393 | * @param charset the default charset for each protocol |
|
2394 | */ |
|
2395 | public static void setProtocolCharset(String charset) { |
|
2396 | 0 | _protocolCharset = charset; |
2397 | 0 | } |
2398 | ||
2399 | ||
2400 | /** |
|
2401 | * Get the default charset of the protocol. |
|
2402 | * <p> |
|
2403 | * An individual URI scheme may require a single charset, define a default |
|
2404 | * charset, or provide a way to indicate the charset used. |
|
2405 | * <p> |
|
2406 | * To work globally either requires support of a number of character sets |
|
2407 | * and to be able to convert between them, or the use of a single preferred |
|
2408 | * character set. |
|
2409 | * For support of global compatibility it is STRONGLY RECOMMENDED that |
|
2410 | * clients and servers use UTF-8 encoding when exchanging URIs. |
|
2411 | * |
|
2412 | * @return the charset string |
|
2413 | */ |
|
2414 | public static String getProtocolCharset() { |
|
2415 | 0 | return _protocolCharset; |
2416 | } |
|
2417 | ||
2418 | ||
2419 | /** |
|
2420 | * Set the default charset of the document. |
|
2421 | * <p> |
|
2422 | * Notice that it will be possible to contain mixed characters (e.g. |
|
2423 | * ftp://host/KoreanNamespace/ChineseResource). To handle the Bi-directional |
|
2424 | * display of these character sets, the protocol charset could be simply |
|
2425 | * used again. Because it's not yet implemented that the insertion of BIDI |
|
2426 | * control characters at different points during composition is extracted. |
|
2427 | * |
|
2428 | * @param charset the default charset for the document |
|
2429 | */ |
|
2430 | public static void setDocumentCharset(String charset) { |
|
2431 | 0 | _documentCharset = charset; |
2432 | 0 | } |
2433 | ||
2434 | ||
2435 | /** |
|
2436 | * Get the default charset of the document. |
|
2437 | * |
|
2438 | * @return the charset string |
|
2439 | */ |
|
2440 | public static String getDocumentCharset() { |
|
2441 | 0 | return _documentCharset; |
2442 | } |
|
2443 | ||
2444 | // ------------------------------------------------------------- The scheme |
|
2445 | ||
2446 | /** |
|
2447 | * Get the scheme. |
|
2448 | * |
|
2449 | * @return the scheme |
|
2450 | */ |
|
2451 | public char[] getRawScheme() { |
|
2452 | 0 | return _scheme; |
2453 | } |
|
2454 | ||
2455 | ||
2456 | /** |
|
2457 | * Get the scheme. |
|
2458 | * |
|
2459 | * @return the scheme |
|
2460 | * null if undefined scheme |
|
2461 | */ |
|
2462 | public String getScheme() { |
|
2463 | 0 | return (_scheme == null) ? null : new String(_scheme); |
2464 | } |
|
2465 | ||
2466 | // ---------------------------------------------------------- The authority |
|
2467 | ||
2468 | /** |
|
2469 | * Set the authority. It can be one type of server, hostport, hostname, |
|
2470 | * IPv4address, IPv6reference and reg_name. |
|
2471 | * <p><blockquote><pre> |
|
2472 | * authority = server | reg_name |
|
2473 | * </pre></blockquote><p> |
|
2474 | * |
|
2475 | * @param escapedAuthority the raw escaped authority |
|
2476 | * @exception IOException |
|
2477 | * @throws NullPointerException null authority |
|
2478 | */ |
|
2479 | public void setRawAuthority(char[] escapedAuthority) throws IOException { |
|
2480 | 0 | parseAuthority(new String(escapedAuthority), true); |
2481 | 0 | setUriReference(); |
2482 | 0 | } |
2483 | ||
2484 | ||
2485 | /** |
|
2486 | * Set the authority. It can be one type of server, hostport, hostname, |
|
2487 | * IPv4address, IPv6reference and reg_name. |
|
2488 | * Note that there is no setAuthority method by the escape encoding reason. |
|
2489 | * |
|
2490 | * @param escapedAuthority the escaped authority string |
|
2491 | * @exception IOException |
|
2492 | */ |
|
2493 | public void setEscapedAuthority(String escapedAuthority) |
|
2494 | throws IOException { |
|
2495 | ||
2496 | 0 | parseAuthority(escapedAuthority, true); |
2497 | 0 | setUriReference(); |
2498 | 0 | } |
2499 | ||
2500 | ||
2501 | /** |
|
2502 | * Get the raw-escaped authority. |
|
2503 | * |
|
2504 | * @return the raw-escaped authority |
|
2505 | */ |
|
2506 | public char[] getRawAuthority() { |
|
2507 | 0 | return _authority; |
2508 | } |
|
2509 | ||
2510 | ||
2511 | /** |
|
2512 | * Get the escaped authority. |
|
2513 | * |
|
2514 | * @return the escaped authority |
|
2515 | */ |
|
2516 | public String getEscapedAuthority() { |
|
2517 | 0 | return (_authority == null) ? null : new String(_authority); |
2518 | } |
|
2519 | ||
2520 | ||
2521 | /** |
|
2522 | * Get the authority. |
|
2523 | * |
|
2524 | * @return the authority |
|
2525 | * @exception IOException |
|
2526 | * @see #decode |
|
2527 | */ |
|
2528 | public String getAuthority() throws IOException { |
|
2529 | 0 | return (_authority == null) ? null : decode(_authority); |
2530 | } |
|
2531 | ||
2532 | // ----------------------------------------------------------- The userinfo |
|
2533 | ||
2534 | /** |
|
2535 | * Get the raw-escaped userinfo. |
|
2536 | * |
|
2537 | * @return the raw-escaped userinfo |
|
2538 | * @see #getAuthority |
|
2539 | */ |
|
2540 | public char[] getRawUserinfo() { |
|
2541 | 0 | return _userinfo; |
2542 | } |
|
2543 | ||
2544 | ||
2545 | /** |
|
2546 | * Get the escaped userinfo. |
|
2547 | * |
|
2548 | * @return the escaped userinfo |
|
2549 | * @see #getAuthority |
|
2550 | */ |
|
2551 | public String getEscapedUserinfo() { |
|
2552 | 0 | return (_userinfo == null) ? null : new String(_userinfo); |
2553 | } |
|
2554 | ||
2555 | ||
2556 | /** |
|
2557 | * Get the userinfo. |
|
2558 | * |
|
2559 | * @return the userinfo |
|
2560 | * @exception IOException |
|
2561 | * @see #decode |
|
2562 | * @see #getAuthority |
|
2563 | */ |
|
2564 | public String getUserinfo() throws IOException { |
|
2565 | 0 | return (_userinfo == null) ? null : decode(_userinfo); |
2566 | } |
|
2567 | ||
2568 | // --------------------------------------------------------------- The host |
|
2569 | ||
2570 | /** |
|
2571 | * Get the host. |
|
2572 | * <p><blockquote><pre> |
|
2573 | * host = hostname | IPv4address | IPv6reference |
|
2574 | * </pre></blockquote><p> |
|
2575 | * |
|
2576 | * @return the host |
|
2577 | * @see #getAuthority |
|
2578 | */ |
|
2579 | public char[] getRawHost() { |
|
2580 | 0 | return _host; |
2581 | } |
|
2582 | ||
2583 | ||
2584 | /** |
|
2585 | * Get the host. |
|
2586 | * <p><blockquote><pre> |
|
2587 | * host = hostname | IPv4address | IPv6reference |
|
2588 | * </pre></blockquote><p> |
|
2589 | * |
|
2590 | * @return the host |
|
2591 | * @exception IOException |
|
2592 | * @see #decode |
|
2593 | * @see #getAuthority |
|
2594 | */ |
|
2595 | public String getHost() throws IOException { |
|
2596 | 0 | return decode(_host); |
2597 | } |
|
2598 | ||
2599 | // --------------------------------------------------------------- The port |
|
2600 | ||
2601 | /** |
|
2602 | * Get the port. In order to get the specfic default port, the specific |
|
2603 | * protocol-supported class extended from the URI class should be used. |
|
2604 | * It has the server-based naming authority. |
|
2605 | * |
|
2606 | * @return the port |
|
2607 | * if -1, it has the default port for the scheme or the server-based |
|
2608 | * naming authority is not supported in the specific URI. |
|
2609 | */ |
|
2610 | public int getPort() { |
|
2611 | 0 | return _port; |
2612 | } |
|
2613 | ||
2614 | // --------------------------------------------------------------- The path |
|
2615 | ||
2616 | /** |
|
2617 | * Set the path. The method couldn't be used by API programmers. |
|
2618 | * |
|
2619 | * @param path the path string |
|
2620 | * @exception IOException set incorrectly or fragment only |
|
2621 | * @see #encode |
|
2622 | */ |
|
2623 | protected void setPath(String path) throws IOException { |
|
2624 | ||
2625 | // set path |
|
2626 | 0 | if (_is_net_path || _is_abs_path) { |
2627 | 0 | _path = encode(path, allowed_abs_path); |
2628 | 0 | } else if (_is_rel_path) { |
2629 | 0 | StringBuffer buff = new StringBuffer(path.length()); |
2630 | 0 | int at = path.indexOf('/'); |
2631 | 0 | if (at > 0) { // never 0 |
2632 | 0 | buff.append(encode(path.substring(0, at), allowed_rel_path)); |
2633 | 0 | buff.append(encode(path.substring(at), allowed_abs_path)); |
2634 | 0 | } else { |
2635 | 0 | buff.append(encode(path, allowed_rel_path)); |
2636 | } |
|
2637 | 0 | _path = buff.toString().toCharArray(); |
2638 | 0 | } else if (_is_opaque_part) { |
2639 | 0 | _opaque = encode(path, allowed_opaque_part); |
2640 | 0 | } else { |
2641 | 0 | throw new IOException(/*IOException.PARSING, */"URI: incorrect path"); |
2642 | } |
|
2643 | 0 | } |
2644 | ||
2645 | ||
2646 | /** |
|
2647 | * Resolve the base and relative path. |
|
2648 | * |
|
2649 | * @param base_path a character array of the base_path |
|
2650 | * @param rel_path a character array of the rel_path |
|
2651 | * @return the resolved path |
|
2652 | */ |
|
2653 | protected char[] resolvePath(char[] base_path, char[] rel_path) { |
|
2654 | ||
2655 | // REMINDME: paths are never null |
|
2656 | 0 | String base = (base_path == null) ? "" : new String(base_path); |
2657 | 0 | int at = base.lastIndexOf('/'); |
2658 | 0 | if (at != -1) { |
2659 | 0 | base_path = base.substring(0, at + 1).toCharArray(); |
2660 | } |
|
2661 | // _path could be empty |
|
2662 | 0 | if (rel_path == null || rel_path.length == 0) { |
2663 | 0 | return normalize(base_path); |
2664 | 0 | } else if (rel_path[0] == '/') { |
2665 | 0 | return rel_path; |
2666 | } else { |
|
2667 | 0 | StringBuffer buff = new StringBuffer(base.length() + |
2668 | 0 | rel_path.length); |
2669 | 0 | if (at != -1) { |
2670 | 0 | buff.append(base.substring(0, at + 1)); |
2671 | 0 | buff.append(rel_path); |
2672 | } |
|
2673 | 0 | return normalize(buff.toString().toCharArray()); |
2674 | } |
|
2675 | } |
|
2676 | ||
2677 | ||
2678 | /** |
|
2679 | * Get the raw-escaped current hierarchy level in the given path. |
|
2680 | * If the last namespace is a collection, the slash mark ('/') should be |
|
2681 | * ended with at the last character of the path string. |
|
2682 | * |
|
2683 | * @param path the path |
|
2684 | * @return the current hierarchy level |
|
2685 | * @exception IOException no hierarchy level |
|
2686 | */ |
|
2687 | protected char[] getRawCurrentHierPath(char[] path) throws IOException { |
|
2688 | ||
2689 | 0 | if (_is_opaque_part) { |
2690 | 0 | throw new IOException(/*IOException.PARSING,*/ "URI: no hierarchy level"); |
2691 | } |
|
2692 | 0 | if (path == null) { |
2693 | 0 | throw new IOException(/*IOException.PARSING,*/ "URI: emtpy path"); |
2694 | } |
|
2695 | 0 | String buff = new String(path); |
2696 | 0 | int first = buff.indexOf('/'); |
2697 | 0 | int last = buff.lastIndexOf('/'); |
2698 | 0 | if (last == 0) { |
2699 | 0 | return rootPath; |
2700 | 0 | } else if (first != last && last != -1) { |
2701 | 0 | return buff.substring(0, last).toCharArray(); |
2702 | } |
|
2703 | // FIXME: it could be a document on the server side |
|
2704 | 0 | return path; |
2705 | } |
|
2706 | ||
2707 | ||
2708 | /** |
|
2709 | * Get the raw-escaped current hierarchy level. |
|
2710 | * |
|
2711 | * @return the raw-escaped current hierarchy level |
|
2712 | * @exception IOException no hierarchy level |
|
2713 | */ |
|
2714 | public char[] getRawCurrentHierPath() throws IOException { |
|
2715 | 0 | return (_path == null) ? null : getRawCurrentHierPath(_path); |
2716 | } |
|
2717 | ||
2718 | ||
2719 | /** |
|
2720 | * Get the escaped current hierarchy level. |
|
2721 | * |
|
2722 | * @return the escaped current hierarchy level |
|
2723 | * @exception IOException no hierarchy level |
|
2724 | */ |
|
2725 | public String getEscapedCurrentHierPath() throws IOException { |
|
2726 | 0 | char[] path = getRawCurrentHierPath(); |
2727 | 0 | return (path == null) ? null : new String(path); |
2728 | } |
|
2729 | ||
2730 | ||
2731 | /** |
|
2732 | * Get the current hierarchy level. |
|
2733 | * |
|
2734 | * @return the current hierarchy level |
|
2735 | * @exception IOException |
|
2736 | * @see #decode |
|
2737 | */ |
|
2738 | public String getCurrentHierPath() throws IOException { |
|
2739 | 0 | char[] path = getRawCurrentHierPath(); |
2740 | 0 | return (path == null) ? null : decode(path); |
2741 | } |
|
2742 | ||
2743 | ||
2744 | /** |
|
2745 | * Get the level above the this hierarchy level. |
|
2746 | * |
|
2747 | * @return the raw above hierarchy level |
|
2748 | * @exception IOException |
|
2749 | */ |
|
2750 | public char[] getRawAboveHierPath() throws IOException { |
|
2751 | 0 | char[] path = getRawCurrentHierPath(); |
2752 | 0 | return (path == null) ? null : getRawCurrentHierPath(path); |
2753 | } |
|
2754 | ||
2755 | ||
2756 | /** |
|
2757 | * Get the level above the this hierarchy level. |
|
2758 | * |
|
2759 | * @return the raw above hierarchy level |
|
2760 | * @exception IOException |
|
2761 | */ |
|
2762 | public String getEscapedAboveHierPath() throws IOException { |
|
2763 | 0 | char[] path = getRawAboveHierPath(); |
2764 | 0 | return (path == null) ? null : new String(path); |
2765 | } |
|
2766 | ||
2767 | ||
2768 | /** |
|
2769 | * Get the level above the this hierarchy level. |
|
2770 | * |
|
2771 | * @return the above hierarchy level |
|
2772 | * @exception IOException |
|
2773 | * @see #decode |
|
2774 | */ |
|
2775 | public String getAboveHierPath() throws IOException { |
|
2776 | 0 | char[] path = getRawAboveHierPath(); |
2777 | 0 | return (path == null) ? null : decode(path); |
2778 | } |
|
2779 | ||
2780 | ||
2781 | /** |
|
2782 | * Get the raw-escaped path. |
|
2783 | * <p><blockquote><pre> |
|
2784 | * path = [ abs_path | opaque_part ] |
|
2785 | * </pre></blockquote><p> |
|
2786 | * |
|
2787 | * @return the raw-escaped path |
|
2788 | */ |
|
2789 | public char[] getRawPath() { |
|
2790 | 0 | return _is_opaque_part ? _opaque : _path; |
2791 | } |
|
2792 | ||
2793 | ||
2794 | /** |
|
2795 | * Get the escaped path. |
|
2796 | * <p><blockquote><pre> |
|
2797 | * path = [ abs_path | opaque_part ] |
|
2798 | * abs_path = "/" path_segments |
|
2799 | * opaque_part = uric_no_slash *uric |
|
2800 | * </pre></blockquote><p> |
|
2801 | * |
|
2802 | * @return the escaped path string |
|
2803 | */ |
|
2804 | public String getEscapedPath() { |
|
2805 | 0 | char[] path = getRawPath(); |
2806 | 0 | return (path == null) ? null : new String(path); |
2807 | } |
|
2808 | ||
2809 | ||
2810 | /** |
|
2811 | * Get the path. |
|
2812 | * <p><blockquote><pre> |
|
2813 | * path = [ abs_path | opaque_part ] |
|
2814 | * </pre></blockquote><p> |
|
2815 | * @return the path string |
|
2816 | * @exception IOException |
|
2817 | * @see #decode |
|
2818 | */ |
|
2819 | public String getPath() throws IOException { |
|
2820 | 0 | char[] path = getRawPath(); |
2821 | 0 | return (path == null) ? null : decode(path); |
2822 | } |
|
2823 | ||
2824 | ||
2825 | /** |
|
2826 | * Get the raw-escaped basename of the path. |
|
2827 | * |
|
2828 | * @return the raw-escaped basename |
|
2829 | */ |
|
2830 | public char[] getRawName() { |
|
2831 | 0 | if (_path == null) return null; |
2832 | ||
2833 | 0 | int at = 0; |
2834 | 0 | for (int i = _path.length - 1; i >= 0; i--) { |
2835 | 0 | if (_path[i] == '/') { |
2836 | 0 | at = i + 1; |
2837 | 0 | break; |
2838 | } |
|
2839 | } |
|
2840 | 0 | int len = _path.length - at; |
2841 | 0 | char[] basename = new char[len]; |
2842 | 0 | System.arraycopy(_path, at, basename, 0, len); |
2843 | 0 | return basename; |
2844 | } |
|
2845 | ||
2846 | ||
2847 | /** |
|
2848 | * Get the escaped basename of the path. |
|
2849 | * |
|
2850 | * @return the escaped basename string |
|
2851 | */ |
|
2852 | public String getEscapedName() { |
|
2853 | 0 | char[] basename = getRawName(); |
2854 | 0 | return (basename == null) ? null : new String(basename); |
2855 | } |
|
2856 | ||
2857 | ||
2858 | /** |
|
2859 | * Get the basename of the path. |
|
2860 | * |
|
2861 | * @return the basename string |
|
2862 | * @exception IOException incomplete trailing escape pattern |
|
2863 | * Or unsupported character encoding |
|
2864 | * @see #decode |
|
2865 | */ |
|
2866 | public String getName() throws IOException { |
|
2867 | 0 | char[] basename = getRawName(); |
2868 | 0 | return (basename == null) ? null : decode(getRawName()); |
2869 | } |
|
2870 | ||
2871 | // ----------------------------------------------------- The path and query |
|
2872 | ||
2873 | /** |
|
2874 | * Get the raw-escaped path and query. |
|
2875 | * |
|
2876 | * @return the raw-escaped path and query |
|
2877 | */ |
|
2878 | public char[] getRawPathQuery() { |
|
2879 | ||
2880 | 0 | if (_path == null && _query == null) { |
2881 | 0 | return null; |
2882 | } |
|
2883 | 0 | StringBuffer buff = new StringBuffer(); |
2884 | 0 | if (_path != null) { |
2885 | 0 | buff.append(_path); |
2886 | } |
|
2887 | 0 | if (_query != null) { |
2888 | 0 | buff.append('?'); |
2889 | 0 | buff.append(_query); |
2890 | } |
|
2891 | 0 | return buff.toString().toCharArray(); |
2892 | } |
|
2893 | ||
2894 | ||
2895 | /** |
|
2896 | * Get the escaped query. |
|
2897 | * |
|
2898 | * @return the escaped path and query string |
|
2899 | */ |
|
2900 | public String getEscapedPathQuery() { |
|
2901 | 0 | char[] rawPathQuery = getRawPathQuery(); |
2902 | 0 | return (rawPathQuery == null) ? null : new String(rawPathQuery); |
2903 | } |
|
2904 | ||
2905 | ||
2906 | /** |
|
2907 | * Get the path and query. |
|
2908 | * |
|
2909 | * @return the path and query string. |
|
2910 | * @exception IOException incomplete trailing escape pattern |
|
2911 | * Or unsupported character encoding |
|
2912 | * @see #decode |
|
2913 | */ |
|
2914 | public String getPathQuery() throws IOException { |
|
2915 | 0 | char[] rawPathQuery = getRawPathQuery(); |
2916 | 0 | return (rawPathQuery == null) ? null : decode(rawPathQuery); |
2917 | } |
|
2918 | ||
2919 | // -------------------------------------------------------------- The query |
|
2920 | ||
2921 | /** |
|
2922 | * Set the raw-escaped query. |
|
2923 | * |
|
2924 | * @param escapedQuery the raw-escaped query |
|
2925 | * @exception IOException escaped query not valid |
|
2926 | * @throws NullPointerException null query |
|
2927 | */ |
|
2928 | public void setRawQuery(char[] escapedQuery) throws IOException { |
|
2929 | 0 | if (!validate(escapedQuery, query)) |
2930 | 0 | throw new IOException(/*IOException.ESCAPING,*/ |
2931 | 0 | "URI: escaped query not valid"); |
2932 | 0 | _query = escapedQuery; |
2933 | 0 | setUriReference(); |
2934 | 0 | } |
2935 | ||
2936 | ||
2937 | /** |
|
2938 | * Set the escaped query string. |
|
2939 | * |
|
2940 | * @param escapedQuery the escaped query string |
|
2941 | * @exception IOException escaped query not valid |
|
2942 | * @throws NullPointerException null query |
|
2943 | */ |
|
2944 | public void setEscapedQuery(String escapedQuery) throws IOException { |
|
2945 | 0 | setRawQuery(escapedQuery.toCharArray()); |
2946 | 0 | } |
2947 | ||
2948 | ||
2949 | /** |
|
2950 | * Set the query. |
|
2951 | * When a query string is not misunderstood the reserved special characters |
|
2952 | * ("&", "=", "+", ",", and "$") within a query component, it is |
|
2953 | * recommended to use in encoding the whole query with this method. |
|
2954 | * |
|
2955 | * @param query the query string. |
|
2956 | * @exception IOException incomplete trailing escape pattern |
|
2957 | * Or unsupported character encoding |
|
2958 | * @throws NullPointerException null query |
|
2959 | * @see #encode |
|
2960 | */ |
|
2961 | public void setQuery(String query) throws IOException { |
|
2962 | 0 | setRawQuery(encode(query, allowed_query)); |
2963 | 0 | } |
2964 | ||
2965 | ||
2966 | /** |
|
2967 | * Get the raw-escaped query. |
|
2968 | * |
|
2969 | * @return the raw-escaped query |
|
2970 | */ |
|
2971 | public char[] getRawQuery() { |
|
2972 | 0 | return _query; |
2973 | } |
|
2974 | ||
2975 | ||
2976 | /** |
|
2977 | * Get the escaped query. |
|
2978 | * |
|
2979 | * @return the escaped query string |
|
2980 | */ |
|
2981 | public String getEscapedQuery() { |
|
2982 | 0 | return (_query == null) ? null : new String(_query); |
2983 | } |
|
2984 | ||
2985 | ||
2986 | /** |
|
2987 | * Get the query. |
|
2988 | * |
|
2989 | * @return the query string. |
|
2990 | * @exception IOException incomplete trailing escape pattern |
|
2991 | * Or unsupported character encoding |
|
2992 | * @see #decode |
|
2993 | */ |
|
2994 | public String getQuery() throws IOException { |
|
2995 | 0 | return (_query == null) ? null : decode(_query); |
2996 | } |
|
2997 | ||
2998 | // ----------------------------------------------------------- The fragment |
|
2999 | ||
3000 | /** |
|
3001 | * Set the raw-escaped fragment. |
|
3002 | * |
|
3003 | * @param escapedFragment the raw-escaped fragment |
|
3004 | * @exception IOException escaped fragment not valid |
|
3005 | * @throws NullPointerException null fragment |
|
3006 | */ |
|
3007 | public void setRawFragment(char[] escapedFragment) throws IOException { |
|
3008 | 0 | if (!validate(escapedFragment, fragment)) |
3009 | 0 | throw new IOException(/*IOException.ESCAPING,*/ |
3010 | 0 | "URI: escaped fragment not valid"); |
3011 | 0 | _fragment = escapedFragment; |
3012 | 0 | setUriReference(); |
3013 | 0 | } |
3014 | ||
3015 | ||
3016 | /** |
|
3017 | * Set the escaped fragment string. |
|
3018 | * |
|
3019 | * @param escapedFragment the escaped fragment string |
|
3020 | * @exception IOException escaped fragment not valid |
|
3021 | * @throws NullPointerException null fragment |
|
3022 | */ |
|
3023 | public void setEscapedFragment(String escapedFragment) throws IOException { |
|
3024 | 0 | char[] fragmentSequence = escapedFragment.toCharArray(); |
3025 | 0 | if (!validate(fragmentSequence, fragment)) |
3026 | 0 | throw new IOException(/*IOException.ESCAPING,*/ |
3027 | 0 | "URI: escaped fragment not valid"); |
3028 | 0 | _fragment = fragmentSequence; |
3029 | 0 | setUriReference(); |
3030 | 0 | } |
3031 | ||
3032 | ||
3033 | /** |
|
3034 | * Set the fragment. |
|
3035 | * |
|
3036 | * @param the fragment string. |
|
3037 | * @exception IOException |
|
3038 | * Or unsupported character encoding |
|
3039 | * @throws NullPointerException null fragment |
|
3040 | */ |
|
3041 | public void setFragment(String fragment) throws IOException { |
|
3042 | 0 | _fragment = encode(fragment, allowed_fragment); |
3043 | 0 | setUriReference(); |
3044 | 0 | } |
3045 | ||
3046 | ||
3047 | /** |
|
3048 | * Get the raw-escaped fragment. |
|
3049 | * <p> |
|
3050 | * The optional fragment identifier is not part of a URI, but is often used |
|
3051 | * in conjunction with a URI. |
|
3052 | * <p> |
|
3053 | * The format and interpretation of fragment identifiers is dependent on |
|
3054 | * the media type [RFC2046] of the retrieval result. |
|
3055 | * <p> |
|
3056 | * A fragment identifier is only meaningful when a URI reference is |
|
3057 | * intended for retrieval and the result of that retrieval is a document |
|
3058 | * for which the identified fragment is consistently defined. |
|
3059 | * |
|
3060 | * @return the raw-escaped fragment |
|
3061 | */ |
|
3062 | public char[] getRawFragment() { |
|
3063 | 0 | return _fragment; |
3064 | } |
|
3065 | ||
3066 | ||
3067 | /** |
|
3068 | * Get the escaped fragment. |
|
3069 | * |
|
3070 | * @return the escaped fragment string |
|
3071 | */ |
|
3072 | public String getEscapedFragment() { |
|
3073 | 0 | return (_fragment == null) ? null : new String(_fragment); |
3074 | } |
|
3075 | ||
3076 | ||
3077 | /** |
|
3078 | * Get the fragment. |
|
3079 | * |
|
3080 | * @return the fragment string |
|
3081 | * @exception IOException incomplete trailing escape pattern |
|
3082 | * Or unsupported character encoding |
|
3083 | * @see #decode |
|
3084 | */ |
|
3085 | public String getFragment() throws IOException { |
|
3086 | 0 | return (_fragment == null) ? null : decode(_fragment); |
3087 | } |
|
3088 | ||
3089 | // ------------------------------------------------------------- Utilities |
|
3090 | ||
3091 | /** |
|
3092 | * Normalize the given hier path part. |
|
3093 | * |
|
3094 | * @param path the path to normalize |
|
3095 | * @return the normalized path |
|
3096 | */ |
|
3097 | protected char[] normalize(char[] path) { |
|
3098 | ||
3099 | 0 | if (path == null) return null; |
3100 | ||
3101 | 0 | String normalized = new String(path); |
3102 | 0 | boolean endsWithSlash = true; |
3103 | // precondition |
|
3104 | 0 | if (!normalized.endsWith("/")) { |
3105 | 0 | normalized += '/'; |
3106 | 0 | endsWithSlash = false; |
3107 | } |
|
3108 | 0 | if (normalized.endsWith("/./") || normalized.endsWith("/../")) { |
3109 | 0 | endsWithSlash = true; |
3110 | } |
|
3111 | // Resolve occurrences of "/./" in the normalized path |
|
3112 | 0 | while (true) { |
3113 | 0 | int at = normalized.indexOf("/./"); |
3114 | 0 | if (at == -1) { |
3115 | 0 | break; |
3116 | } |
|
3117 | 0 | normalized = normalized.substring(0, at) + |
3118 | 0 | normalized.substring(at + 2); |
3119 | 0 | } |
3120 | // Resolve occurrences of "/../" in the normalized path |
|
3121 | 0 | while (true) { |
3122 | 0 | int at = normalized.indexOf("/../"); |
3123 | 0 | if (at == -1) { |
3124 | 0 | break; |
3125 | } |
|
3126 | 0 | if (at == 0) { |
3127 | 0 | normalized = "/"; |
3128 | 0 | break; |
3129 | } |
|
3130 | 0 | int backward = normalized.lastIndexOf('/', at - 1); |
3131 | 0 | if (backward == -1) { |
3132 | // consider the rel_path |
|
3133 | 0 | normalized = normalized.substring(at + 4); |
3134 | 0 | } else { |
3135 | 0 | normalized = normalized.substring(0, backward) + |
3136 | 0 | normalized.substring(at + 3); |
3137 | } |
|
3138 | 0 | } |
3139 | // Resolve occurrences of "//" in the normalized path |
|
3140 | 0 | while (true) { |
3141 | 0 | int at = normalized.indexOf("//"); |
3142 | 0 | if (at == -1) { |
3143 | 0 | break; |
3144 | } |
|
3145 | 0 | normalized = normalized.substring(0, at) + |
3146 | 0 | normalized.substring(at + 1); |
3147 | 0 | } |
3148 | 0 | if (!endsWithSlash && normalized.endsWith("/")) { |
3149 | 0 | normalized = normalized.substring(0, normalized.length()-1); |
3150 | 0 | } else if (endsWithSlash && !normalized.endsWith("/")) { |
3151 | 0 | normalized = normalized + "/"; |
3152 | } |
|
3153 | // Set the normalized path that we have completed |
|
3154 | 0 | return normalized.toCharArray(); |
3155 | } |
|
3156 | ||
3157 | ||
3158 | /** |
|
3159 | * Normalize the path part of this URI. |
|
3160 | */ |
|
3161 | public void normalize() { |
|
3162 | 0 | _path = normalize(_path); |
3163 | 0 | } |
3164 | ||
3165 | ||
3166 | /** |
|
3167 | * Test if the first array is equal to the second array. |
|
3168 | * |
|
3169 | * @param first the first character array |
|
3170 | * @param second the second character array |
|
3171 | * @return true if they're equal |
|
3172 | */ |
|
3173 | protected boolean equals(char[] first, char[] second) { |
|
3174 | ||
3175 | 0 | if (first == null && second == null) { |
3176 | 0 | return true; |
3177 | } |
|
3178 | 0 | if (first == null || second == null) { |
3179 | 0 | return false; |
3180 | } |
|
3181 | 0 | if (first.length != second.length) { |
3182 | 0 | return false; |
3183 | } |
|
3184 | 0 | for (int i = 0; i < first.length; i++) { |
3185 | 0 | if (first[i] != second[i]) { |
3186 | 0 | return false; |
3187 | } |
|
3188 | } |
|
3189 | 0 | return true; |
3190 | } |
|
3191 | ||
3192 | ||
3193 | /** |
|
3194 | * Test an object if this URI is equal to another. |
|
3195 | * |
|
3196 | * @param obj an object to compare |
|
3197 | * @return true if two URI objects are equal |
|
3198 | */ |
|
3199 | public boolean equals(Object obj) { |
|
3200 | ||
3201 | // normalize and test each components |
|
3202 | 0 | if (obj == this) { |
3203 | 0 | return true; |
3204 | } |
|
3205 | 0 | if (!(obj instanceof URI)) { |
3206 | 0 | return false; |
3207 | } |
|
3208 | 0 | URI another = (URI) obj; |
3209 | // scheme |
|
3210 | 0 | if (!equals(_scheme, another._scheme)) { |
3211 | 0 | return false; |
3212 | } |
|
3213 | // is_opaque_part or is_hier_part? and opaque |
|
3214 | 0 | if (!equals(_opaque, another._opaque)) { |
3215 | 0 | return false; |
3216 | } |
|
3217 | // is_hier_part |
|
3218 | // has_authority |
|
3219 | 0 | if (!equals(_authority, another._authority)) { |
3220 | 0 | return false; |
3221 | } |
|
3222 | // path |
|
3223 | 0 | if (!equals(_path, another._path)) { |
3224 | 0 | return false; |
3225 | } |
|
3226 | // has_query |
|
3227 | 0 | if (!equals(_query, another._query)) { |
3228 | 0 | return false; |
3229 | } |
|
3230 | // has_fragment? should be careful of the only fragment case. |
|
3231 | 0 | if (!equals(_fragment, another._fragment)) { |
3232 | 0 | return false; |
3233 | } |
|
3234 | 0 | return true; |
3235 | } |
|
3236 | ||
3237 | // ---------------------------------------------------------- Serialization |
|
3238 | ||
3239 | /** |
|
3240 | * Write the content of this URI. |
|
3241 | * |
|
3242 | * @param oos the object-output stream |
|
3243 | */ |
|
3244 | protected void writeObject(java.io.ObjectOutputStream oos) |
|
3245 | throws IOException { |
|
3246 | ||
3247 | 0 | oos.defaultWriteObject(); |
3248 | 0 | } |
3249 | ||
3250 | ||
3251 | /** |
|
3252 | * Read a URI. |
|
3253 | * |
|
3254 | * @param ois the object-input stream |
|
3255 | */ |
|
3256 | protected void readObject(java.io.ObjectInputStream ois) |
|
3257 | throws ClassNotFoundException, IOException { |
|
3258 | ||
3259 | 0 | ois.defaultReadObject(); |
3260 | 0 | } |
3261 | ||
3262 | // ------------------------------------------------------------- Comparison |
|
3263 | ||
3264 | /** |
|
3265 | * Compare this URI to another object. |
|
3266 | * |
|
3267 | * @param obj the object to be compared. |
|
3268 | * @return 0, if it's same, |
|
3269 | * -1, if failed, first being compared with in the authority component |
|
3270 | * @exception ClassCastException not URI argument |
|
3271 | * @throws NullPointerException null object |
|
3272 | */ |
|
3273 | public int compareTo(Object obj) { |
|
3274 | ||
3275 | 0 | URI another = (URI) obj; |
3276 | 0 | if (!equals(_authority, another.getRawAuthority())) return -1; |
3277 | 0 | return toString().compareTo(another.toString()); |
3278 | } |
|
3279 | ||
3280 | // ------------------------------------------------------------------ Clone |
|
3281 | ||
3282 | /** |
|
3283 | * Create and return a copy of this object, the URI-reference containing |
|
3284 | * the userinfo component. Notice that the whole URI-reference including |
|
3285 | * the userinfo component counld not be gotten as a <code>String</code>. |
|
3286 | * <p> |
|
3287 | * To copy the identical <code>URI</code> object including the userinfo |
|
3288 | * component, it should be used. |
|
3289 | * |
|
3290 | * @return a clone of this instance |
|
3291 | */ |
|
3292 | public synchronized Object clone() { |
|
3293 | ||
3294 | 0 | URI instance = new URI(); |
3295 | ||
3296 | 0 | instance._uri = _uri; |
3297 | 0 | instance._scheme = _scheme; |
3298 | 0 | instance._opaque = _opaque; |
3299 | 0 | instance._authority = _authority; |
3300 | 0 | instance._userinfo = _userinfo; |
3301 | 0 | instance._host = _host; |
3302 | 0 | instance._port = _port; |
3303 | 0 | instance._path = _path; |
3304 | 0 | instance._query = _query; |
3305 | 0 | instance._fragment = _fragment; |
3306 | // flags |
|
3307 | 0 | instance._is_hier_part = _is_hier_part; |
3308 | 0 | instance._is_opaque_part = _is_opaque_part; |
3309 | 0 | instance._is_net_path = _is_net_path; |
3310 | 0 | instance._is_abs_path = _is_abs_path; |
3311 | 0 | instance._is_rel_path = _is_rel_path; |
3312 | 0 | instance._is_reg_name = _is_reg_name; |
3313 | 0 | instance._is_server = _is_server; |
3314 | 0 | instance._is_hostname = _is_hostname; |
3315 | 0 | instance._is_IPv4address = _is_IPv4address; |
3316 | 0 | instance._is_IPv6reference = _is_IPv6reference; |
3317 | ||
3318 | 0 | return instance; |
3319 | } |
|
3320 | ||
3321 | // ------------------------------------------------------------ Get the URI |
|
3322 | ||
3323 | /** |
|
3324 | * It can be gotten the URI character sequence. It's raw-escaped. |
|
3325 | * For the purpose of the protocol to be transported, it will be useful. |
|
3326 | * <p> |
|
3327 | * It is clearly unwise to use a URL that contains a password which is |
|
3328 | * intended to be secret. In particular, the use of a password within |
|
3329 | * the 'userinfo' component of a URL is strongly disrecommended except |
|
3330 | * in those rare cases where the 'password' parameter is intended to be |
|
3331 | * public. |
|
3332 | * <p> |
|
3333 | * When you want to get each part of the userinfo, you need to use the |
|
3334 | * specific methods in the specific URL. It depends on the specific URL. |
|
3335 | * |
|
3336 | * @return URI character sequence |
|
3337 | */ |
|
3338 | public char[] getRawURI() { |
|
3339 | 0 | return _uri; |
3340 | } |
|
3341 | ||
3342 | ||
3343 | /** |
|
3344 | * It can be gotten the URI character sequence. It's escaped. |
|
3345 | * For the purpose of the protocol to be transported, it will be useful. |
|
3346 | * |
|
3347 | * @return the URI string |
|
3348 | */ |
|
3349 | public String getEscapedURI() { |
|
3350 | 0 | return (_uri == null) ? null : new String(_uri); |
3351 | } |
|
3352 | ||
3353 | ||
3354 | /** |
|
3355 | * It can be gotten the URI character sequence. |
|
3356 | * |
|
3357 | * @return the URI string |
|
3358 | * @exception IOException incomplete trailing escape pattern |
|
3359 | * Or unsupported character encoding |
|
3360 | * @see #decode |
|
3361 | */ |
|
3362 | public String getURI() throws IOException { |
|
3363 | 0 | return (_uri == null) ? null : decode(_uri); |
3364 | } |
|
3365 | ||
3366 | ||
3367 | /** |
|
3368 | * Get the escaped URI string. |
|
3369 | * <p> |
|
3370 | * On the document, the URI-reference form is only used without the userinfo |
|
3371 | * component like http://jakarta.apache.org/ by the security reason. |
|
3372 | * But the URI-reference form with the userinfo component could be parsed. |
|
3373 | * <p> |
|
3374 | * In other words, this URI and any its subclasses must not expose the |
|
3375 | * URI-reference expression with the userinfo component like |
|
3376 | * http://user:password@hostport/restricted_zone.<br> |
|
3377 | * It means that the API client programmer should extract each user and |
|
3378 | * password to access manually. Probably it will be supported in the each |
|
3379 | * subclass, however, not a whole URI-reference expression. |
|
3380 | * |
|
3381 | * @return the URI string |
|
3382 | * @see #clone() |
|
3383 | */ |
|
3384 | public String toString() { |
|
3385 | 0 | return getEscapedURI(); |
3386 | } |
|
3387 | ||
3388 | ||
3389 | // ------------------------------------------------------------ Inner class |
|
3390 | ||
3391 | /** |
|
3392 | * A mapping to determine the (somewhat arbitrarily) preferred charset for |
|
3393 | * a given locale. Supports all locales recognized in JDK 1.1. |
|
3394 | * <p> |
|
3395 | * The distribution of this class is Servlets.com. It was originally |
|
3396 | * written by Jason Hunter [jhunter at acm.org] and used by with permission. |
|
3397 | */ |
|
3398 | 0 | public static class LocaleToCharsetMap { |
3399 | ||
3400 | private static Hashtable map; |
|
3401 | static { |
|
3402 | 0 | map = new Hashtable(); |
3403 | 0 | map.put("ar", "ISO-8859-6"); |
3404 | 0 | map.put("be", "ISO-8859-5"); |
3405 | 0 | map.put("bg", "ISO-8859-5"); |
3406 | 0 | map.put("ca", "ISO-8859-1"); |
3407 | 0 | map.put("cs", "ISO-8859-2"); |
3408 | 0 | map.put("da", "ISO-8859-1"); |
3409 | 0 | map.put("de", "ISO-8859-1"); |
3410 | 0 | map.put("el", "ISO-8859-7"); |
3411 | 0 | map.put("en", "ISO-8859-1"); |
3412 | 0 | map.put("es", "ISO-8859-1"); |
3413 | 0 | map.put("et", "ISO-8859-1"); |
3414 | 0 | map.put("fi", "ISO-8859-1"); |
3415 | 0 | map.put("fr", "ISO-8859-1"); |
3416 | 0 | map.put("hr", "ISO-8859-2"); |
3417 | 0 | map.put("hu", "ISO-8859-2"); |
3418 | 0 | map.put("is", "ISO-8859-1"); |
3419 | 0 | map.put("it", "ISO-8859-1"); |
3420 | 0 | map.put("iw", "ISO-8859-8"); |
3421 | 0 | map.put("ja", "Shift_JIS"); |
3422 | 0 | map.put("ko", "EUC-KR"); |
3423 | 0 | map.put("lt", "ISO-8859-2"); |
3424 | 0 | map.put("lv", "ISO-8859-2"); |
3425 | 0 | map.put("mk", "ISO-8859-5"); |
3426 | 0 | map.put("nl", "ISO-8859-1"); |
3427 | 0 | map.put("no", "ISO-8859-1"); |
3428 | 0 | map.put("pl", "ISO-8859-2"); |
3429 | 0 | map.put("pt", "ISO-8859-1"); |
3430 | 0 | map.put("ro", "ISO-8859-2"); |
3431 | 0 | map.put("ru", "ISO-8859-5"); |
3432 | 0 | map.put("sh", "ISO-8859-5"); |
3433 | 0 | map.put("sk", "ISO-8859-2"); |
3434 | 0 | map.put("sl", "ISO-8859-2"); |
3435 | 0 | map.put("sq", "ISO-8859-2"); |
3436 | 0 | map.put("sr", "ISO-8859-5"); |
3437 | 0 | map.put("sv", "ISO-8859-1"); |
3438 | 0 | map.put("tr", "ISO-8859-9"); |
3439 | 0 | map.put("uk", "ISO-8859-5"); |
3440 | 0 | map.put("zh", "GB2312"); |
3441 | 0 | map.put("zh_TW", "Big5"); |
3442 | 0 | } |
3443 | ||
3444 | /** |
|
3445 | * Get the preferred charset for the given locale. |
|
3446 | * |
|
3447 | * @param locale the locale |
|
3448 | * @return the preferred charset |
|
3449 | * or null if the locale is not recognized |
|
3450 | */ |
|
3451 | public static String getCharset(Locale locale) { |
|
3452 | // try for an full name match (may include country) |
|
3453 | 0 | String charset = (String) map.get(locale.toString()); |
3454 | 0 | if (charset != null) return charset; |
3455 | ||
3456 | // if a full name didn't match, try just the language |
|
3457 | 0 | charset = (String) map.get(locale.getLanguage()); |
3458 | 0 | return charset; // may be null |
3459 | } |
|
3460 | ||
3461 | } |
|
3462 | ||
3463 | } |
|
3464 |