Blame - java/net/URI.java - platform/prebuilts/fullsdk/sources/android-30

blob: ebba41bc559b8db0eceec60feef59e36bd5ef4f3 [file] [log] [blame]

Alan Viverette	3da604b	2020-06-10 18:34:39 +0000	[diff] [blame]	1	/*
				2	* Copyright (C) 2014 The Android Open Source Project
				3	* Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
				4	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
				5	*
				6	* This code is free software; you can redistribute it and/or modify it
				7	* under the terms of the GNU General Public License version 2 only, as
				8	* published by the Free Software Foundation. Oracle designates this
				9	* particular file as subject to the "Classpath" exception as provided
				10	* by Oracle in the LICENSE file that accompanied this code.
				11	*
				12	* This code is distributed in the hope that it will be useful, but WITHOUT
				13	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				14	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				15	* version 2 for more details (a copy is included in the LICENSE file that
				16	* accompanied this code).
				17	*
				18	* You should have received a copy of the GNU General Public License version
				19	* 2 along with this work; if not, write to the Free Software Foundation,
				20	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
				21	*
				22	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
				23	* or visit www.oracle.com if you need additional information or have any
				24	* questions.
				25	*/
				26
				27	package java.net;
				28
				29	import java.io.IOException;
				30	import java.io.InvalidObjectException;
				31	import java.io.ObjectInputStream;
				32	import java.io.ObjectOutputStream;
				33	import java.io.Serializable;
				34	import java.nio.ByteBuffer;
				35	import java.nio.CharBuffer;
				36	import java.nio.charset.CharsetDecoder;
				37	import java.nio.charset.CoderResult;
				38	import java.nio.charset.CodingErrorAction;
				39	import java.nio.charset.CharacterCodingException;
				40	import java.text.Normalizer;
				41	import sun.nio.cs.ThreadLocalCoders;
				42
				43	import java.lang.Character; // for javadoc
				44	import java.lang.NullPointerException; // for javadoc
				45
				46
				47	// Android-changed: Reformat @see links.
				48	/**
				49	* Represents a Uniform Resource Identifier (URI) reference.
				50	*
				51	* <p> Aside from some minor deviations noted below, an instance of this
				52	* class represents a URI reference as defined by
				53	* <a href="http://www.ietf.org/rfc/rfc2396.txt"><i>RFC 2396: Uniform
				54	* Resource Identifiers (URI): Generic Syntax</i></a>, amended by <a
				55	* href="http://www.ietf.org/rfc/rfc2732.txt"><i>RFC 2732: Format for
				56	* Literal IPv6 Addresses in URLs</i></a>. The Literal IPv6 address format
				57	* also supports scope_ids. The syntax and usage of scope_ids is described
				58	* <a href="Inet6Address.html#scoped">here</a>.
				59	* This class provides constructors for creating URI instances from
				60	* their components or by parsing their string forms, methods for accessing the
				61	* various components of an instance, and methods for normalizing, resolving,
				62	* and relativizing URI instances. Instances of this class are immutable.
				63	*
				64	*
				65	* <h3> URI syntax and components </h3>
				66	*
				67	* At the highest level a URI reference (hereinafter simply "URI") in string
				68	* form has the syntax
				69	*
				70	* <blockquote>
				71	* [<i>scheme</i><b>{@code :}</b>]<i>scheme-specific-part</i>[<b>{@code #}</b><i>fragment</i>]
				72	* </blockquote>
				73	*
				74	* where square brackets [...] delineate optional components and the characters
				75	* <b>{@code :}</b> and <b>{@code #}</b> stand for themselves.
				76	*
				77	* <p> An <i>absolute</i> URI specifies a scheme; a URI that is not absolute is
				78	* said to be <i>relative</i>. URIs are also classified according to whether
				79	* they are <i>opaque</i> or <i>hierarchical</i>.
				80	*
				81	* <p> An <i>opaque</i> URI is an absolute URI whose scheme-specific part does
				82	* not begin with a slash character ({@code '/'}). Opaque URIs are not
				83	* subject to further parsing. Some examples of opaque URIs are:
				84	*
				85	* <blockquote><table cellpadding=0 cellspacing=0 summary="layout">
				86	* <tr><td>{@code mailto:java-net@java.sun.com}</td></tr>
				87	* <tr><td>{@code news:comp.lang.java}</td></tr>
				88	* <tr><td>{@code urn:isbn:096139210x}</td></tr>
				89	* </table></blockquote>
				90	*
				91	* <p> A <i>hierarchical</i> URI is either an absolute URI whose
				92	* scheme-specific part begins with a slash character, or a relative URI, that
				93	* is, a URI that does not specify a scheme. Some examples of hierarchical
				94	* URIs are:
				95	*
				96	* <blockquote>
				97	* {@code http://java.sun.com/j2se/1.3/}<br>
				98	* {@code docs/guide/collections/designfaq.html#28}<br>
				99	* {@code ../../../demo/jfc/SwingSet2/src/SwingSet2.java}<br>
				100	* {@code file:///~/calendar}
				101	* </blockquote>
				102	*
				103	* <p> A hierarchical URI is subject to further parsing according to the syntax
				104	*
				105	* <blockquote>
				106	* [<i>scheme</i><b>{@code :}</b>][<b>{@code //}</b><i>authority</i>][<i>path</i>][<b>{@code ?}</b><i>query</i>][<b>{@code #}</b><i>fragment</i>]
				107	* </blockquote>
				108	*
				109	* where the characters <b>{@code :}</b>, <b>{@code /}</b>,
				110	* <b>{@code ?}</b>, and <b>{@code #}</b> stand for themselves. The
				111	* scheme-specific part of a hierarchical URI consists of the characters
				112	* between the scheme and fragment components.
				113	*
				114	* <p> The authority component of a hierarchical URI is, if specified, either
				115	* <i>server-based</i> or <i>registry-based</i>. A server-based authority
				116	* parses according to the familiar syntax
				117	*
				118	* <blockquote>
				119	* [<i>user-info</i><b>{@code @}</b>]<i>host</i>[<b>{@code :}</b><i>port</i>]
				120	* </blockquote>
				121	*
				122	* where the characters <b>{@code @}</b> and <b>{@code :}</b> stand for
				123	* themselves. Nearly all URI schemes currently in use are server-based. An
				124	* authority component that does not parse in this way is considered to be
				125	* registry-based.
				126	*
				127	* <p> The path component of a hierarchical URI is itself said to be absolute
				128	* if it begins with a slash character ({@code '/'}); otherwise it is
				129	* relative. The path of a hierarchical URI that is either absolute or
				130	* specifies an authority is always absolute.
				131	*
				132	* <p> All told, then, a URI instance has the following nine components:
				133	*
				134	* <blockquote><table summary="Describes the components of a URI:scheme,scheme-specific-part,authority,user-info,host,port,path,query,fragment">
				135	* <tr><th><i>Component</i></th><th><i>Type</i></th></tr>
				136	* <tr><td>scheme</td><td>{@code String}</td></tr>
				137	* <tr><td>scheme-specific-part    </td><td>{@code String}</td></tr>
				138	* <tr><td>authority</td><td>{@code String}</td></tr>
				139	* <tr><td>user-info</td><td>{@code String}</td></tr>
				140	* <tr><td>host</td><td>{@code String}</td></tr>
				141	* <tr><td>port</td><td>{@code int}</td></tr>
				142	* <tr><td>path</td><td>{@code String}</td></tr>
				143	* <tr><td>query</td><td>{@code String}</td></tr>
				144	* <tr><td>fragment</td><td>{@code String}</td></tr>
				145	* </table></blockquote>
				146	*
				147	* In a given instance any particular component is either <i>undefined</i> or
				148	* <i>defined</i> with a distinct value. Undefined string components are
				149	* represented by {@code null}, while undefined integer components are
				150	* represented by {@code -1}. A string component may be defined to have the
				151	* empty string as its value; this is not equivalent to that component being
				152	* undefined.
				153	*
				154	* <p> Whether a particular component is or is not defined in an instance
				155	* depends upon the type of the URI being represented. An absolute URI has a
				156	* scheme component. An opaque URI has a scheme, a scheme-specific part, and
				157	* possibly a fragment, but has no other components. A hierarchical URI always
				158	* has a path (though it may be empty) and a scheme-specific-part (which at
				159	* least contains the path), and may have any of the other components. If the
				160	* authority component is present and is server-based then the host component
				161	* will be defined and the user-information and port components may be defined.
				162	*
				163	*
				164	* <h4> Operations on URI instances </h4>
				165	*
				166	* The key operations supported by this class are those of
				167	* <i>normalization</i>, <i>resolution</i>, and <i>relativization</i>.
				168	*
				169	* <p> <i>Normalization</i> is the process of removing unnecessary {@code "."}
				170	* and {@code ".."} segments from the path component of a hierarchical URI.
				171	* Each {@code "."} segment is simply removed. A {@code ".."} segment is
				172	* removed only if it is preceded by a non-{@code ".."} segment.
				173	* Normalization has no effect upon opaque URIs.
				174	*
				175	* <p> <i>Resolution</i> is the process of resolving one URI against another,
				176	* <i>base</i> URI. The resulting URI is constructed from components of both
				177	* URIs in the manner specified by RFC 2396, taking components from the
				178	* base URI for those not specified in the original. For hierarchical URIs,
				179	* the path of the original is resolved against the path of the base and then
				180	* normalized. The result, for example, of resolving
				181	*
				182	* <blockquote>
				183	* {@code docs/guide/collections/designfaq.html#28}
				184	*
				185	*     (1)
				186	* </blockquote>
				187	*
				188	* against the base URI {@code http://java.sun.com/j2se/1.3/} is the result
				189	* URI
				190	*
				191	* <blockquote>
				192	* {@code http://java.sun.com/j2se/1.3/docs/guide/collections/designfaq.html#28}
				193	* </blockquote>
				194	*
				195	* Resolving the relative URI
				196	*
				197	* <blockquote>
				198	* {@code ../../../demo/jfc/SwingSet2/src/SwingSet2.java}    (2)
				199	* </blockquote>
				200	*
				201	* against this result yields, in turn,
				202	*
				203	* <blockquote>
				204	* {@code http://java.sun.com/j2se/1.3/demo/jfc/SwingSet2/src/SwingSet2.java}
				205	* </blockquote>
				206	*
				207	* Resolution of both absolute and relative URIs, and of both absolute and
				208	* relative paths in the case of hierarchical URIs, is supported. Resolving
				209	* the URI {@code file:///~/calendar} against any other URI simply yields the
				210	* original URI, since it is absolute. Resolving the relative URI (2) above
				211	* against the relative base URI (1) yields the normalized, but still relative,
				212	* URI
				213	*
				214	* <blockquote>
				215	* {@code demo/jfc/SwingSet2/src/SwingSet2.java}
				216	* </blockquote>
				217	*
				218	* <p> <i>Relativization</i>, finally, is the inverse of resolution: For any
				219	* two normalized URIs <i>u</i> and <i>v</i>,
				220	*
				221	* <blockquote>
				222	* <i>u</i>{@code .relativize(}<i>u</i>{@code .resolve(}<i>v</i>{@code )).equals(}<i>v</i>{@code )}  and<br>
				223	* <i>u</i>{@code .resolve(}<i>u</i>{@code .relativize(}<i>v</i>{@code )).equals(}<i>v</i>{@code )}  .<br>
				224	* </blockquote>
				225	*
				226	* This operation is often useful when constructing a document containing URIs
				227	* that must be made relative to the base URI of the document wherever
				228	* possible. For example, relativizing the URI
				229	*
				230	* <blockquote>
				231	* {@code http://java.sun.com/j2se/1.3/docs/guide/index.html}
				232	* </blockquote>
				233	*
				234	* against the base URI
				235	*
				236	* <blockquote>
				237	* {@code http://java.sun.com/j2se/1.3}
				238	* </blockquote>
				239	*
				240	* yields the relative URI {@code docs/guide/index.html}.
				241	*
				242	*
				243	* <h4> Character categories </h4>
				244	*
				245	* RFC 2396 specifies precisely which characters are permitted in the
				246	* various components of a URI reference. The following categories, most of
				247	* which are taken from that specification, are used below to describe these
				248	* constraints:
				249	*
				250	* <blockquote><table cellspacing=2 summary="Describes categories alpha,digit,alphanum,unreserved,punct,reserved,escaped,and other">
				251	* <tr><th valign=top><i>alpha</i></th>
				252	* <td>The US-ASCII alphabetic characters,
				253	* {@code 'A'} through {@code 'Z'}
				254	* and {@code 'a'} through {@code 'z'}</td></tr>
				255	* <tr><th valign=top><i>digit</i></th>
				256	* <td>The US-ASCII decimal digit characters,
				257	* {@code '0'} through {@code '9'}</td></tr>
				258	* <tr><th valign=top><i>alphanum</i></th>
				259	* <td>All <i>alpha</i> and <i>digit</i> characters</td></tr>
				260	* <tr><th valign=top><i>unreserved</i>    </th>
				261	* <td>All <i>alphanum</i> characters together with those in the string
				262	* {@code "_-!.~'()*"}</td></tr>
				263	* <tr><th valign=top><i>punct</i></th>
				264	* <td>The characters in the string {@code ",;:$&+="}</td></tr>
				265	* <tr><th valign=top><i>reserved</i></th>
				266	* <td>All <i>punct</i> characters together with those in the string
				267	* {@code "?/[]@"}</td></tr>
				268	* <tr><th valign=top><i>escaped</i></th>
				269	* <td>Escaped octets, that is, triplets consisting of the percent
				270	* character ({@code '%'}) followed by two hexadecimal digits
				271	* ({@code '0'}-{@code '9'}, {@code 'A'}-{@code 'F'}, and
				272	* {@code 'a'}-{@code 'f'})</td></tr>
				273	* <tr><th valign=top><i>other</i></th>
				274	* <td>The Unicode characters that are not in the US-ASCII character set,
				275	* are not control characters (according to the {@link
				276	* java.lang.Character#isISOControl(char) Character.isISOControl}
				277	* method), and are not space characters (according to the {@link
				278	* java.lang.Character#isSpaceChar(char) Character.isSpaceChar}
				279	* method)  <i>(<b>Deviation from RFC 2396</b>, which is
				280	* limited to US-ASCII)</i></td></tr>
				281	* </table></blockquote>
				282	*
				283	* <p><a name="legal-chars"></a> The set of all legal URI characters consists of
				284	* the <i>unreserved</i>, <i>reserved</i>, <i>escaped</i>, and <i>other</i>
				285	* characters.
				286	*
				287	*
				288	* <h4> Escaped octets, quotation, encoding, and decoding </h4>
				289	*
				290	* RFC 2396 allows escaped octets to appear in the user-info, path, query, and
				291	* fragment components. Escaping serves two purposes in URIs:
				292	*
				293	* <ul>
				294	*
				295	* <li><p> To <i>encode</i> non-US-ASCII characters when a URI is required to
				296	* conform strictly to RFC 2396 by not containing any <i>other</i>
				297	* characters. </p></li>
				298	*
				299	* <li><p> To <i>quote</i> characters that are otherwise illegal in a
				300	* component. The user-info, path, query, and fragment components differ
				301	* slightly in terms of which characters are considered legal and illegal.
				302	* </p></li>
				303	*
				304	* </ul>
				305	*
				306	* These purposes are served in this class by three related operations:
				307	*
				308	* <ul>
				309	*
				310	* <li><p><a name="encode"></a> A character is <i>encoded</i> by replacing it
				311	* with the sequence of escaped octets that represent that character in the
				312	* UTF-8 character set. The Euro currency symbol ({@code '\u005Cu20AC'}),
				313	* for example, is encoded as {@code "%E2%82%AC"}. <i>(<b>Deviation from
				314	* RFC 2396</b>, which does not specify any particular character
				315	* set.)</i> </p></li>
				316	*
				317	* <li><p><a name="quote"></a> An illegal character is <i>quoted</i> simply by
				318	* encoding it. The space character, for example, is quoted by replacing it
				319	* with {@code "%20"}. UTF-8 contains US-ASCII, hence for US-ASCII
				320	* characters this transformation has exactly the effect required by
				321	* RFC 2396. </p></li>
				322	*
				323	* <li><p><a name="decode"></a>
				324	* A sequence of escaped octets is <i>decoded</i> by
				325	* replacing it with the sequence of characters that it represents in the
				326	* UTF-8 character set. UTF-8 contains US-ASCII, hence decoding has the
				327	* effect of de-quoting any quoted US-ASCII characters as well as that of
				328	* decoding any encoded non-US-ASCII characters. If a <a
				329	* href="../nio/charset/CharsetDecoder.html#ce">decoding error</a> occurs
				330	* when decoding the escaped octets then the erroneous octets are replaced by
				331	* {@code '\u005CuFFFD'}, the Unicode replacement character. </p></li>
				332	*
				333	* </ul>
				334	*
				335	* These operations are exposed in the constructors and methods of this class
				336	* as follows:
				337	*
				338	* <ul>
				339	*
				340	* <li><p> The {@linkplain #URI(java.lang.String) single-argument
				341	* constructor} requires any illegal characters in its argument to be
				342	* quoted and preserves any escaped octets and <i>other</i> characters that
				343	* are present. </p></li>
				344	*
				345	* <li><p> The {@linkplain
				346	* #URI(java.lang.String,java.lang.String,java.lang.String,int,java.lang.String,java.lang.String,java.lang.String)
				347	* multi-argument constructors} quote illegal characters as
				348	* required by the components in which they appear. The percent character
				349	* ({@code '%'}) is always quoted by these constructors. Any <i>other</i>
				350	* characters are preserved. </p></li>
				351	*
				352	* <li><p> The {@link #getRawUserInfo() getRawUserInfo}, {@link #getRawPath()
				353	* getRawPath}, {@link #getRawQuery() getRawQuery}, {@link #getRawFragment()
				354	* getRawFragment}, {@link #getRawAuthority() getRawAuthority}, and {@link
				355	* #getRawSchemeSpecificPart() getRawSchemeSpecificPart} methods return the
				356	* values of their corresponding components in raw form, without interpreting
				357	* any escaped octets. The strings returned by these methods may contain
				358	* both escaped octets and <i>other</i> characters, and will not contain any
				359	* illegal characters. </p></li>
				360	*
				361	* <li><p> The {@link #getUserInfo() getUserInfo}, {@link #getPath()
				362	* getPath}, {@link #getQuery() getQuery}, {@link #getFragment()
				363	* getFragment}, {@link #getAuthority() getAuthority}, and {@link
				364	* #getSchemeSpecificPart() getSchemeSpecificPart} methods decode any escaped
				365	* octets in their corresponding components. The strings returned by these
				366	* methods may contain both <i>other</i> characters and illegal characters,
				367	* and will not contain any escaped octets. </p></li>
				368	*
				369	* <li><p> The {@link #toString() toString} method returns a URI string with
				370	* all necessary quotation but which may contain <i>other</i> characters.
				371	* </p></li>
				372	*
				373	* <li><p> The {@link #toASCIIString() toASCIIString} method returns a fully
				374	* quoted and encoded URI string that does not contain any <i>other</i>
				375	* characters. </p></li>
				376	*
				377	* </ul>
				378	*
				379	*
				380	* <h4> Identities </h4>
				381	*
				382	* For any URI <i>u</i>, it is always the case that
				383	*
				384	* <blockquote>
				385	* {@code new URI(}<i>u</i>{@code .toString()).equals(}<i>u</i>{@code )} .
				386	* </blockquote>
				387	*
				388	* For any URI <i>u</i> that does not contain redundant syntax such as two
				389	* slashes before an empty authority (as in {@code file:///tmp/} ) or a
				390	* colon following a host name but no port (as in
				391	* {@code http://java.sun.com:} ), and that does not encode characters
				392	* except those that must be quoted, the following identities also hold:
				393	* <pre>
				394	* new URI(<i>u</i>.getScheme(),
				395	* <i>u</i>.getSchemeSpecificPart(),
				396	* <i>u</i>.getFragment())
				397	* .equals(<i>u</i>)</pre>
				398	* in all cases,
				399	* <pre>
				400	* new URI(<i>u</i>.getScheme(),
				401	* <i>u</i>.getUserInfo(), <i>u</i>.getAuthority(),
				402	* <i>u</i>.getPath(), <i>u</i>.getQuery(),
				403	* <i>u</i>.getFragment())
				404	* .equals(<i>u</i>)</pre>
				405	* if <i>u</i> is hierarchical, and
				406	* <pre>
				407	* new URI(<i>u</i>.getScheme(),
				408	* <i>u</i>.getUserInfo(), <i>u</i>.getHost(), <i>u</i>.getPort(),
				409	* <i>u</i>.getPath(), <i>u</i>.getQuery(),
				410	* <i>u</i>.getFragment())
				411	* .equals(<i>u</i>)</pre>
				412	* if <i>u</i> is hierarchical and has either no authority or a server-based
				413	* authority.
				414	*
				415	*
				416	* <h4> URIs, URLs, and URNs </h4>
				417	*
				418	* A URI is a uniform resource <i>identifier</i> while a URL is a uniform
				419	* resource <i>locator</i>. Hence every URL is a URI, abstractly speaking, but
				420	* not every URI is a URL. This is because there is another subcategory of
				421	* URIs, uniform resource <i>names</i> (URNs), which name resources but do not
				422	* specify how to locate them. The {@code mailto}, {@code news}, and
				423	* {@code isbn} URIs shown above are examples of URNs.
				424	*
				425	* <p> The conceptual distinction between URIs and URLs is reflected in the
				426	* differences between this class and the {@link URL} class.
				427	*
				428	* <p> An instance of this class represents a URI reference in the syntactic
				429	* sense defined by RFC 2396. A URI may be either absolute or relative.
				430	* A URI string is parsed according to the generic syntax without regard to the
				431	* scheme, if any, that it specifies. No lookup of the host, if any, is
				432	* performed, and no scheme-dependent stream handler is constructed. Equality,
				433	* hashing, and comparison are defined strictly in terms of the character
				434	* content of the instance. In other words, a URI instance is little more than
				435	* a structured string that supports the syntactic, scheme-independent
				436	* operations of comparison, normalization, resolution, and relativization.
				437	*
				438	* <p> An instance of the {@link URL} class, by contrast, represents the
				439	* syntactic components of a URL together with some of the information required
				440	* to access the resource that it describes. A URL must be absolute, that is,
				441	* it must always specify a scheme. A URL string is parsed according to its
				442	* scheme. A stream handler is always established for a URL, and in fact it is
				443	* impossible to create a URL instance for a scheme for which no handler is
				444	* available. Equality and hashing depend upon both the scheme and the
				445	* Internet address of the host, if any; comparison is not defined. In other
				446	* words, a URL is a structured string that supports the syntactic operation of
				447	* resolution as well as the network I/O operations of looking up the host and
				448	* opening a connection to the specified resource.
				449	*
				450	*
				451	* @author Mark Reinhold
				452	* @since 1.4
				453	*
				454	* @see <a href="http://www.ietf.org/rfc/rfc2279.txt">RFC 2279: UTF-8, a transformation format of ISO 10646</a>
				455	* @see <a href="http://www.ietf.org/rfc/rfc2373.txt">RFC 2373: IPv6 Addressing Architecture</a>
				456	* @see <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396: Uniform Resource Identifiers (URI): Generic Syntax</a>
				457	* @see <a href="http://www.ietf.org/rfc/rfc2732.txt">RFC 2732: Format for Literal IPv6 Addresses in URLs</a>
				458	*/
				459
				460	public final class URI
				461	implements Comparable<URI>, Serializable
				462	{
				463
				464	// Note: Comments containing the word "ASSERT" indicate places where a
				465	// throw of an InternalError should be replaced by an appropriate assertion
				466	// statement once asserts are enabled in the build.
				467
				468	static final long serialVersionUID = -6052424284110960213L;
				469
				470
				471	// -- Properties and components of this instance --
				472
				473	// Components of all URIs: [<scheme>:]<scheme-specific-part>[#<fragment>]
				474	private transient String scheme; // null ==> relative URI
				475	private transient String fragment;
				476
				477	// Hierarchical URI components: [//<authority>]<path>[?<query>]
				478	private transient String authority; // Registry or server
				479
				480	// Server-based authority: [<userInfo>@]<host>[:<port>]
				481	private transient String userInfo;
				482	private transient String host; // null ==> registry-based
				483	private transient int port = -1; // -1 ==> undefined
				484
				485	// Remaining components of hierarchical URIs
				486	private transient String path; // null ==> opaque
				487	private transient String query;
				488
				489	// The remaining fields may be computed on demand
				490
				491	private volatile transient String schemeSpecificPart;
				492	private volatile transient int hash; // Zero ==> undefined
				493
				494	private volatile transient String decodedUserInfo = null;
				495	private volatile transient String decodedAuthority = null;
				496	private volatile transient String decodedPath = null;
				497	private volatile transient String decodedQuery = null;
				498	private volatile transient String decodedFragment = null;
				499	private volatile transient String decodedSchemeSpecificPart = null;
				500
				501	/**
				502	* The string form of this URI.
				503	*
				504	* @serial
				505	*/
				506	private volatile String string; // The only serializable field
				507
				508
				509
				510	// -- Constructors and factories --
				511
				512	private URI() { } // Used internally; leaves every component at its field default (port = -1, string components null)
				513
				514	/**
				515	* Constructs a URI by parsing the given string.
				516	*
				517	* <p> This constructor parses the given string exactly as specified by the
				518	* grammar in <a
				519	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
				520	* Appendix A, <b><i>except for the following deviations:</i></b> </p>
				521	*
				522	* <ul>
				523	*
				524	* <li><p> An empty authority component is permitted as long as it is
				525	* followed by a non-empty path, a query component, or a fragment
				526	* component. This allows the parsing of URIs such as
				527	* {@code "file:///foo/bar"}, which seems to be the intent of
				528	* RFC 2396 although the grammar does not permit it. If the
				529	* authority component is empty then the user-information, host, and port
				530	* components are undefined. </p></li>
				531	*
				532	* <li><p> Empty relative paths are permitted; this seems to be the
				533	* intent of RFC 2396 although the grammar does not permit it. The
				534	* primary consequence of this deviation is that a standalone fragment
				535	* such as {@code "#foo"} parses as a relative URI with an empty path
				536	* and the given fragment, and can be usefully <a
				537	* href="#resolve-frag">resolved</a> against a base URI. </p></li>
				538	*
				539	* <li><p> IPv4 addresses in host components are parsed rigorously, as
				540	* specified by <a
				541	* href="http://www.ietf.org/rfc/rfc2732.txt">RFC 2732</a>: Each
				542	* element of a dotted-quad address must contain no more than three
				543	* decimal digits. Each element is further constrained to have a value
				544	* no greater than 255. </p></li>
				545	*
				546	* <li> <p> Hostnames in host components that comprise only a single
				547	* domain label are permitted to start with an <i>alphanum</i>
				548	* character. This seems to be the intent of <a
				549	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>
				550	* section 3.2.2 although the grammar does not permit it. The
				551	* consequence of this deviation is that the authority component of a
				552	* hierarchical URI such as {@code s://123} will parse as a server-based
				553	* authority. </p></li>
				554	*
				555	* <li><p> IPv6 addresses are permitted for the host component. An IPv6
				556	* address must be enclosed in square brackets ({@code '['} and
				557	* {@code ']'}) as specified by <a
				558	* href="http://www.ietf.org/rfc/rfc2732.txt">RFC 2732</a>. The
				559	* IPv6 address itself must parse according to <a
				560	* href="http://www.ietf.org/rfc/rfc2373.txt">RFC 2373</a>. IPv6
				561	* addresses are further constrained to describe no more than sixteen
				562	* bytes of address information, a constraint implicit in RFC 2373
				563	* but not expressible in the grammar. </p></li>
				564	*
				565	* <li><p> Characters in the <i>other</i> category are permitted wherever
				566	* RFC 2396 permits <i>escaped</i> octets, that is, in the
				567	* user-information, path, query, and fragment components, as well as in
				568	* the authority component if the authority is registry-based. This
				569	* allows URIs to contain Unicode characters beyond those in the US-ASCII
				570	* character set. </p></li>
				571	*
				572	* </ul>
				573	*
				574	* @param str The string to be parsed into a URI
				575	*
				576	* @throws NullPointerException
				577	* If {@code str} is {@code null}
				578	*
				579	* @throws URISyntaxException
				580	* If the given string violates RFC 2396, as augmented
				581	* by the above deviations
				582	*/
				583	public URI(String str) throws URISyntaxException {
				584	new Parser(str).parse(false); // Parser object is discarded after parsing; NOTE(review): false presumably means a server-based authority is not required (the seven-argument constructor passes true) -- confirm in Parser.parse
				585	}
				586
				587	/**
				588	* Constructs a hierarchical URI from the given components.
				589	*
				590	* <p> If a scheme is given then the path, if also given, must either be
				591	* empty or begin with a slash character ({@code '/'}). Otherwise a
				592	* component of the new URI may be left undefined by passing {@code null}
				593	* for the corresponding parameter or, in the case of the {@code port}
				594	* parameter, by passing {@code -1}.
				595	*
				596	* <p> This constructor first builds a URI string from the given components
				597	* according to the rules specified in <a
				598	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
				599	* section 5.2, step 7: </p>
				600	*
				601	* <ol>
				602	*
				603	* <li><p> Initially, the result string is empty. </p></li>
				604	*
				605	* <li><p> If a scheme is given then it is appended to the result,
				606	* followed by a colon character ({@code ':'}). </p></li>
				607	*
				608	* <li><p> If user information, a host, or a port are given then the
				609	* string {@code "//"} is appended. </p></li>
				610	*
				611	* <li><p> If user information is given then it is appended, followed by
				612	* a commercial-at character ({@code '@'}). Any character not in the
				613	* <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
				614	* categories is <a href="#quote">quoted</a>. </p></li>
				615	*
				616	* <li><p> If a host is given then it is appended. If the host is a
				617	* literal IPv6 address but is not enclosed in square brackets
				618	* ({@code '['} and {@code ']'}) then the square brackets are added.
				619	* </p></li>
				620	*
				621	* <li><p> If a port number is given then a colon character
				622	* ({@code ':'}) is appended, followed by the port number in decimal.
				623	* </p></li>
				624	*
				625	* <li><p> If a path is given then it is appended. Any character not in
				626	* the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
				627	* categories, and not equal to the slash character ({@code '/'}) or the
				628	* commercial-at character ({@code '@'}), is quoted. </p></li>
				629	*
				630	* <li><p> If a query is given then a question-mark character
				631	* ({@code '?'}) is appended, followed by the query. Any character that
				632	* is not a <a href="#legal-chars">legal URI character</a> is quoted.
				633	* </p></li>
				634	*
				635	* <li><p> Finally, if a fragment is given then a hash character
				636	* ({@code '#'}) is appended, followed by the fragment. Any character
				637	* that is not a legal URI character is quoted. </p></li>
				638	*
				639	* </ol>
				640	*
				641	* <p> The resulting URI string is then parsed as if by invoking the {@link
				642	* #URI(String)} constructor and then invoking the {@link
				643	* #parseServerAuthority()} method upon the result; this may cause a {@link
				644	* URISyntaxException} to be thrown. </p>
				645	*
				646	* @param scheme Scheme name
				647	* @param userInfo User name and authorization information
				648	* @param host Host name
				649	* @param port Port number
				650	* @param path Path
				651	* @param query Query
				652	* @param fragment Fragment
				653	*
				654	* @throws URISyntaxException
				655	* If both a scheme and a path are given but the path is relative,
				656	* if the URI string constructed from the given components violates
				657	* RFC 2396, or if the authority component of the string is
				658	* present but cannot be parsed as a server-based authority
				659	*/
				660	public URI(String scheme,
				661	String userInfo, String host, int port,
				662	String path, String query, String fragment)
				663	throws URISyntaxException
				664	{
				665	String s = toString(scheme, null,
				666	null, userInfo, host, port,
				667	path, query, fragment);
				668	checkPath(s, scheme, path);
				669	new Parser(s).parse(true);
				670	}
				671
				672	/**
				673	* Constructs a hierarchical URI from the given components.
				674	*
				675	* <p> If a scheme is given then the path, if also given, must either be
				676	* empty or begin with a slash character ({@code '/'}). Otherwise a
				677	* component of the new URI may be left undefined by passing {@code null}
				678	* for the corresponding parameter.
				679	*
				680	* <p> This constructor first builds a URI string from the given components
				681	* according to the rules specified in <a
				682	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
				683	* section 5.2, step 7: </p>
				684	*
				685	* <ol>
				686	*
				687	* <li><p> Initially, the result string is empty. </p></li>
				688	*
				689	* <li><p> If a scheme is given then it is appended to the result,
				690	* followed by a colon character ({@code ':'}). </p></li>
				691	*
				692	* <li><p> If an authority is given then the string {@code "//"} is
				693	* appended, followed by the authority. If the authority contains a
				694	* literal IPv6 address then the address must be enclosed in square
				695	* brackets ({@code '['} and {@code ']'}). Any character not in the
				696	* <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
				697	* categories, and not equal to the commercial-at character
				698	* ({@code '@'}), is <a href="#quote">quoted</a>. </p></li>
				699	*
				700	* <li><p> If a path is given then it is appended. Any character not in
				701	* the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, or <i>other</i>
				702	* categories, and not equal to the slash character ({@code '/'}) or the
				703	* commercial-at character ({@code '@'}), is quoted. </p></li>
				704	*
				705	* <li><p> If a query is given then a question-mark character
				706	* ({@code '?'}) is appended, followed by the query. Any character that
				707	* is not a <a href="#legal-chars">legal URI character</a> is quoted.
				708	* </p></li>
				709	*
				710	* <li><p> Finally, if a fragment is given then a hash character
				711	* ({@code '#'}) is appended, followed by the fragment. Any character
				712	* that is not a legal URI character is quoted. </p></li>
				713	*
				714	* </ol>
				715	*
				716	* <p> The resulting URI string is then parsed as if by invoking the {@link
				717	* #URI(String)} constructor and then invoking the {@link
				718	* #parseServerAuthority()} method upon the result; this may cause a {@link
				719	* URISyntaxException} to be thrown. </p>
				720	*
				721	* @param scheme Scheme name
				722	* @param authority Authority
				723	* @param path Path
				724	* @param query Query
				725	* @param fragment Fragment
				726	*
				727	* @throws URISyntaxException
				728	* If both a scheme and a path are given but the path is relative,
				729	* if the URI string constructed from the given components violates
				730	* RFC 2396, or if the authority component of the string is
				731	* present but cannot be parsed as a server-based authority
				732	*/
				733	public URI(String scheme,
				734	String authority,
				735	String path, String query, String fragment)
				736	throws URISyntaxException
				737	{
				738	String s = toString(scheme, null,
				739	authority, null, null, -1,
				740	path, query, fragment);
				741	checkPath(s, scheme, path);
				742	new Parser(s).parse(false);
				743	}
				744
				745	/**
				746	* Constructs a hierarchical URI from the given components.
				747	*
				748	* <p> A component may be left undefined by passing {@code null}.
				749	*
				750	* <p> This convenience constructor works as if by invoking the
				751	* seven-argument constructor as follows:
				752	*
				753	* <blockquote>
				754	* {@code new} {@link #URI(String, String, String, int, String, String, String)
				755	* URI}{@code (scheme, null, host, -1, path, null, fragment);}
				756	* </blockquote>
				757	*
				758	* @param scheme Scheme name
				759	* @param host Host name
				760	* @param path Path
				761	* @param fragment Fragment
				762	*
				763	* @throws URISyntaxException
				764	* If the URI string constructed from the given components
				765	* violates RFC 2396
				766	*/
				public URI(String scheme, String host, String path, String fragment)
				        throws URISyntaxException {
				    // Delegate to the seven-argument constructor with no user
				    // information, no port (-1) and no query.
				    this(scheme,
				         null, host, -1,
				         path, null, fragment);
				}
				772
				773	/**
				774	* Constructs a URI from the given components.
				775	*
				776	* <p> A component may be left undefined by passing {@code null}.
				777	*
				778	* <p> This constructor first builds a URI in string form using the given
				779	* components as follows: </p>
				780	*
				781	* <ol>
				782	*
				783	* <li><p> Initially, the result string is empty. </p></li>
				784	*
				785	* <li><p> If a scheme is given then it is appended to the result,
				786	* followed by a colon character ({@code ':'}). </p></li>
				787	*
				788	* <li><p> If a scheme-specific part is given then it is appended. Any
				789	* character that is not a <a href="#legal-chars">legal URI character</a>
				790	* is <a href="#quote">quoted</a>. </p></li>
				791	*
				792	* <li><p> Finally, if a fragment is given then a hash character
				793	* ({@code '#'}) is appended to the string, followed by the fragment.
				794	* Any character that is not a legal URI character is quoted. </p></li>
				795	*
				796	* </ol>
				797	*
				798	* <p> The resulting URI string is then parsed in order to create the new
				799	* URI instance as if by invoking the {@link #URI(String)} constructor;
				800	* this may cause a {@link URISyntaxException} to be thrown. </p>
				801	*
				802	* @param scheme Scheme name
				803	* @param ssp Scheme-specific part
				804	* @param fragment Fragment
				805	*
				806	* @throws URISyntaxException
				807	* If the URI string constructed from the given components
				808	* violates RFC 2396
				809	*/
				810	public URI(String scheme, String ssp, String fragment)
				811	throws URISyntaxException
				812	{
				813	new Parser(toString(scheme, ssp,
				814	null, null, null, -1,
				815	null, null, fragment))
				816	.parse(false);
				817	}
				818
				819	/**
				820	* Creates a URI by parsing the given string.
				821	*
				822	* <p> This convenience factory method works as if by invoking the {@link
				823	* #URI(String)} constructor; any {@link URISyntaxException} thrown by the
				824	* constructor is caught and wrapped in a new {@link
				825	* IllegalArgumentException} object, which is then thrown.
				826	*
				827	* <p> This method is provided for use in situations where it is known that
				828	* the given string is a legal URI, for example for URI constants declared
				829	* within a program, and so it would be considered a programming error
				830	* for the string not to parse as such. The constructors, which throw
				831	* {@link URISyntaxException} directly, should be used in situations where a
				832	* URI is being constructed from user input or from some other source that
				833	* may be prone to errors. </p>
				834	*
				835	* @param str The string to be parsed into a URI
				836	* @return The new URI
				837	*
				838	* @throws NullPointerException
				839	* If {@code str} is {@code null}
				840	*
				841	* @throws IllegalArgumentException
				842	* If the given string violates RFC 2396
				843	*/
				844	public static URI create(String str) {
				845	try {
				846	return new URI(str);
				847	} catch (URISyntaxException x) {
				848	throw new IllegalArgumentException(x.getMessage(), x);
				849	}
				850	}
				851
				852
				853	// -- Operations --
				854
				855	/**
				856	* Attempts to parse this URI's authority component, if defined, into
				857	* user-information, host, and port components.
				858	*
				859	* <p> If this URI's authority component has already been recognized as
				860	* being server-based then it will already have been parsed into
				861	* user-information, host, and port components. In this case, or if this
				862	* URI has no authority component, this method simply returns this URI.
				863	*
				864	* <p> Otherwise this method attempts once more to parse the authority
				865	* component into user-information, host, and port components, and throws
				866	* an exception describing why the authority component could not be parsed
				867	* in that way.
				868	*
				869	* <p> This method is provided because the generic URI syntax specified in
				870	* <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>
				871	* cannot always distinguish a malformed server-based authority from a
				872	* legitimate registry-based authority. It must therefore treat some
				873	* instances of the former as instances of the latter. The authority
				874	* component in the URI string {@code "//foo:bar"}, for example, is not a
				875	* legal server-based authority but it is legal as a registry-based
				876	* authority.
				877	*
				878	* <p> In many common situations, for example when working with URIs that are
				879	* known to be either URNs or URLs, the hierarchical URIs being used will
				880	* always be server-based. They therefore must either be parsed as such or
				881	* treated as an error. In these cases a statement such as
				882	*
				883	* <blockquote>
				884	* {@code URI }<i>u</i>{@code = new URI(str).parseServerAuthority();}
				885	* </blockquote>
				886	*
				887	* <p> can be used to ensure that <i>u</i> always refers to a URI that, if
				888	* it has an authority component, has a server-based authority with proper
				889	* user-information, host, and port components. Invoking this method also
				890	* ensures that if the authority could not be parsed in that way then an
				891	* appropriate diagnostic message can be issued based upon the exception
				892	* that is thrown. </p>
				893	*
				894	* @return A URI whose authority field has been parsed
				895	* as a server-based authority
				896	*
				897	* @throws URISyntaxException
				898	* If the authority component of this URI is defined
				899	* but cannot be parsed as a server-based authority
				900	* according to RFC 2396
				901	*/
				902	public URI parseServerAuthority()
				903	throws URISyntaxException
				904	{
				905	// We could be clever and cache the error message and index from the
				906	// exception thrown during the original parse, but that would require
				907	// either more fields or a more-obscure representation.
				908	if ((host != null) \|\| (authority == null))
				909	return this;
				910	defineString();
				911	new Parser(string).parse(true);
				912	return this;
				913	}
				914
				915	/**
				916	* Normalizes this URI's path.
				917	*
				918	* <p> If this URI is opaque, or if its path is already in normal form,
				919	* then this URI is returned. Otherwise a new URI is constructed that is
				920	* identical to this URI except that its path is computed by normalizing
				921	* this URI's path in a manner consistent with <a
				922	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
				923	* section 5.2, step 6, sub-steps c through f; that is:
				924	* </p>
				925	*
				926	* <ol>
				927	*
				928	* <li><p> All {@code "."} segments are removed. </p></li>
				929	*
				930	* <li><p> If a {@code ".."} segment is preceded by a non-{@code ".."}
				931	* segment then both of these segments are removed. This step is
				932	* repeated until it is no longer applicable. </p></li>
				933	*
				934	* <li><p> If the path is relative, and if its first segment contains a
				935	* colon character ({@code ':'}), then a {@code "."} segment is
				936	* prepended. This prevents a relative URI with a path such as
				937	* {@code "a:b/c/d"} from later being re-parsed as an opaque URI with a
				938	* scheme of {@code "a"} and a scheme-specific part of {@code "b/c/d"}.
				939	* <b><i>(Deviation from RFC 2396)</i></b> </p></li>
				940	*
				941	* </ol>
				942	*
				943	* <p> A normalized path will begin with one or more {@code ".."} segments
				944	* if there were insufficient non-{@code ".."} segments preceding them to
				945	* allow their removal. A normalized path will begin with a {@code "."}
				946	* segment if one was inserted by step 3 above. Otherwise, a normalized
				947	* path will not contain any {@code "."} or {@code ".."} segments. </p>
				948	*
				949	* @return A URI equivalent to this URI,
				950	* but whose path is in normal form
				951	*/
				952	public URI normalize() {
				953	return normalize(this);
				954	}
				955
				956	/**
				957	* Resolves the given URI against this URI.
				958	*
				959	* <p> If the given URI is already absolute, or if this URI is opaque, then
				960	* the given URI is returned.
				961	*
				962	* <p><a name="resolve-frag"></a> If the given URI's fragment component is
				963	* defined, its path component is empty, and its scheme, authority, and
				964	* query components are undefined, then a URI with the given fragment but
				965	* with all other components equal to those of this URI is returned. This
				966	* allows a URI representing a standalone fragment reference, such as
				967	* {@code "#foo"}, to be usefully resolved against a base URI.
				968	*
				969	* <p> Otherwise this method constructs a new hierarchical URI in a manner
				970	* consistent with <a
				971	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
				972	* section 5.2; that is: </p>
				973	*
				974	* <ol>
				975	*
				976	* <li><p> A new URI is constructed with this URI's scheme and the given
				977	* URI's query and fragment components. </p></li>
				978	*
				979	* <li><p> If the given URI has an authority component then the new URI's
				980	* authority and path are taken from the given URI. </p></li>
				981	*
				982	* <li><p> Otherwise the new URI's authority component is copied from
				983	* this URI, and its path is computed as follows: </p>
				984	*
				985	* <ol>
				986	*
				987	* <li><p> If the given URI's path is absolute then the new URI's path
				988	* is taken from the given URI. </p></li>
				989	*
				990	* <li><p> Otherwise the given URI's path is relative, and so the new
				991	* URI's path is computed by resolving the path of the given URI
				992	* against the path of this URI. This is done by concatenating all but
				993	* the last segment of this URI's path, if any, with the given URI's
				994	* path and then normalizing the result as if by invoking the {@link
				995	* #normalize() normalize} method. </p></li>
				996	*
				997	* </ol></li>
				998	*
				999	* </ol>
				1000	*
				1001	* <p> The result of this method is absolute if, and only if, either this
				1002	* URI is absolute or the given URI is absolute. </p>
				1003	*
				1004	* @param uri The URI to be resolved against this URI
				1005	* @return The resulting URI
				1006	*
				1007	* @throws NullPointerException
				1008	* If {@code uri} is {@code null}
				1009	*/
				1010	public URI resolve(URI uri) {
				1011	return resolve(this, uri);
				1012	}
				1013
				1014	/**
				1015	* Constructs a new URI by parsing the given string and then resolving it
				1016	* against this URI.
				1017	*
				1018	* <p> This convenience method works as if invoking it were equivalent to
				1019	* evaluating the expression {@link #resolve(java.net.URI)
				1020	* resolve}{@code (URI.}{@link #create(String) create}{@code (str))}. </p>
				1021	*
				1022	* @param str The string to be parsed into a URI
				1023	* @return The resulting URI
				1024	*
				1025	* @throws NullPointerException
				1026	* If {@code str} is {@code null}
				1027	*
				1028	* @throws IllegalArgumentException
				1029	* If the given string violates RFC 2396
				1030	*/
				1031	public URI resolve(String str) {
				1032	return resolve(URI.create(str));
				1033	}
				1034
				1035	/**
				1036	* Relativizes the given URI against this URI.
				1037	*
				1038	* <p> The relativization of the given URI against this URI is computed as
				1039	* follows: </p>
				1040	*
				1041	* <ol>
				1042	*
				1043	* <li><p> If either this URI or the given URI are opaque, or if the
				1044	* scheme and authority components of the two URIs are not identical, or
				1045	* if the path of this URI is not a prefix of the path of the given URI,
				1046	* then the given URI is returned. </p></li>
				1047	*
				1048	* <li><p> Otherwise a new relative hierarchical URI is constructed with
				1049	* query and fragment components taken from the given URI and with a path
				1050	* component computed by removing this URI's path from the beginning of
				1051	* the given URI's path. </p></li>
				1052	*
				1053	* </ol>
				1054	*
				1055	* @param uri The URI to be relativized against this URI
				1056	* @return The resulting URI
				1057	*
				1058	* @throws NullPointerException
				1059	* If {@code uri} is {@code null}
				1060	*/
				1061	public URI relativize(URI uri) {
				1062	return relativize(this, uri);
				1063	}
				1064
				1065	/**
				1066	* Constructs a URL from this URI.
				1067	*
				1068	* <p> This convenience method works as if invoking it were equivalent to
				1069	* evaluating the expression {@code new URL(this.toString())} after
				1070	* first checking that this URI is absolute. </p>
				1071	*
				1072	* @return A URL constructed from this URI
				1073	*
				1074	* @throws IllegalArgumentException
				1075	* If this URI is not absolute
				1076	*
				1077	* @throws MalformedURLException
				1078	* If a protocol handler for the URL could not be found,
				1079	* or if some other error occurred while constructing the URL
				1080	*/
				1081	public URL toURL()
				1082	throws MalformedURLException {
				1083	if (!isAbsolute())
				1084	throw new IllegalArgumentException("URI is not absolute");
				1085	return new URL(toString());
				1086	}
				1087
				1088	// -- Component access methods --
				1089
				1090	/**
				1091	* Returns the scheme component of this URI.
				1092	*
				1093	* <p> The scheme component of a URI, if defined, only contains characters
				1094	* in the <i>alphanum</i> category and in the string {@code "-.+"}. A
				1095	* scheme always starts with an <i>alpha</i> character. <p>
				1096	*
				1097	* The scheme component of a URI cannot contain escaped octets, hence this
				1098	* method does not perform any decoding.
				1099	*
				1100	* @return The scheme component of this URI,
				1101	* or {@code null} if the scheme is undefined
				1102	*/
				1103	public String getScheme() {
				1104	return scheme;
				1105	}
				1106
				1107	/**
				1108	* Tells whether or not this URI is absolute.
				1109	*
				1110	* <p> A URI is absolute if, and only if, it has a scheme component. </p>
				1111	*
				1112	* @return {@code true} if, and only if, this URI is absolute
				1113	*/
				1114	public boolean isAbsolute() {
				1115	return scheme != null;
				1116	}
				1117
				1118	/**
				1119	* Tells whether or not this URI is opaque.
				1120	*
				1121	* <p> A URI is opaque if, and only if, it is absolute and its
				1122	* scheme-specific part does not begin with a slash character ('/').
				1123	* An opaque URI has a scheme, a scheme-specific part, and possibly
				1124	* a fragment; all other components are undefined. </p>
				1125	*
				1126	* @return {@code true} if, and only if, this URI is opaque
				1127	*/
				1128	public boolean isOpaque() {
				1129	return path == null;
				1130	}
				1131
				1132	/**
				1133	* Returns the raw scheme-specific part of this URI. The scheme-specific
				1134	* part is never undefined, though it may be empty.
				1135	*
				1136	* <p> The scheme-specific part of a URI only contains legal URI
				1137	* characters. </p>
				1138	*
				1139	* @return The raw scheme-specific part of this URI
				1140	* (never {@code null})
				1141	*/
				1142	public String getRawSchemeSpecificPart() {
				1143	defineSchemeSpecificPart();
				1144	return schemeSpecificPart;
				1145	}
				1146
				1147	/**
				1148	* Returns the decoded scheme-specific part of this URI.
				1149	*
				1150	* <p> The string returned by this method is equal to that returned by the
				1151	* {@link #getRawSchemeSpecificPart() getRawSchemeSpecificPart} method
				1152	* except that all sequences of escaped octets are <a
				1153	* href="#decode">decoded</a>. </p>
				1154	*
				1155	* @return The decoded scheme-specific part of this URI
				1156	* (never {@code null})
				1157	*/
				1158	public String getSchemeSpecificPart() {
				1159	if (decodedSchemeSpecificPart == null)
				1160	decodedSchemeSpecificPart = decode(getRawSchemeSpecificPart());
				1161	return decodedSchemeSpecificPart;
				1162	}
				1163
				1164	/**
				1165	* Returns the raw authority component of this URI.
				1166	*
				1167	* <p> The authority component of a URI, if defined, only contains the
				1168	* commercial-at character ({@code '@'}) and characters in the
				1169	* <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and <i>other</i>
				1170	* categories. If the authority is server-based then it is further
				1171	* constrained to have valid user-information, host, and port
				1172	* components. </p>
				1173	*
				1174	* @return The raw authority component of this URI,
				1175	* or {@code null} if the authority is undefined
				1176	*/
				1177	public String getRawAuthority() {
				1178	return authority;
				1179	}
				1180
				1181	/**
				1182	* Returns the decoded authority component of this URI.
				1183	*
				1184	* <p> The string returned by this method is equal to that returned by the
				1185	* {@link #getRawAuthority() getRawAuthority} method except that all
				1186	* sequences of escaped octets are <a href="#decode">decoded</a>. </p>
				1187	*
				1188	* @return The decoded authority component of this URI,
				1189	* or {@code null} if the authority is undefined
				1190	*/
				1191	public String getAuthority() {
				1192	if (decodedAuthority == null)
				1193	decodedAuthority = decode(authority);
				1194	return decodedAuthority;
				1195	}
				1196
				1197	/**
				1198	* Returns the raw user-information component of this URI.
				1199	*
				1200	* <p> The user-information component of a URI, if defined, only contains
				1201	* characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>, and
				1202	* <i>other</i> categories. </p>
				1203	*
				1204	* @return The raw user-information component of this URI,
				1205	* or {@code null} if the user information is undefined
				1206	*/
				1207	public String getRawUserInfo() {
				1208	return userInfo;
				1209	}
				1210
				1211	/**
				1212	* Returns the decoded user-information component of this URI.
				1213	*
				1214	* <p> The string returned by this method is equal to that returned by the
				1215	* {@link #getRawUserInfo() getRawUserInfo} method except that all
				1216	* sequences of escaped octets are <a href="#decode">decoded</a>. </p>
				1217	*
				1218	* @return The decoded user-information component of this URI,
				1219	* or {@code null} if the user information is undefined
				1220	*/
				1221	public String getUserInfo() {
				1222	if ((decodedUserInfo == null) && (userInfo != null))
				1223	decodedUserInfo = decode(userInfo);
				1224	return decodedUserInfo;
				1225	}
				1226
				1227	/**
				1228	* Returns the host component of this URI.
				1229	*
				1230	* <p> The host component of a URI, if defined, will have one of the
				1231	* following forms: </p>
				1232	*
				1233	* <ul>
				1234	*
				1235	* <li><p> A domain name consisting of one or more <i>labels</i>
				1236	* separated by period characters ({@code '.'}), optionally followed by
				1237	* a period character. Each label consists of <i>alphanum</i> characters
				1238	* as well as hyphen characters ({@code '-'}), though hyphens never
				1239	* occur as the first or last characters in a label. The rightmost
				1240	* label of a domain name consisting of two or more labels, begins
				1241	* with an <i>alpha</i> character. </p></li>
				1242	*
				1243	* <li><p> A dotted-quad IPv4 address of the form
				1244	* <i>digit</i>{@code +.}<i>digit</i>{@code +.}<i>digit</i>{@code +.}<i>digit</i>{@code +},
				1245	* where no <i>digit</i> sequence is longer than three characters and no
				1246	* sequence has a value larger than 255. </p></li>
				1247	*
				1248	* <li><p> An IPv6 address enclosed in square brackets ({@code '['} and
				1249	* {@code ']'}) and consisting of hexadecimal digits, colon characters
				1250	* ({@code ':'}), and possibly an embedded IPv4 address. The full
				1251	* syntax of IPv6 addresses is specified in <a
				1252	* href="http://www.ietf.org/rfc/rfc2373.txt"><i>RFC 2373: IPv6
				1253	* Addressing Architecture</i></a>. </p></li>
				1254	*
				1255	* </ul>
				1256	*
				1257	* The host component of a URI cannot contain escaped octets, hence this
				1258	* method does not perform any decoding.
				1259	*
				1260	* @return The host component of this URI,
				1261	* or {@code null} if the host is undefined
				1262	*/
				1263	public String getHost() {
				1264	return host;
				1265	}
				1266
				1267	/**
				1268	* Returns the port number of this URI.
				1269	*
				1270	* <p> The port component of a URI, if defined, is a non-negative
				1271	* integer. </p>
				1272	*
				1273	* @return The port component of this URI,
				1274	* or {@code -1} if the port is undefined
				1275	*/
				1276	public int getPort() {
				1277	return port;
				1278	}
				1279
				1280	/**
				1281	* Returns the raw path component of this URI.
				1282	*
				1283	* <p> The path component of a URI, if defined, only contains the slash
				1284	* character ({@code '/'}), the commercial-at character ({@code '@'}),
				1285	* and characters in the <i>unreserved</i>, <i>punct</i>, <i>escaped</i>,
				1286	* and <i>other</i> categories. </p>
				1287	*
				1288	* @return The path component of this URI,
				1289	* or {@code null} if the path is undefined
				1290	*/
				1291	public String getRawPath() {
				1292	return path;
				1293	}
				1294
				1295	/**
				1296	* Returns the decoded path component of this URI.
				1297	*
				1298	* <p> The string returned by this method is equal to that returned by the
				1299	* {@link #getRawPath() getRawPath} method except that all sequences of
				1300	* escaped octets are <a href="#decode">decoded</a>. </p>
				1301	*
				1302	* @return The decoded path component of this URI,
				1303	* or {@code null} if the path is undefined
				1304	*/
				1305	public String getPath() {
				1306	if ((decodedPath == null) && (path != null))
				1307	decodedPath = decode(path);
				1308	return decodedPath;
				1309	}
				1310
				1311	/**
				1312	* Returns the raw query component of this URI.
				1313	*
				1314	* <p> The query component of a URI, if defined, only contains legal URI
				1315	* characters. </p>
				1316	*
				1317	* @return The raw query component of this URI,
				1318	* or {@code null} if the query is undefined
				1319	*/
				1320	public String getRawQuery() {
				1321	return query;
				1322	}
				1323
				1324	/**
				1325	* Returns the decoded query component of this URI.
				1326	*
				1327	* <p> The string returned by this method is equal to that returned by the
				1328	* {@link #getRawQuery() getRawQuery} method except that all sequences of
				1329	* escaped octets are <a href="#decode">decoded</a>. </p>
				1330	*
				1331	* @return The decoded query component of this URI,
				1332	* or {@code null} if the query is undefined
				1333	*/
				1334	public String getQuery() {
				1335	if ((decodedQuery == null) && (query != null))
				1336	decodedQuery = decode(query);
				1337	return decodedQuery;
				1338	}
				1339
				1340	/**
				1341	* Returns the raw fragment component of this URI.
				1342	*
				1343	* <p> The fragment component of a URI, if defined, only contains legal URI
				1344	* characters. </p>
				1345	*
				1346	* @return The raw fragment component of this URI,
				1347	* or {@code null} if the fragment is undefined
				1348	*/
				1349	public String getRawFragment() {
				1350	return fragment;
				1351	}
				1352
				1353	/**
				1354	* Returns the decoded fragment component of this URI.
				1355	*
				1356	* <p> The string returned by this method is equal to that returned by the
				1357	* {@link #getRawFragment() getRawFragment} method except that all
				1358	* sequences of escaped octets are <a href="#decode">decoded</a>. </p>
				1359	*
				1360	* @return The decoded fragment component of this URI,
				1361	* or {@code null} if the fragment is undefined
				1362	*/
				1363	public String getFragment() {
				1364	if ((decodedFragment == null) && (fragment != null))
				1365	decodedFragment = decode(fragment);
				1366	return decodedFragment;
				1367	}
				1368
				1369
				1370	// -- Equality, comparison, hash code, toString, and serialization --
				1371
				1372	/**
				1373	* Tests this URI for equality with another object.
				1374	*
				1375	* <p> If the given object is not a URI then this method immediately
				1376	* returns {@code false}.
				1377	*
				1378	* <p> For two URIs to be considered equal requires that either both are
				1379	* opaque or both are hierarchical. Their schemes must either both be
				1380	* undefined or else be equal without regard to case. Their fragments
				1381	* must either both be undefined or else be equal.
				1382	*
				1383	* <p> For two opaque URIs to be considered equal, their scheme-specific
				1384	* parts must be equal.
				1385	*
				1386	* <p> For two hierarchical URIs to be considered equal, their paths must
				1387	* be equal and their queries must either both be undefined or else be
				1388	* equal. Their authorities must either both be undefined, or both be
				1389	* registry-based, or both be server-based. If their authorities are
				1390	* defined and are registry-based, then they must be equal. If their
				1391	* authorities are defined and are server-based, then their hosts must be
				1392	* equal without regard to case, their port numbers must be equal, and
				1393	* their user-information components must be equal.
				1394	*
				1395	* <p> When testing the user-information, path, query, fragment, authority,
				1396	* or scheme-specific parts of two URIs for equality, the raw forms rather
				1397	* than the decoded forms of these components are compared and the
				1398	* hexadecimal digits of escaped octets are compared without regard to
				1399	* case.
				1400	*
				1401	* <p> This method satisfies the general contract of the {@link
				1402	* java.lang.Object#equals(Object) Object.equals} method. </p>
				1403	*
				1404	* @param ob The object to which this object is to be compared
				1405	*
				1406	* @return {@code true} if, and only if, the given object is a URI that
				1407	* is identical to this URI
				1408	*/
				1409	public boolean equals(Object ob) {
				1410	if (ob == this)
				1411	return true;
				1412	if (!(ob instanceof URI))
				1413	return false;
				1414	URI that = (URI)ob;
				1415	if (this.isOpaque() != that.isOpaque()) return false;
				1416	if (!equalIgnoringCase(this.scheme, that.scheme)) return false;
				1417	if (!equal(this.fragment, that.fragment)) return false;
				1418
				1419	// Opaque
				1420	if (this.isOpaque())
				1421	return equal(this.schemeSpecificPart, that.schemeSpecificPart);
				1422
				1423	// Hierarchical
				1424	if (!equal(this.path, that.path)) return false;
				1425	if (!equal(this.query, that.query)) return false;
				1426
				1427	// Authorities
				1428	if (this.authority == that.authority) return true;
				1429	if (this.host != null) {
				1430	// Server-based
				1431	if (!equal(this.userInfo, that.userInfo)) return false;
				1432	if (!equalIgnoringCase(this.host, that.host)) return false;
				1433	if (this.port != that.port) return false;
				1434	} else if (this.authority != null) {
				1435	// Registry-based
				1436	if (!equal(this.authority, that.authority)) return false;
				1437	} else if (this.authority != that.authority) {
				1438	return false;
				1439	}
				1440
				1441	return true;
				1442	}
				1443
				1444	/**
				1445	* Returns a hash-code value for this URI. The hash code is based upon all
				1446	* of the URI's components, and satisfies the general contract of the
				1447	* {@link java.lang.Object#hashCode() Object.hashCode} method.
				1448	*
				1449	* @return A hash-code value for this URI
				1450	*/
				1451	public int hashCode() {
				1452	if (hash != 0)
				1453	return hash;
				1454	int h = hashIgnoringCase(0, scheme);
				1455	h = hash(h, fragment);
				1456	if (isOpaque()) {
				1457	h = hash(h, schemeSpecificPart);
				1458	} else {
				1459	h = hash(h, path);
				1460	h = hash(h, query);
				1461	if (host != null) {
				1462	h = hash(h, userInfo);
				1463	h = hashIgnoringCase(h, host);
				1464	h += 1949 * port;
				1465	} else {
				1466	h = hash(h, authority);
				1467	}
				1468	}
				1469	hash = h;
				1470	return h;
				1471	}
				1472
				1473	/**
				1474	* Compares this URI to another object, which must be a URI.
				1475	*
				1476	* <p> When comparing corresponding components of two URIs, if one
				1477	* component is undefined but the other is defined then the first is
				1478	* considered to be less than the second. Unless otherwise noted, string
				1479	* components are ordered according to their natural, case-sensitive
				1480	* ordering as defined by the {@link java.lang.String#compareTo(Object)
				1481	* String.compareTo} method. String components that are subject to
				1482	* encoding are compared by comparing their raw forms rather than their
				1483	* encoded forms.
				1484	*
				1485	* <p> The ordering of URIs is defined as follows: </p>
				1486	*
				1487	* <ul>
				1488	*
				1489	* <li><p> Two URIs with different schemes are ordered according the
				1490	* ordering of their schemes, without regard to case. </p></li>
				1491	*
				1492	* <li><p> A hierarchical URI is considered to be less than an opaque URI
				1493	* with an identical scheme. </p></li>
				1494	*
				1495	* <li><p> Two opaque URIs with identical schemes are ordered according
				1496	* to the ordering of their scheme-specific parts. </p></li>
				1497	*
				1498	* <li><p> Two opaque URIs with identical schemes and scheme-specific
				1499	* parts are ordered according to the ordering of their
				1500	* fragments. </p></li>
				1501	*
				1502	* <li><p> Two hierarchical URIs with identical schemes are ordered
				1503	* according to the ordering of their authority components: </p>
				1504	*
				1505	* <ul>
				1506	*
				1507	* <li><p> If both authority components are server-based then the URIs
				1508	* are ordered according to their user-information components; if these
				1509	* components are identical then the URIs are ordered according to the
				1510	* ordering of their hosts, without regard to case; if the hosts are
				1511	* identical then the URIs are ordered according to the ordering of
				1512	* their ports. </p></li>
				1513	*
				1514	* <li><p> If one or both authority components are registry-based then
				1515	* the URIs are ordered according to the ordering of their authority
				1516	* components. </p></li>
				1517	*
				1518	* </ul></li>
				1519	*
				1520	* <li><p> Finally, two hierarchical URIs with identical schemes and
				1521	* authority components are ordered according to the ordering of their
				1522	* paths; if their paths are identical then they are ordered according to
				1523	* the ordering of their queries; if the queries are identical then they
				1524	* are ordered according to the order of their fragments. </p></li>
				1525	*
				1526	* </ul>
				1527	*
				1528	* <p> This method satisfies the general contract of the {@link
				1529	* java.lang.Comparable#compareTo(Object) Comparable.compareTo}
				1530	* method. </p>
				1531	*
				1532	* @param that
				1533	* The object to which this URI is to be compared
				1534	*
				1535	* @return A negative integer, zero, or a positive integer as this URI is
				1536	* less than, equal to, or greater than the given URI
				1537	*
				1538	* @throws ClassCastException
				1539	* If the given object is not a URI
				1540	*/
				1541	public int compareTo(URI that) {
				1542	int c;
				1543
				1544	if ((c = compareIgnoringCase(this.scheme, that.scheme)) != 0)
				1545	return c;
				1546
				1547	if (this.isOpaque()) {
				1548	if (that.isOpaque()) {
				1549	// Both opaque
				1550	if ((c = compare(this.schemeSpecificPart,
				1551	that.schemeSpecificPart)) != 0)
				1552	return c;
				1553	return compare(this.fragment, that.fragment);
				1554	}
				1555	return +1; // Opaque > hierarchical
				1556	} else if (that.isOpaque()) {
				1557	return -1; // Hierarchical < opaque
				1558	}
				1559
				1560	// Hierarchical
				1561	if ((this.host != null) && (that.host != null)) {
				1562	// Both server-based
				1563	if ((c = compare(this.userInfo, that.userInfo)) != 0)
				1564	return c;
				1565	if ((c = compareIgnoringCase(this.host, that.host)) != 0)
				1566	return c;
				1567	if ((c = this.port - that.port) != 0)
				1568	return c;
				1569	} else {
				1570	// If one or both authorities are registry-based then we simply
				1571	// compare them in the usual, case-sensitive way. If one is
				1572	// registry-based and one is server-based then the strings are
				1573	// guaranteed to be unequal, hence the comparison will never return
				1574	// zero and the compareTo and equals methods will remain
				1575	// consistent.
				1576	if ((c = compare(this.authority, that.authority)) != 0) return c;
				1577	}
				1578
				1579	if ((c = compare(this.path, that.path)) != 0) return c;
				1580	if ((c = compare(this.query, that.query)) != 0) return c;
				1581	return compare(this.fragment, that.fragment);
				1582	}
				1583
				1584	/**
				1585	* Returns the content of this URI as a string.
				1586	*
				1587	* <p> If this URI was created by invoking one of the constructors in this
				1588	* class then a string equivalent to the original input string, or to the
				1589	* string computed from the originally-given components, as appropriate, is
				1590	* returned. Otherwise this URI was created by normalization, resolution,
				1591	* or relativization, and so a string is constructed from this URI's
				1592	* components according to the rules specified in <a
				1593	* href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</a>,
				1594	* section 5.2, step 7. </p>
				1595	*
				1596	* @return The string form of this URI
				1597	*/
				1598	public String toString() {
				1599	defineString();
				1600	return string;
				1601	}
				1602
				1603	/**
				1604	* Returns the content of this URI as a US-ASCII string.
				1605	*
				1606	* <p> If this URI does not contain any characters in the <i>other</i>
				1607	* category then an invocation of this method will return the same value as
				1608	* an invocation of the {@link #toString() toString} method. Otherwise
				1609	* this method works as if by invoking that method and then <a
				1610	* href="#encode">encoding</a> the result. </p>
				1611	*
				1612	* @return The string form of this URI, encoded as needed
				1613	* so that it only contains characters in the US-ASCII
				1614	* charset
				1615	*/
				1616	public String toASCIIString() {
				1617	defineString();
				1618	return encode(string);
				1619	}
				1620
				1621
				1622	// -- Serialization support --
				1623
				1624	/**
				1625	* Saves the content of this URI to the given serial stream.
				1626	*
				1627	* <p> The only serializable field of a URI instance is its {@code string}
				1628	* field. That field is given a value, if it does not have one already,
				1629	* and then the {@link java.io.ObjectOutputStream#defaultWriteObject()}
				1630	* method of the given object-output stream is invoked. </p>
				1631	*
				1632	* @param os The object-output stream to which this object
				1633	* is to be written
				1634	*/
				1635	private void writeObject(ObjectOutputStream os)
				1636	throws IOException
				1637	{
				1638	defineString();
				1639	os.defaultWriteObject(); // Writes the string field only
				1640	}
				1641
				1642	/**
				1643	* Reconstitutes a URI from the given serial stream.
				1644	*
				1645	* <p> The {@link java.io.ObjectInputStream#defaultReadObject()} method is
				1646	* invoked to read the value of the {@code string} field. The result is
				1647	* then parsed in the usual way.
				1648	*
				1649	* @param is The object-input stream from which this object
				1650	* is being read
				1651	*/
				1652	private void readObject(ObjectInputStream is)
				1653	throws ClassNotFoundException, IOException
				1654	{
				1655	port = -1; // Argh
				1656	is.defaultReadObject();
				1657	try {
				1658	new Parser(string).parse(false);
				1659	} catch (URISyntaxException x) {
				1660	IOException y = new InvalidObjectException("Invalid URI");
				1661	y.initCause(x);
				1662	throw y;
				1663	}
				1664	}
				1665
				1666
				1667	// -- End of public methods --
				1668
				1669
				1670	// -- Utility methods for string-field comparison and hashing --
				1671
				1672	// These methods return appropriate values for null string arguments,
				1673	// thereby simplifying the equals, hashCode, and compareTo methods.
				1674	//
				1675	// The case-ignoring methods should only be applied to strings whose
				1676	// characters are all known to be US-ASCII. Because of this restriction,
				1677	// these methods are faster than the similar methods in the String class.
				1678
				1679	// US-ASCII only
				1680	private static int toLower(char c) {
				1681	if ((c >= 'A') && (c <= 'Z'))
				1682	return c + ('a' - 'A');
				1683	return c;
				1684	}
				1685
				1686	// US-ASCII only
				1687	private static int toUpper(char c) {
				1688	if ((c >= 'a') && (c <= 'z'))
				1689	return c - ('a' - 'A');
				1690	return c;
				1691	}
				1692
				1693	private static boolean equal(String s, String t) {
				1694	if (s == t) return true;
				1695	if ((s != null) && (t != null)) {
				1696	if (s.length() != t.length())
				1697	return false;
				1698	if (s.indexOf('%') < 0)
				1699	return s.equals(t);
				1700	int n = s.length();
				1701	for (int i = 0; i < n;) {
				1702	char c = s.charAt(i);
				1703	char d = t.charAt(i);
				1704	if (c != '%') {
				1705	if (c != d)
				1706	return false;
				1707	i++;
				1708	continue;
				1709	}
				1710	if (d != '%')
				1711	return false;
				1712	i++;
				1713	if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
				1714	return false;
				1715	i++;
				1716	if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
				1717	return false;
				1718	i++;
				1719	}
				1720	return true;
				1721	}
				1722	return false;
				1723	}
				1724
				1725	// US-ASCII only
				1726	private static boolean equalIgnoringCase(String s, String t) {
				1727	if (s == t) return true;
				1728	if ((s != null) && (t != null)) {
				1729	int n = s.length();
				1730	if (t.length() != n)
				1731	return false;
				1732	for (int i = 0; i < n; i++) {
				1733	if (toLower(s.charAt(i)) != toLower(t.charAt(i)))
				1734	return false;
				1735	}
				1736	return true;
				1737	}
				1738	return false;
				1739	}
				1740
				1741	private static int hash(int hash, String s) {
				1742	if (s == null) return hash;
				1743	return s.indexOf('%') < 0 ? hash * 127 + s.hashCode()
				1744	: normalizedHash(hash, s);
				1745	}
				1746
				1747
				1748	private static int normalizedHash(int hash, String s) {
				1749	int h = 0;
				1750	for (int index = 0; index < s.length(); index++) {
				1751	char ch = s.charAt(index);
				1752	h = 31 * h + ch;
				1753	if (ch == '%') {
				1754	/*
				1755	* Process the next two encoded characters
				1756	*/
				1757	for (int i = index + 1; i < index + 3; i++)
				1758	h = 31 * h + toUpper(s.charAt(i));
				1759	index += 2;
				1760	}
				1761	}
				1762	return hash * 127 + h;
				1763	}
				1764
				1765	// US-ASCII only
				1766	private static int hashIgnoringCase(int hash, String s) {
				1767	if (s == null) return hash;
				1768	int h = hash;
				1769	int n = s.length();
				1770	for (int i = 0; i < n; i++)
				1771	h = 31 * h + toLower(s.charAt(i));
				1772	return h;
				1773	}
				1774
				1775	private static int compare(String s, String t) {
				1776	if (s == t) return 0;
				1777	if (s != null) {
				1778	if (t != null)
				1779	return s.compareTo(t);
				1780	else
				1781	return +1;
				1782	} else {
				1783	return -1;
				1784	}
				1785	}
				1786
				1787	// US-ASCII only
				1788	private static int compareIgnoringCase(String s, String t) {
				1789	if (s == t) return 0;
				1790	if (s != null) {
				1791	if (t != null) {
				1792	int sn = s.length();
				1793	int tn = t.length();
				1794	int n = sn < tn ? sn : tn;
				1795	for (int i = 0; i < n; i++) {
				1796	int c = toLower(s.charAt(i)) - toLower(t.charAt(i));
				1797	if (c != 0)
				1798	return c;
				1799	}
				1800	return sn - tn;
				1801	}
				1802	return +1;
				1803	} else {
				1804	return -1;
				1805	}
				1806	}
				1807
				1808
				1809	// -- String construction --
				1810
				1811	// If a scheme is given then the path, if given, must be absolute
				1812	//
				1813	private static void checkPath(String s, String scheme, String path)
				1814	throws URISyntaxException
				1815	{
				1816	if (scheme != null) {
				1817	if ((path != null)
				1818	&& ((path.length() > 0) && (path.charAt(0) != '/')))
				1819	throw new URISyntaxException(s,
				1820	"Relative path in absolute URI");
				1821	}
				1822	}
				1823
				1824	private void appendAuthority(StringBuffer sb,
				1825	String authority,
				1826	String userInfo,
				1827	String host,
				1828	int port)
				1829	{
				1830	if (host != null) {
				1831	sb.append("//");
				1832	if (userInfo != null) {
				1833	sb.append(quote(userInfo, L_USERINFO, H_USERINFO));
				1834	sb.append('@');
				1835	}
				1836	boolean needBrackets = ((host.indexOf(':') >= 0)
				1837	&& !host.startsWith("[")
				1838	&& !host.endsWith("]"));
				1839	if (needBrackets) sb.append('[');
				1840	sb.append(host);
				1841	if (needBrackets) sb.append(']');
				1842	if (port != -1) {
				1843	sb.append(':');
				1844	sb.append(port);
				1845	}
				1846	} else if (authority != null) {
				1847	sb.append("//");
				1848	if (authority.startsWith("[")) {
				1849	// authority should (but may not) contain an embedded IPv6 address
				1850	int end = authority.indexOf("]");
				1851	String doquote = authority, dontquote = "";
				1852	if (end != -1 && authority.indexOf(":") != -1) {
				1853	// the authority contains an IPv6 address
				1854	if (end == authority.length()) {
				1855	dontquote = authority;
				1856	doquote = "";
				1857	} else {
				1858	dontquote = authority.substring(0 , end + 1);
				1859	doquote = authority.substring(end + 1);
				1860	}
				1861	}
				1862	sb.append(dontquote);
				1863	sb.append(quote(doquote,
				1864	L_REG_NAME \| L_SERVER,
				1865	H_REG_NAME \| H_SERVER));
				1866	} else {
				1867	sb.append(quote(authority,
				1868	L_REG_NAME \| L_SERVER,
				1869	H_REG_NAME \| H_SERVER));
				1870	}
				1871	}
				1872	}
				1873
				1874	private void appendSchemeSpecificPart(StringBuffer sb,
				1875	String opaquePart,
				1876	String authority,
				1877	String userInfo,
				1878	String host,
				1879	int port,
				1880	String path,
				1881	String query)
				1882	{
				1883	if (opaquePart != null) {
				1884	/* check if SSP begins with an IPv6 address
				1885	* because we must not quote a literal IPv6 address
				1886	*/
				1887	if (opaquePart.startsWith("//[")) {
				1888	int end = opaquePart.indexOf("]");
				1889	if (end != -1 && opaquePart.indexOf(":")!=-1) {
				1890	String doquote, dontquote;
				1891	if (end == opaquePart.length()) {
				1892	dontquote = opaquePart;
				1893	doquote = "";
				1894	} else {
				1895	dontquote = opaquePart.substring(0,end+1);
				1896	doquote = opaquePart.substring(end+1);
				1897	}
				1898	sb.append (dontquote);
				1899	sb.append(quote(doquote, L_URIC, H_URIC));
				1900	}
				1901	} else {
				1902	sb.append(quote(opaquePart, L_URIC, H_URIC));
				1903	}
				1904	} else {
				1905	appendAuthority(sb, authority, userInfo, host, port);
				1906	if (path != null)
				1907	sb.append(quote(path, L_PATH, H_PATH));
				1908	if (query != null) {
				1909	sb.append('?');
				1910	sb.append(quote(query, L_URIC, H_URIC));
				1911	}
				1912	}
				1913	}
				1914
				1915	private void appendFragment(StringBuffer sb, String fragment) {
				1916	if (fragment != null) {
				1917	sb.append('#');
				1918	sb.append(quote(fragment, L_URIC, H_URIC));
				1919	}
				1920	}
				1921
				1922	private String toString(String scheme,
				1923	String opaquePart,
				1924	String authority,
				1925	String userInfo,
				1926	String host,
				1927	int port,
				1928	String path,
				1929	String query,
				1930	String fragment)
				1931	{
				1932	StringBuffer sb = new StringBuffer();
				1933	if (scheme != null) {
				1934	sb.append(scheme);
				1935	sb.append(':');
				1936	}
				1937	appendSchemeSpecificPart(sb, opaquePart,
				1938	authority, userInfo, host, port,
				1939	path, query);
				1940	appendFragment(sb, fragment);
				1941	return sb.toString();
				1942	}
				1943
				1944	private void defineSchemeSpecificPart() {
				1945	if (schemeSpecificPart != null) return;
				1946	StringBuffer sb = new StringBuffer();
				1947	appendSchemeSpecificPart(sb, null, getAuthority(), getUserInfo(),
				1948	host, port, getPath(), getQuery());
				1949	if (sb.length() == 0) return;
				1950	schemeSpecificPart = sb.toString();
				1951	}
				1952
				1953	private void defineString() {
				1954	if (string != null) return;
				1955
				1956	StringBuffer sb = new StringBuffer();
				1957	if (scheme != null) {
				1958	sb.append(scheme);
				1959	sb.append(':');
				1960	}
				1961	if (isOpaque()) {
				1962	sb.append(schemeSpecificPart);
				1963	} else {
				1964	if (host != null) {
				1965	sb.append("//");
				1966	if (userInfo != null) {
				1967	sb.append(userInfo);
				1968	sb.append('@');
				1969	}
				1970	boolean needBrackets = ((host.indexOf(':') >= 0)
				1971	&& !host.startsWith("[")
				1972	&& !host.endsWith("]"));
				1973	if (needBrackets) sb.append('[');
				1974	sb.append(host);
				1975	if (needBrackets) sb.append(']');
				1976	if (port != -1) {
				1977	sb.append(':');
				1978	sb.append(port);
				1979	}
				1980	} else if (authority != null) {
				1981	sb.append("//");
				1982	sb.append(authority);
				1983	}
				1984	if (path != null)
				1985	sb.append(path);
				1986	if (query != null) {
				1987	sb.append('?');
				1988	sb.append(query);
				1989	}
				1990	}
				1991	if (fragment != null) {
				1992	sb.append('#');
				1993	sb.append(fragment);
				1994	}
				1995	string = sb.toString();
				1996	}
				1997
				1998
				1999	// -- Normalization, resolution, and relativization --
				2000
				2001	// RFC2396 5.2 (6)
				2002	private static String resolvePath(String base, String child,
				2003	boolean absolute)
				2004	{
				2005	int i = base.lastIndexOf('/');
				2006	int cn = child.length();
				2007	String path = "";
				2008
				2009	if (cn == 0) {
				2010	// 5.2 (6a)
				2011	if (i >= 0)
				2012	path = base.substring(0, i + 1);
				2013	} else {
				2014	StringBuffer sb = new StringBuffer(base.length() + cn);
				2015	// 5.2 (6a)
				2016	if (i >= 0)
				2017	sb.append(base.substring(0, i + 1));
				2018	// 5.2 (6b)
				2019	sb.append(child);
				2020	path = sb.toString();
				2021	}
				2022
				2023	// 5.2 (6c-f)
				2024	// Android-changed: App compat. Remove leading dots when resolving path. http://b/25897693
				2025	// String np = normalize(path);
				2026	String np = normalize(path, true);
				2027
				2028	// 5.2 (6g): If the result is absolute but the path begins with "../",
				2029	// then we simply leave the path as-is
				2030
				2031	return np;
				2032	}
				2033
				2034	// RFC2396 5.2
				2035	private static URI resolve(URI base, URI child) {
				2036	// check if child if opaque first so that NPE is thrown
				2037	// if child is null.
				2038	if (child.isOpaque() \|\| base.isOpaque())
				2039	return child;
				2040
				2041	// 5.2 (2): Reference to current document (lone fragment)
				2042	if ((child.scheme == null) && (child.authority == null)
				2043	&& child.path.equals("") && (child.fragment != null)
				2044	&& (child.query == null)) {
				2045	if ((base.fragment != null)
				2046	&& child.fragment.equals(base.fragment)) {
				2047	return base;
				2048	}
				2049	URI ru = new URI();
				2050	ru.scheme = base.scheme;
				2051	ru.authority = base.authority;
				2052	ru.userInfo = base.userInfo;
				2053	ru.host = base.host;
				2054	ru.port = base.port;
				2055	ru.path = base.path;
				2056	ru.fragment = child.fragment;
				2057	ru.query = base.query;
				2058	return ru;
				2059	}
				2060
				2061	// 5.2 (3): Child is absolute
				2062	if (child.scheme != null)
				2063	return child;
				2064
				2065	URI ru = new URI(); // Resolved URI
				2066	ru.scheme = base.scheme;
				2067	ru.query = child.query;
				2068	ru.fragment = child.fragment;
				2069
				2070	// 5.2 (4): Authority
				2071	if (child.authority == null) {
				2072	ru.authority = base.authority;
				2073	ru.host = base.host;
				2074	ru.userInfo = base.userInfo;
				2075	ru.port = base.port;
				2076
				2077	// BEGIN Android-changed: App Compat. Handle null and empty path using RFC 3986 logic
				2078	// http://b/25897693
				2079	if (child.path == null \|\| child.path.isEmpty()) {
				2080	// This is an additional path from RFC 3986 RI, which fixes following RFC 2396
				2081	// "normal" examples:
				2082	// Base: http://a/b/c/d;p?q
				2083	// "?y" = "http://a/b/c/d;p?y"
				2084	// "" = "http://a/b/c/d;p?q"
				2085	// http://b/25897693
				2086	ru.path = base.path;
				2087	ru.query = child.query != null ? child.query : base.query;
				2088	// END Android-changed: App Compat. Handle null and empty path using RFC 3986 logic
				2089	} else if ((child.path.length() > 0) && (child.path.charAt(0) == '/')) {
				2090	// 5.2 (5): Child path is absolute
				2091	//
				2092	// Android-changed: App Compat. Remove leading dots in path.
				2093	// There is an additional step from RFC 3986 RI, requiring to remove dots for
				2094	// absolute path as well.
				2095	// http://b/25897693
				2096	// ru.path = child.path;
				2097	ru.path = normalize(child.path, true);
				2098	} else {
				2099	// 5.2 (6): Resolve relative path
				2100	ru.path = resolvePath(base.path, child.path, base.isAbsolute());
				2101	}
				2102	} else {
				2103	ru.authority = child.authority;
				2104	ru.host = child.host;
				2105	ru.userInfo = child.userInfo;
				2106	ru.host = child.host;
				2107	ru.port = child.port;
				2108	ru.path = child.path;
				2109	}
				2110
				2111	// 5.2 (7): Recombine (nothing to do here)
				2112	return ru;
				2113	}
				2114
				2115	// If the given URI's path is normal then return the URI;
				2116	// o.w., return a new URI containing the normalized path.
				2117	//
				2118	private static URI normalize(URI u) {
				2119	if (u.isOpaque() \|\| (u.path == null) \|\| (u.path.length() == 0))
				2120	return u;
				2121
				2122	String np = normalize(u.path);
				2123	if (np == u.path)
				2124	return u;
				2125
				2126	URI v = new URI();
				2127	v.scheme = u.scheme;
				2128	v.fragment = u.fragment;
				2129	v.authority = u.authority;
				2130	v.userInfo = u.userInfo;
				2131	v.host = u.host;
				2132	v.port = u.port;
				2133	v.path = np;
				2134	v.query = u.query;
				2135	return v;
				2136	}
				2137
				2138	// If both URIs are hierarchical, their scheme and authority components are
				2139	// identical, and the base path is a prefix of the child's path, then
				2140	// return a relative URI that, when resolved against the base, yields the
				2141	// child; otherwise, return the child.
				2142	//
				2143	private static URI relativize(URI base, URI child) {
				2144	// check if child if opaque first so that NPE is thrown
				2145	// if child is null.
				2146	if (child.isOpaque() \|\| base.isOpaque())
				2147	return child;
				2148	if (!equalIgnoringCase(base.scheme, child.scheme)
				2149	\|\| !equal(base.authority, child.authority))
				2150	return child;
				2151
				2152	String bp = normalize(base.path);
				2153	String cp = normalize(child.path);
				2154	if (!bp.equals(cp)) {
				2155	// Android-changed: App Compat. Interpret ambiguous base path as a file, not a directory
				2156	// Upstream would append '/' to bp if not present, interpreting it as a directory; thus,
				2157	// /a/b/c relative to /a/b would become /c, whereas Android would relativize to /b/c.
				2158	// The spec is pretty vague about this but the Android behavior is kept because several
				2159	// tests enforce it.
				2160	// if (!bp.endsWith("/"))
				2161	// bp = bp + "/";
				2162	if (bp.indexOf('/') != -1) {
				2163	bp = bp.substring(0, bp.lastIndexOf('/') + 1);
				2164	}
				2165
				2166	if (!cp.startsWith(bp))
				2167	return child;
				2168	}
				2169
				2170	URI v = new URI();
				2171	v.path = cp.substring(bp.length());
				2172	v.query = child.query;
				2173	v.fragment = child.fragment;
				2174	return v;
				2175	}
				2176
				2177
				2178
				2179	// -- Path normalization --
				2180
				2181	// The following algorithm for path normalization avoids the creation of a
				2182	// string object for each segment, as well as the use of a string buffer to
				2183	// compute the final result, by using a single char array and editing it in
				2184	// place. The array is first split into segments, replacing each slash
				2185	// with '\0' and creating a segment-index array, each element of which is
				2186	// the index of the first char in the corresponding segment. We then walk
				2187	// through both arrays, removing ".", "..", and other segments as necessary
				2188	// by setting their entries in the index array to -1. Finally, the two
				2189	// arrays are used to rejoin the segments and compute the final result.
				2190	//
				2191	// This code is based upon src/solaris/native/java/io/canonicalize_md.c
				2192
				2193
				2194	// Check the given path to see if it might need normalization. A path
				2195	// might need normalization if it contains duplicate slashes, a "."
				2196	// segment, or a ".." segment. Return -1 if no further normalization is
				2197	// possible, otherwise return the number of segments found.
				2198	//
				2199	// This method takes a string argument rather than a char array so that
				2200	// this test can be performed without invoking path.toCharArray().
				2201	//
				2202	static private int needsNormalization(String path) {
				2203	boolean normal = true;
				2204	int ns = 0; // Number of segments
				2205	int end = path.length() - 1; // Index of last char in path
				2206	int p = 0; // Index of next char in path
				2207
				2208	// Skip initial slashes
				2209	while (p <= end) {
				2210	if (path.charAt(p) != '/') break;
				2211	p++;
				2212	}
				2213	if (p > 1) normal = false;
				2214
				2215	// Scan segments
				2216	while (p <= end) {
				2217
				2218	// Looking at "." or ".." ?
				2219	if ((path.charAt(p) == '.')
				2220	&& ((p == end)
				2221	\|\| ((path.charAt(p + 1) == '/')
				2222	\|\| ((path.charAt(p + 1) == '.')
				2223	&& ((p + 1 == end)
				2224	\|\| (path.charAt(p + 2) == '/')))))) {
				2225	normal = false;
				2226	}
				2227	ns++;
				2228
				2229	// Find beginning of next segment
				2230	while (p <= end) {
				2231	if (path.charAt(p++) != '/')
				2232	continue;
				2233
				2234	// Skip redundant slashes
				2235	while (p <= end) {
				2236	if (path.charAt(p) != '/') break;
				2237	normal = false;
				2238	p++;
				2239	}
				2240
				2241	break;
				2242	}
				2243	}
				2244
				2245	return normal ? -1 : ns;
				2246	}
				2247
				2248
				2249	// Split the given path into segments, replacing slashes with nulls and
				2250	// filling in the given segment-index array.
				2251	//
				2252	// Preconditions:
				2253	// segs.length == Number of segments in path
				2254	//
				2255	// Postconditions:
				2256	// All slashes in path replaced by '\0'
				2257	// segs[i] == Index of first char in segment i (0 <= i < segs.length)
				2258	//
				2259	static private void split(char[] path, int[] segs) {
				2260	int end = path.length - 1; // Index of last char in path
				2261	int p = 0; // Index of next char in path
				2262	int i = 0; // Index of current segment
				2263
				2264	// Skip initial slashes
				2265	while (p <= end) {
				2266	if (path[p] != '/') break;
				2267	path[p] = '\0';
				2268	p++;
				2269	}
				2270
				2271	while (p <= end) {
				2272
				2273	// Note start of segment
				2274	segs[i++] = p++;
				2275
				2276	// Find beginning of next segment
				2277	while (p <= end) {
				2278	if (path[p++] != '/')
				2279	continue;
				2280	path[p - 1] = '\0';
				2281
				2282	// Skip redundant slashes
				2283	while (p <= end) {
				2284	if (path[p] != '/') break;
				2285	path[p++] = '\0';
				2286	}
				2287	break;
				2288	}
				2289	}
				2290
				2291	if (i != segs.length)
				2292	throw new InternalError(); // ASSERT
				2293	}
				2294
				2295
				2296	// Join the segments in the given path according to the given segment-index
				2297	// array, ignoring those segments whose index entries have been set to -1,
				2298	// and inserting slashes as needed. Return the length of the resulting
				2299	// path.
				2300	//
				2301	// Preconditions:
				2302	// segs[i] == -1 implies segment i is to be ignored
				2303	// path computed by split, as above, with '\0' having replaced '/'
				2304	//
				2305	// Postconditions:
				2306	// path[0] .. path[return value] == Resulting path
				2307	//
				2308	static private int join(char[] path, int[] segs) {
				2309	int ns = segs.length; // Number of segments
				2310	int end = path.length - 1; // Index of last char in path
				2311	int p = 0; // Index of next path char to write
				2312
				2313	if (path[p] == '\0') {
				2314	// Restore initial slash for absolute paths
				2315	path[p++] = '/';
				2316	}
				2317
				2318	for (int i = 0; i < ns; i++) {
				2319	int q = segs[i]; // Current segment
				2320	if (q == -1)
				2321	// Ignore this segment
				2322	continue;
				2323
				2324	if (p == q) {
				2325	// We're already at this segment, so just skip to its end
				2326	while ((p <= end) && (path[p] != '\0'))
				2327	p++;
				2328	if (p <= end) {
				2329	// Preserve trailing slash
				2330	path[p++] = '/';
				2331	}
				2332	} else if (p < q) {
				2333	// Copy q down to p
				2334	while ((q <= end) && (path[q] != '\0'))
				2335	path[p++] = path[q++];
				2336	if (q <= end) {
				2337	// Preserve trailing slash
				2338	path[p++] = '/';
				2339	}
				2340	} else
				2341	throw new InternalError(); // ASSERT false
				2342	}
				2343
				2344	return p;
				2345	}
				2346
				2347
				2348	// Remove "." segments from the given path, and remove segment pairs
				2349	// consisting of a non-".." segment followed by a ".." segment.
				2350	//
				2351	// Android-changed: App compat. Remove leading dots when resolving path. http://b/25897693
				2352	// private static void removeDots(char[] path, int[] segs) {
				2353	private static void removeDots(char[] path, int[] segs, boolean removeLeading) {
				2354	int ns = segs.length;
				2355	int end = path.length - 1;
				2356
				2357	for (int i = 0; i < ns; i++) {
				2358	int dots = 0; // Number of dots found (0, 1, or 2)
				2359
				2360	// Find next occurrence of "." or ".."
				2361	do {
				2362	int p = segs[i];
				2363	if (path[p] == '.') {
				2364	if (p == end) {
				2365	dots = 1;
				2366	break;
				2367	} else if (path[p + 1] == '\0') {
				2368	dots = 1;
				2369	break;
				2370	} else if ((path[p + 1] == '.')
				2371	&& ((p + 1 == end)
				2372	\|\| (path[p + 2] == '\0'))) {
				2373	dots = 2;
				2374	break;
				2375	}
				2376	}
				2377	i++;
				2378	} while (i < ns);
				2379	if ((i > ns) \|\| (dots == 0))
				2380	break;
				2381
				2382	if (dots == 1) {
				2383	// Remove this occurrence of "."
				2384	segs[i] = -1;
				2385	} else {
				2386	// If there is a preceding non-".." segment, remove both that
				2387	// segment and this occurrence of ".."
				2388	int j;
				2389	for (j = i - 1; j >= 0; j--) {
				2390	if (segs[j] != -1) break;
				2391	}
				2392	if (j >= 0) {
				2393	int q = segs[j];
				2394	if (!((path[q] == '.')
				2395	&& (path[q + 1] == '.')
				2396	&& (path[q + 2] == '\0'))) {
				2397	segs[i] = -1;
				2398	segs[j] = -1;
				2399	}
				2400	// Android-added: App compat. Remove leading dots when resolving path.
				2401	// This is a leading ".." segment. Per RFC 3986 RI, this should be removed as
				2402	// well. This fixes RFC 2396 "abnormal" examples.
				2403	// http://b/25897693
				2404	} else if (removeLeading) {
				2405	segs[i] = -1;
				2406	}
				2407	}
				2408	}
				2409	}
				2410
				2411
				2412	// DEVIATION: If the normalized path is relative, and if the first
				2413	// segment could be parsed as a scheme name, then prepend a "." segment
				2414	//
				2415	private static void maybeAddLeadingDot(char[] path, int[] segs) {
				2416
				2417	if (path[0] == '\0')
				2418	// The path is absolute
				2419	return;
				2420
				2421	int ns = segs.length;
				2422	int f = 0; // Index of first segment
				2423	while (f < ns) {
				2424	if (segs[f] >= 0)
				2425	break;
				2426	f++;
				2427	}
				2428	if ((f >= ns) \|\| (f == 0))
				2429	// The path is empty, or else the original first segment survived,
				2430	// in which case we already know that no leading "." is needed
				2431	return;
				2432
				2433	int p = segs[f];
				2434	while ((p < path.length) && (path[p] != ':') && (path[p] != '\0')) p++;
				2435	if (p >= path.length \|\| path[p] == '\0')
				2436	// No colon in first segment, so no "." needed
				2437	return;
				2438
				2439	// At this point we know that the first segment is unused,
				2440	// hence we can insert a "." segment at that position
				2441	path[0] = '.';
				2442	path[1] = '\0';
				2443	segs[0] = 0;
				2444	}
				2445
				2446
				2447	// Normalize the given path string. A normal path string has no empty
				2448	// segments (i.e., occurrences of "//"), no segments equal to ".", and no
				2449	// segments equal to ".." that are preceded by a segment not equal to "..".
				2450	// In contrast to Unix-style pathname normalization, for URI paths we
				2451	// always retain trailing slashes.
				2452	//
				2453	private static String normalize(String ps) {
				2454	// BEGIN Android-changed: App compat. Remove leading dots when resolving path.
				2455	// Controlled by the "boolean removeLeading" argument added to normalize().
				2456	return normalize(ps, false);
				2457	}
				2458
				2459	private static String normalize(String ps, boolean removeLeading) {
				2460	// END Android-changed: App compat. Remove leading dots when resolving path.
				2461	// Does this path need normalization?
				2462	int ns = needsNormalization(ps); // Number of segments
				2463	if (ns < 0)
				2464	// Nope -- just return it
				2465	return ps;
				2466
				2467	char[] path = ps.toCharArray(); // Path in char-array form
				2468
				2469	// Split path into segments
				2470	int[] segs = new int[ns]; // Segment-index array
				2471	split(path, segs);
				2472
				2473	// Remove dots
				2474	// Android-changed: App compat. Remove leading dots when resolving path.
				2475	// removeDots(path, segs);
				2476	removeDots(path, segs, removeLeading);
				2477
				2478	// Prevent scheme-name confusion
				2479	maybeAddLeadingDot(path, segs);
				2480
				2481	// Join the remaining segments and return the result
				2482	String s = new String(path, 0, join(path, segs));
				2483	if (s.equals(ps)) {
				2484	// string was already normalized
				2485	return ps;
				2486	}
				2487	return s;
				2488	}
				2489
				2490
				2491
				2492	// -- Character classes for parsing --
				2493
				2494	// RFC2396 precisely specifies which characters in the US-ASCII charset are
				2495	// permissible in the various components of a URI reference. We here
				2496	// define a set of mask pairs to aid in enforcing these restrictions. Each
				2497	// mask pair consists of two longs, a low mask and a high mask. Taken
				2498	// together they represent a 128-bit mask, where bit i is set iff the
				2499	// character with value i is permitted.
				2500	//
				2501	// This approach is more efficient than sequentially searching arrays of
				2502	// permitted characters. It could be made still more efficient by
				2503	// precompiling the mask information so that a character's presence in a
				2504	// given mask could be determined by a single table lookup.
				2505
				2506	// Compute the low-order mask for the characters in the given string
				2507	private static long lowMask(String chars) {
				2508	int n = chars.length();
				2509	long m = 0;
				2510	for (int i = 0; i < n; i++) {
				2511	char c = chars.charAt(i);
				2512	if (c < 64)
				2513	m \|= (1L << c);
				2514	}
				2515	return m;
				2516	}
				2517
				2518	// Compute the high-order mask for the characters in the given string
				2519	private static long highMask(String chars) {
				2520	int n = chars.length();
				2521	long m = 0;
				2522	for (int i = 0; i < n; i++) {
				2523	char c = chars.charAt(i);
				2524	if ((c >= 64) && (c < 128))
				2525	m \|= (1L << (c - 64));
				2526	}
				2527	return m;
				2528	}
				2529
				2530	// Compute a low-order mask for the characters
				2531	// between first and last, inclusive
				2532	private static long lowMask(char first, char last) {
				2533	long m = 0;
				2534	int f = Math.max(Math.min(first, 63), 0);
				2535	int l = Math.max(Math.min(last, 63), 0);
				2536	for (int i = f; i <= l; i++)
				2537	m \|= 1L << i;
				2538	return m;
				2539	}
				2540
				2541	// Compute a high-order mask for the characters
				2542	// between first and last, inclusive
				2543	private static long highMask(char first, char last) {
				2544	long m = 0;
				2545	int f = Math.max(Math.min(first, 127), 64) - 64;
				2546	int l = Math.max(Math.min(last, 127), 64) - 64;
				2547	for (int i = f; i <= l; i++)
				2548	m \|= 1L << i;
				2549	return m;
				2550	}
				2551
				2552	// Tell whether the given character is permitted by the given mask pair
				2553	private static boolean match(char c, long lowMask, long highMask) {
				2554	if (c == 0) // 0 doesn't have a slot in the mask. So, it never matches.
				2555	return false;
				2556	if (c < 64)
				2557	return ((1L << c) & lowMask) != 0;
				2558	if (c < 128)
				2559	return ((1L << (c - 64)) & highMask) != 0;
				2560	return false;
				2561	}
				2562
				2563	// Character-class masks, in reverse order from RFC2396 because
				2564	// initializers for static fields cannot make forward references.
				2565
				2566	// digit = "0" \| "1" \| "2" \| "3" \| "4" \| "5" \| "6" \| "7" \|
				2567	// "8" \| "9"
				2568	private static final long L_DIGIT = lowMask('0', '9');
				2569	private static final long H_DIGIT = 0L;
				2570
				2571	// upalpha = "A" \| "B" \| "C" \| "D" \| "E" \| "F" \| "G" \| "H" \| "I" \|
				2572	// "J" \| "K" \| "L" \| "M" \| "N" \| "O" \| "P" \| "Q" \| "R" \|
				2573	// "S" \| "T" \| "U" \| "V" \| "W" \| "X" \| "Y" \| "Z"
				2574	private static final long L_UPALPHA = 0L;
				2575	private static final long H_UPALPHA = highMask('A', 'Z');
				2576
				2577	// lowalpha = "a" \| "b" \| "c" \| "d" \| "e" \| "f" \| "g" \| "h" \| "i" \|
				2578	// "j" \| "k" \| "l" \| "m" \| "n" \| "o" \| "p" \| "q" \| "r" \|
				2579	// "s" \| "t" \| "u" \| "v" \| "w" \| "x" \| "y" \| "z"
				2580	private static final long L_LOWALPHA = 0L;
				2581	private static final long H_LOWALPHA = highMask('a', 'z');
				2582
				2583	// alpha = lowalpha \| upalpha
				2584	private static final long L_ALPHA = L_LOWALPHA \| L_UPALPHA;
				2585	private static final long H_ALPHA = H_LOWALPHA \| H_UPALPHA;
				2586
				2587	// alphanum = alpha \| digit
				2588	private static final long L_ALPHANUM = L_DIGIT \| L_ALPHA;
				2589	private static final long H_ALPHANUM = H_DIGIT \| H_ALPHA;
				2590
				2591	// hex = digit \| "A" \| "B" \| "C" \| "D" \| "E" \| "F" \|
				2592	// "a" \| "b" \| "c" \| "d" \| "e" \| "f"
				2593	private static final long L_HEX = L_DIGIT;
				2594	private static final long H_HEX = highMask('A', 'F') \| highMask('a', 'f');
				2595
				2596	// mark = "-" \| "_" \| "." \| "!" \| "~" \| "*" \| "'" \|
				2597	// "(" \| ")"
				2598	private static final long L_MARK = lowMask("-_.!~*'()");
				2599	private static final long H_MARK = highMask("-_.!~*'()");
				2600
				2601	// unreserved = alphanum \| mark
				2602	private static final long L_UNRESERVED = L_ALPHANUM \| L_MARK;
				2603	private static final long H_UNRESERVED = H_ALPHANUM \| H_MARK;
				2604
				2605	// reserved = ";" \| "/" \| "?" \| ":" \| "@" \| "&" \| "=" \| "+" \|
				2606	// "$" \| "," \| "[" \| "]"
				2607	// Added per RFC2732: "[", "]"
				2608	private static final long L_RESERVED = lowMask(";/?:@&=+$,[]");
				2609	private static final long H_RESERVED = highMask(";/?:@&=+$,[]");
				2610
				2611	// The zero'th bit is used to indicate that escape pairs and non-US-ASCII
				2612	// characters are allowed; this is handled by the scanEscape method below.
				2613	private static final long L_ESCAPED = 1L;
				2614	private static final long H_ESCAPED = 0L;
				2615
				2616	// uric = reserved \| unreserved \| escaped
				2617	private static final long L_URIC = L_RESERVED \| L_UNRESERVED \| L_ESCAPED;
				2618	private static final long H_URIC = H_RESERVED \| H_UNRESERVED \| H_ESCAPED;
				2619
				2620	// pchar = unreserved \| escaped \|
				2621	// ":" \| "@" \| "&" \| "=" \| "+" \| "$" \| ","
				2622	private static final long L_PCHAR
				2623	= L_UNRESERVED \| L_ESCAPED \| lowMask(":@&=+$,");
				2624	private static final long H_PCHAR
				2625	= H_UNRESERVED \| H_ESCAPED \| highMask(":@&=+$,");
				2626
				2627	// All valid path characters
				2628	private static final long L_PATH = L_PCHAR \| lowMask(";/");
				2629	private static final long H_PATH = H_PCHAR \| highMask(";/");
				2630
				2631	// Dash, for use in domainlabel and toplabel
				2632	private static final long L_DASH = lowMask("-");
				2633	private static final long H_DASH = highMask("-");
				2634
				2635	// BEGIN Android-added: Allow underscore in hostname.
				2636	// UNDERSCORE, for use in domainlabel and toplabel
				2637	private static final long L_UNDERSCORE = lowMask("_");
				2638	private static final long H_UNDERSCORE = highMask("_");
				2639	// END Android-added: Allow underscore in hostname.
				2640
				2641	// Dot, for use in hostnames
				2642	private static final long L_DOT = lowMask(".");
				2643	private static final long H_DOT = highMask(".");
				2644
				2645	// userinfo = *( unreserved \| escaped \|
				2646	// ";" \| ":" \| "&" \| "=" \| "+" \| "$" \| "," )
				2647	private static final long L_USERINFO
				2648	= L_UNRESERVED \| L_ESCAPED \| lowMask(";:&=+$,");
				2649	private static final long H_USERINFO
				2650	= H_UNRESERVED \| H_ESCAPED \| highMask(";:&=+$,");
				2651
				2652	// reg_name = 1*( unreserved \| escaped \| "$" \| "," \|
				2653	// ";" \| ":" \| "@" \| "&" \| "=" \| "+" )
				2654	private static final long L_REG_NAME
				2655	= L_UNRESERVED \| L_ESCAPED \| lowMask("$,;:@&=+");
				2656	private static final long H_REG_NAME
				2657	= H_UNRESERVED \| H_ESCAPED \| highMask("$,;:@&=+");
				2658
				2659	// All valid characters for server-based authorities
				2660	private static final long L_SERVER
				2661	= L_USERINFO \| L_ALPHANUM \| L_DASH \| lowMask(".:@[]");
				2662	private static final long H_SERVER
				2663	= H_USERINFO \| H_ALPHANUM \| H_DASH \| highMask(".:@[]");
				2664
				2665	// Special case of server authority that represents an IPv6 address
				2666	// In this case, a % does not signify an escape sequence
				2667	private static final long L_SERVER_PERCENT
				2668	= L_SERVER \| lowMask("%");
				2669	private static final long H_SERVER_PERCENT
				2670	= H_SERVER \| highMask("%");
				2671	private static final long L_LEFT_BRACKET = lowMask("[");
				2672	private static final long H_LEFT_BRACKET = highMask("[");
				2673
				2674	// scheme = alpha *( alpha \| digit \| "+" \| "-" \| "." )
				2675	private static final long L_SCHEME = L_ALPHA \| L_DIGIT \| lowMask("+-.");
				2676	private static final long H_SCHEME = H_ALPHA \| H_DIGIT \| highMask("+-.");
				2677
				2678	// uric_no_slash = unreserved \| escaped \| ";" \| "?" \| ":" \| "@" \|
				2679	// "&" \| "=" \| "+" \| "$" \| ","
				2680	private static final long L_URIC_NO_SLASH
				2681	= L_UNRESERVED \| L_ESCAPED \| lowMask(";?:@&=+$,");
				2682	private static final long H_URIC_NO_SLASH
				2683	= H_UNRESERVED \| H_ESCAPED \| highMask(";?:@&=+$,");
				2684
				2685
				2686	// -- Escaping and encoding --
				2687
				2688	private final static char[] hexDigits = {
				2689	'0', '1', '2', '3', '4', '5', '6', '7',
				2690	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
				2691	};
				2692
				2693	private static void appendEscape(StringBuffer sb, byte b) {
				2694	sb.append('%');
				2695	sb.append(hexDigits[(b >> 4) & 0x0f]);
				2696	sb.append(hexDigits[(b >> 0) & 0x0f]);
				2697	}
				2698
				2699	private static void appendEncoded(StringBuffer sb, char c) {
				2700	ByteBuffer bb = null;
				2701	try {
				2702	bb = ThreadLocalCoders.encoderFor("UTF-8")
				2703	.encode(CharBuffer.wrap("" + c));
				2704	} catch (CharacterCodingException x) {
				2705	assert false;
				2706	}
				2707	while (bb.hasRemaining()) {
				2708	int b = bb.get() & 0xff;
				2709	if (b >= 0x80)
				2710	appendEscape(sb, (byte)b);
				2711	else
				2712	sb.append((char)b);
				2713	}
				2714	}
				2715
				2716	// Quote any characters in s that are not permitted
				2717	// by the given mask pair
				2718	//
				2719	private static String quote(String s, long lowMask, long highMask) {
				2720	int n = s.length();
				2721	StringBuffer sb = null;
				2722	boolean allowNonASCII = ((lowMask & L_ESCAPED) != 0);
				2723	for (int i = 0; i < s.length(); i++) {
				2724	char c = s.charAt(i);
				2725	if (c < '\u0080') {
				2726	if (!match(c, lowMask, highMask)) {
				2727	if (sb == null) {
				2728	sb = new StringBuffer();
				2729	sb.append(s.substring(0, i));
				2730	}
				2731	appendEscape(sb, (byte)c);
				2732	} else {
				2733	if (sb != null)
				2734	sb.append(c);
				2735	}
				2736	} else if (allowNonASCII
				2737	&& (Character.isSpaceChar(c)
				2738	\|\| Character.isISOControl(c))) {
				2739	if (sb == null) {
				2740	sb = new StringBuffer();
				2741	sb.append(s.substring(0, i));
				2742	}
				2743	appendEncoded(sb, c);
				2744	} else {
				2745	if (sb != null)
				2746	sb.append(c);
				2747	}
				2748	}
				2749	return (sb == null) ? s : sb.toString();
				2750	}
				2751
				2752	// Encodes all characters >= \u0080 into escaped, normalized UTF-8 octets,
				2753	// assuming that s is otherwise legal
				2754	//
				2755	private static String encode(String s) {
				2756	int n = s.length();
				2757	if (n == 0)
				2758	return s;
				2759
				2760	// First check whether we actually need to encode
				2761	for (int i = 0;;) {
				2762	if (s.charAt(i) >= '\u0080')
				2763	break;
				2764	if (++i >= n)
				2765	return s;
				2766	}
				2767
				2768	String ns = Normalizer.normalize(s, Normalizer.Form.NFC);
				2769	ByteBuffer bb = null;
				2770	try {
				2771	bb = ThreadLocalCoders.encoderFor("UTF-8")
				2772	.encode(CharBuffer.wrap(ns));
				2773	} catch (CharacterCodingException x) {
				2774	assert false;
				2775	}
				2776
				2777	StringBuffer sb = new StringBuffer();
				2778	while (bb.hasRemaining()) {
				2779	int b = bb.get() & 0xff;
				2780	if (b >= 0x80)
				2781	appendEscape(sb, (byte)b);
				2782	else
				2783	sb.append((char)b);
				2784	}
				2785	return sb.toString();
				2786	}
				2787
				2788	private static int decode(char c) {
				2789	if ((c >= '0') && (c <= '9'))
				2790	return c - '0';
				2791	if ((c >= 'a') && (c <= 'f'))
				2792	return c - 'a' + 10;
				2793	if ((c >= 'A') && (c <= 'F'))
				2794	return c - 'A' + 10;
				2795	assert false;
				2796	return -1;
				2797	}
				2798
				2799	private static byte decode(char c1, char c2) {
				2800	return (byte)( ((decode(c1) & 0xf) << 4)
				2801	\| ((decode(c2) & 0xf) << 0));
				2802	}
				2803
				2804	// Evaluates all escapes in s, applying UTF-8 decoding if needed. Assumes
				2805	// that escapes are well-formed syntactically, i.e., of the form %XX. If a
				2806	// sequence of escaped octets is not valid UTF-8 then the erroneous octets
				2807	// are replaced with '\uFFFD'.
				2808	// Exception: any "%" found between "[]" is left alone. It is an IPv6 literal
				2809	// with a scope_id
				2810	//
				2811	private static String decode(String s) {
				2812	if (s == null)
				2813	return s;
				2814	int n = s.length();
				2815	if (n == 0)
				2816	return s;
				2817	if (s.indexOf('%') < 0)
				2818	return s;
				2819
				2820	StringBuffer sb = new StringBuffer(n);
				2821	ByteBuffer bb = ByteBuffer.allocate(n);
				2822	CharBuffer cb = CharBuffer.allocate(n);
				2823	CharsetDecoder dec = ThreadLocalCoders.decoderFor("UTF-8")
				2824	.onMalformedInput(CodingErrorAction.REPLACE)
				2825	.onUnmappableCharacter(CodingErrorAction.REPLACE);
				2826
				2827	// This is not horribly efficient, but it will do for now
				2828	char c = s.charAt(0);
				2829	boolean betweenBrackets = false;
				2830
				2831	for (int i = 0; i < n;) {
				2832	assert c == s.charAt(i); // Loop invariant
				2833	if (c == '[') {
				2834	betweenBrackets = true;
				2835	} else if (betweenBrackets && c == ']') {
				2836	betweenBrackets = false;
				2837	}
				2838	if (c != '%' \|\| betweenBrackets) {
				2839	sb.append(c);
				2840	if (++i >= n)
				2841	break;
				2842	c = s.charAt(i);
				2843	continue;
				2844	}
				2845	bb.clear();
				2846	int ui = i;
				2847	for (;;) {
				2848	assert (n - i >= 2);
				2849	bb.put(decode(s.charAt(++i), s.charAt(++i)));
				2850	if (++i >= n)
				2851	break;
				2852	c = s.charAt(i);
				2853	if (c != '%')
				2854	break;
				2855	}
				2856	bb.flip();
				2857	cb.clear();
				2858	dec.reset();
				2859	CoderResult cr = dec.decode(bb, cb, true);
				2860	assert cr.isUnderflow();
				2861	cr = dec.flush(cb);
				2862	assert cr.isUnderflow();
				2863	sb.append(cb.flip().toString());
				2864	}
				2865
				2866	return sb.toString();
				2867	}
				2868
				2869
				2870	// -- Parsing --
				2871
				2872	// For convenience we wrap the input URI string in a new instance of the
				2873	// following internal class. This saves always having to pass the input
				2874	// string as an argument to each internal scan/parse method.
				2875
				2876	private class Parser {
				2877
				2878	private String input; // URI input string
				2879	private boolean requireServerAuthority = false;
				2880
				2881	Parser(String s) {
				2882	input = s;
				2883	string = s;
				2884	}
				2885
				2886	// -- Methods for throwing URISyntaxException in various ways --
				2887
				2888	private void fail(String reason) throws URISyntaxException {
				2889	throw new URISyntaxException(input, reason);
				2890	}
				2891
				2892	private void fail(String reason, int p) throws URISyntaxException {
				2893	throw new URISyntaxException(input, reason, p);
				2894	}
				2895
				2896	private void failExpecting(String expected, int p)
				2897	throws URISyntaxException
				2898	{
				2899	fail("Expected " + expected, p);
				2900	}
				2901
				2902	private void failExpecting(String expected, String prior, int p)
				2903	throws URISyntaxException
				2904	{
				2905	fail("Expected " + expected + " following " + prior, p);
				2906	}
				2907
				2908
				2909	// -- Simple access to the input string --
				2910
				2911	// Return a substring of the input string
				2912	//
				2913	private String substring(int start, int end) {
				2914	return input.substring(start, end);
				2915	}
				2916
				2917	// Return the char at position p,
				2918	// assuming that p < input.length()
				2919	//
				2920	private char charAt(int p) {
				2921	return input.charAt(p);
				2922	}
				2923
				2924	// Tells whether start < end and, if so, whether charAt(start) == c
				2925	//
				2926	private boolean at(int start, int end, char c) {
				2927	return (start < end) && (charAt(start) == c);
				2928	}
				2929
				2930	// Tells whether start + s.length() < end and, if so,
				2931	// whether the chars at the start position match s exactly
				2932	//
				2933	private boolean at(int start, int end, String s) {
				2934	int p = start;
				2935	int sn = s.length();
				2936	if (sn > end - p)
				2937	return false;
				2938	int i = 0;
				2939	while (i < sn) {
				2940	if (charAt(p++) != s.charAt(i)) {
				2941	break;
				2942	}
				2943	i++;
				2944	}
				2945	return (i == sn);
				2946	}
				2947
				2948
				2949	// -- Scanning --
				2950
				2951	// The various scan and parse methods that follow use a uniform
				2952	// convention of taking the current start position and end index as
				2953	// their first two arguments. The start is inclusive while the end is
				2954	// exclusive, just as in the String class, i.e., a start/end pair
				2955	// denotes the half-open interval [start, end) of the input string.
				2956	//
				2957	// These methods never proceed past the end position. They may return
				2958	// -1 to indicate outright failure, but more often they simply return
				2959	// the position of the first char after the last char scanned. Thus
				2960	// a typical idiom is
				2961	//
				2962	// int p = start;
				2963	// int q = scan(p, end, ...);
				2964	// if (q > p)
				2965	// // We scanned something
				2966	// ...;
				2967	// else if (q == p)
				2968	// // We scanned nothing
				2969	// ...;
				2970	// else if (q == -1)
				2971	// // Something went wrong
				2972	// ...;
				2973
				2974
				2975	// Scan a specific char: If the char at the given start position is
				2976	// equal to c, return the index of the next char; otherwise, return the
				2977	// start position.
				2978	//
				2979	private int scan(int start, int end, char c) {
				2980	if ((start < end) && (charAt(start) == c))
				2981	return start + 1;
				2982	return start;
				2983	}
				2984
				2985	// Scan forward from the given start position. Stop at the first char
				2986	// in the err string (in which case -1 is returned), or the first char
				2987	// in the stop string (in which case the index of the preceding char is
				2988	// returned), or the end of the input string (in which case the length
				2989	// of the input string is returned). May return the start position if
				2990	// nothing matches.
				2991	//
				2992	private int scan(int start, int end, String err, String stop) {
				2993	int p = start;
				2994	while (p < end) {
				2995	char c = charAt(p);
				2996	if (err.indexOf(c) >= 0)
				2997	return -1;
				2998	if (stop.indexOf(c) >= 0)
				2999	break;
				3000	p++;
				3001	}
				3002	return p;
				3003	}
				3004
				3005	// Scan a potential escape sequence, starting at the given position,
				3006	// with the given first char (i.e., charAt(start) == c).
				3007	//
				3008	// This method assumes that if escapes are allowed then visible
				3009	// non-US-ASCII chars are also allowed.
				3010	//
				3011	private int scanEscape(int start, int n, char first)
				3012	throws URISyntaxException
				3013	{
				3014	int p = start;
				3015	char c = first;
				3016	if (c == '%') {
				3017	// Process escape pair
				3018	if ((p + 3 <= n)
				3019	&& match(charAt(p + 1), L_HEX, H_HEX)
				3020	&& match(charAt(p + 2), L_HEX, H_HEX)) {
				3021	return p + 3;
				3022	}
				3023	fail("Malformed escape pair", p);
				3024	} else if ((c > 128)
				3025	&& !Character.isSpaceChar(c)
				3026	&& !Character.isISOControl(c)) {
				3027	// Allow unescaped but visible non-US-ASCII chars
				3028	return p + 1;
				3029	}
				3030	return p;
				3031	}
				3032
				3033	// Scan chars that match the given mask pair
				3034	//
				3035	private int scan(int start, int n, long lowMask, long highMask)
				3036	throws URISyntaxException
				3037	{
				3038	int p = start;
				3039	while (p < n) {
				3040	char c = charAt(p);
				3041	if (match(c, lowMask, highMask)) {
				3042	p++;
				3043	continue;
				3044	}
				3045	if ((lowMask & L_ESCAPED) != 0) {
				3046	int q = scanEscape(p, n, c);
				3047	if (q > p) {
				3048	p = q;
				3049	continue;
				3050	}
				3051	}
				3052	break;
				3053	}
				3054	return p;
				3055	}
				3056
				3057	// Check that each of the chars in [start, end) matches the given mask
				3058	//
				3059	private void checkChars(int start, int end,
				3060	long lowMask, long highMask,
				3061	String what)
				3062	throws URISyntaxException
				3063	{
				3064	int p = scan(start, end, lowMask, highMask);
				3065	if (p < end)
				3066	fail("Illegal character in " + what, p);
				3067	}
				3068
				3069	// Check that the char at position p matches the given mask
				3070	//
				3071	private void checkChar(int p,
				3072	long lowMask, long highMask,
				3073	String what)
				3074	throws URISyntaxException
				3075	{
				3076	checkChars(p, p + 1, lowMask, highMask, what);
				3077	}
				3078
				3079
				3080	// -- Parsing --
				3081
				3082	// [<scheme>:]<scheme-specific-part>[#<fragment>]
				3083	//
				3084	void parse(boolean rsa) throws URISyntaxException {
				3085	requireServerAuthority = rsa;
				3086	int ssp; // Start of scheme-specific part
				3087	int n = input.length();
				3088	int p = scan(0, n, "/?#", ":");
				3089	if ((p >= 0) && at(p, n, ':')) {
				3090	if (p == 0)
				3091	failExpecting("scheme name", 0);
				3092	checkChar(0, L_ALPHA, H_ALPHA, "scheme name");
				3093	checkChars(1, p, L_SCHEME, H_SCHEME, "scheme name");
				3094	scheme = substring(0, p);
				3095	p++; // Skip ':'
				3096	ssp = p;
				3097	if (at(p, n, '/')) {
				3098	p = parseHierarchical(p, n);
				3099	} else {
				3100	int q = scan(p, n, "", "#");
				3101	if (q <= p)
				3102	failExpecting("scheme-specific part", p);
				3103	checkChars(p, q, L_URIC, H_URIC, "opaque part");
				3104	p = q;
				3105	}
				3106	} else {
				3107	ssp = 0;
				3108	p = parseHierarchical(0, n);
				3109	}
				3110	schemeSpecificPart = substring(ssp, p);
				3111	if (at(p, n, '#')) {
				3112	checkChars(p + 1, n, L_URIC, H_URIC, "fragment");
				3113	fragment = substring(p + 1, n);
				3114	p = n;
				3115	}
				3116	if (p < n)
				3117	fail("end of URI", p);
				3118	}
				3119
				3120	// [//authority]<path>[?<query>]
				3121	//
				3122	// DEVIATION from RFC2396: We allow an empty authority component as
				3123	// long as it's followed by a non-empty path, query component, or
				3124	// fragment component. This is so that URIs such as "file:///foo/bar"
				3125	// will parse. This seems to be the intent of RFC2396, though the
				3126	// grammar does not permit it. If the authority is empty then the
				3127	// userInfo, host, and port components are undefined.
				3128	//
				3129	// DEVIATION from RFC2396: We allow empty relative paths. This seems
				3130	// to be the intent of RFC2396, but the grammar does not permit it.
				3131	// The primary consequence of this deviation is that "#f" parses as a
				3132	// relative URI with an empty path.
				3133	//
				3134	private int parseHierarchical(int start, int n)
				3135	throws URISyntaxException
				3136	{
				3137	int p = start;
				3138	if (at(p, n, '/') && at(p + 1, n, '/')) {
				3139	p += 2;
				3140	int q = scan(p, n, "", "/?#");
				3141	if (q > p) {
				3142	p = parseAuthority(p, q);
				3143	} else if (q < n) {
				3144	// DEVIATION: Allow empty authority prior to non-empty
				3145	// path, query component or fragment identifier
				3146	} else
				3147	failExpecting("authority", p);
				3148	}
				3149	int q = scan(p, n, "", "?#"); // DEVIATION: May be empty
				3150	checkChars(p, q, L_PATH, H_PATH, "path");
				3151	path = substring(p, q);
				3152	p = q;
				3153	if (at(p, n, '?')) {
				3154	p++;
				3155	q = scan(p, n, "", "#");
				3156	checkChars(p, q, L_URIC, H_URIC, "query");
				3157	query = substring(p, q);
				3158	p = q;
				3159	}
				3160	return p;
				3161	}
				3162
				3163	// authority = server \| reg_name
				3164	//
				3165	// Ambiguity: An authority that is a registry name rather than a server
				3166	// might have a prefix that parses as a server. We use the fact that
				3167	// the authority component is always followed by '/' or the end of the
				3168	// input string to resolve this: If the complete authority did not
				3169	// parse as a server then we try to parse it as a registry name.
				3170	//
				3171	private int parseAuthority(int start, int n)
				3172	throws URISyntaxException
				3173	{
				3174	int p = start;
				3175	int q = p;
				3176	URISyntaxException ex = null;
				3177
				3178	boolean serverChars;
				3179	boolean regChars;
				3180
				3181	if (scan(p, n, "", "]") > p) {
				3182	// contains a literal IPv6 address, therefore % is allowed
				3183	serverChars = (scan(p, n, L_SERVER_PERCENT, H_SERVER_PERCENT) == n);
				3184	} else {
				3185	serverChars = (scan(p, n, L_SERVER, H_SERVER) == n);
				3186	}
				3187	regChars = (scan(p, n, L_REG_NAME, H_REG_NAME) == n);
				3188
				3189	if (regChars && !serverChars) {
				3190	// Must be a registry-based authority
				3191	authority = substring(p, n);
				3192	return n;
				3193	}
				3194
				3195	if (serverChars) {
				3196	// Might be (probably is) a server-based authority, so attempt
				3197	// to parse it as such. If the attempt fails, try to treat it
				3198	// as a registry-based authority.
				3199	try {
				3200	q = parseServer(p, n);
				3201	if (q < n)
				3202	failExpecting("end of authority", q);
				3203	authority = substring(p, n);
				3204	} catch (URISyntaxException x) {
				3205	// Undo results of failed parse
				3206	userInfo = null;
				3207	host = null;
				3208	port = -1;
				3209	if (requireServerAuthority) {
				3210	// If we're insisting upon a server-based authority,
				3211	// then just re-throw the exception
				3212	throw x;
				3213	} else {
				3214	// Save the exception in case it doesn't parse as a
				3215	// registry either
				3216	ex = x;
				3217	q = p;
				3218	}
				3219	}
				3220	}
				3221
				3222	if (q < n) {
				3223	if (regChars) {
				3224	// Registry-based authority
				3225	authority = substring(p, n);
				3226	} else if (ex != null) {
				3227	// Re-throw exception; it was probably due to
				3228	// a malformed IPv6 address
				3229	throw ex;
				3230	} else {
				3231	fail("Illegal character in authority", q);
				3232	}
				3233	}
				3234
				3235	return n;
				3236	}
				3237
				3238
				3239	// [<userinfo>@]<host>[:<port>]
				3240	//
				3241	private int parseServer(int start, int n)
				3242	throws URISyntaxException
				3243	{
				3244	int p = start;
				3245	int q;
				3246
				3247	// userinfo
				3248	q = scan(p, n, "/?#", "@");
				3249	if ((q >= p) && at(q, n, '@')) {
				3250	checkChars(p, q, L_USERINFO, H_USERINFO, "user info");
				3251	userInfo = substring(p, q);
				3252	p = q + 1; // Skip '@'
				3253	}
				3254
				3255	// hostname, IPv4 address, or IPv6 address
				3256	if (at(p, n, '[')) {
				3257	// DEVIATION from RFC2396: Support IPv6 addresses, per RFC2732
				3258	p++;
				3259	q = scan(p, n, "/?#", "]");
				3260	if ((q > p) && at(q, n, ']')) {
				3261	// look for a "%" scope id
				3262	int r = scan (p, q, "", "%");
				3263	if (r > p) {
				3264	parseIPv6Reference(p, r);
				3265	if (r+1 == q) {
				3266	fail ("scope id expected");
				3267	}
				3268	checkChars (r+1, q, L_ALPHANUM, H_ALPHANUM,
				3269	"scope id");
				3270	} else {
				3271	parseIPv6Reference(p, q);
				3272	}
				3273	host = substring(p-1, q+1);
				3274	p = q + 1;
				3275	} else {
				3276	failExpecting("closing bracket for IPv6 address", q);
				3277	}
				3278	} else {
				3279	q = parseIPv4Address(p, n);
				3280	if (q <= p)
				3281	q = parseHostname(p, n);
				3282	p = q;
				3283	}
				3284
				3285	// port
				3286	if (at(p, n, ':')) {
				3287	p++;
				3288	q = scan(p, n, "", "/");
				3289	if (q > p) {
				3290	checkChars(p, q, L_DIGIT, H_DIGIT, "port number");
				3291	try {
				3292	port = Integer.parseInt(substring(p, q));
				3293	} catch (NumberFormatException x) {
				3294	fail("Malformed port number", p);
				3295	}
				3296	p = q;
				3297	}
				3298	}
				3299	if (p < n)
				3300	failExpecting("port number", p);
				3301
				3302	return p;
				3303	}
				3304
				3305	// Scan a string of decimal digits whose value fits in a byte
				3306	//
				3307	private int scanByte(int start, int n)
				3308	throws URISyntaxException
				3309	{
				3310	int p = start;
				3311	int q = scan(p, n, L_DIGIT, H_DIGIT);
				3312	if (q <= p) return q;
				3313	if (Integer.parseInt(substring(p, q)) > 255) return p;
				3314	return q;
				3315	}
				3316
				3317	// Scan an IPv4 address.
				3318	//
				3319	// If the strict argument is true then we require that the given
				3320	// interval contain nothing besides an IPv4 address; if it is false
				3321	// then we only require that it start with an IPv4 address.
				3322	//
				3323	// If the interval does not consist of or start with (depending upon
				3324	// the strict argument) legal IPv4 address characters then we return -1
				3325	// immediately; otherwise we insist that these characters parse as a
				3326	// legal IPv4 address and throw an exception on failure.
				3327	//
				3328	// We assume that any string of decimal digits and dots must be an IPv4
				3329	// address. It won't parse as a hostname anyway, so making that
				3330	// assumption here allows more meaningful exceptions to be thrown.
				3331	//
				3332	private int scanIPv4Address(int start, int n, boolean strict)
				3333	throws URISyntaxException
				3334	{
				3335	int p = start;
				3336	int q;
				3337	int m = scan(p, n, L_DIGIT \| L_DOT, H_DIGIT \| H_DOT);
				3338	if ((m <= p) \|\| (strict && (m != n)))
				3339	return -1;
				3340	for (;;) {
				3341	// Per RFC2732: At most three digits per byte
				3342	// Further constraint: Each element fits in a byte
				3343	if ((q = scanByte(p, m)) <= p) break; p = q;
				3344	if ((q = scan(p, m, '.')) <= p) break; p = q;
				3345	if ((q = scanByte(p, m)) <= p) break; p = q;
				3346	if ((q = scan(p, m, '.')) <= p) break; p = q;
				3347	if ((q = scanByte(p, m)) <= p) break; p = q;
				3348	if ((q = scan(p, m, '.')) <= p) break; p = q;
				3349	if ((q = scanByte(p, m)) <= p) break; p = q;
				3350	if (q < m) break;
				3351	return q;
				3352	}
				3353	fail("Malformed IPv4 address", q);
				3354	return -1;
				3355	}
				3356
				3357	// Take an IPv4 address: Throw an exception if the given interval
				3358	// contains anything except an IPv4 address
				3359	//
				3360	private int takeIPv4Address(int start, int n, String expected)
				3361	throws URISyntaxException
				3362	{
				3363	int p = scanIPv4Address(start, n, true);
				3364	if (p <= start)
				3365	failExpecting(expected, start);
				3366	return p;
				3367	}
				3368
				3369	// Attempt to parse an IPv4 address, returning -1 on failure but
				3370	// allowing the given interval to contain [:<characters>] after
				3371	// the IPv4 address.
				3372	//
				3373	private int parseIPv4Address(int start, int n) {
				3374	int p;
				3375
				3376	try {
				3377	p = scanIPv4Address(start, n, false);
				3378	} catch (URISyntaxException x) {
				3379	return -1;
				3380	} catch (NumberFormatException nfe) {
				3381	return -1;
				3382	}
				3383
				3384	if (p > start && p < n) {
				3385	// IPv4 address is followed by something - check that
				3386	// it's a ":" as this is the only valid character to
				3387	// follow an address.
				3388	if (charAt(p) != ':') {
				3389	p = -1;
				3390	}
				3391	}
				3392
				3393	if (p > start)
				3394	host = substring(start, p);
				3395
				3396	return p;
				3397	}
				3398
				3399	// Android-changed: Allow underscore in hostname.
				3400	// Added "_" to the grammars for domainLabel and topLabel.
				3401	// hostname = domainlabel [ "." ] | 1*( domainlabel "." ) toplabel [ "." ]
				3402	// domainlabel = alphanum | alphanum *( alphanum | "-" | "_" ) alphanum
				3403	// toplabel = alpha | alpha *( alphanum | "-" | "_" ) alphanum
				3404	//
				3405	private int parseHostname(int start, int n)
				3406	throws URISyntaxException
				3407	{
				3408	int p = start;
				3409	int q;
				3410	int l = -1; // Start of last parsed label
				3411
				3412	do {
				3413	// Android-changed: Allow underscore in hostname.
				3414	// RFC 2396 only allows alphanumeric characters and hyphens, but real,
				3415	// large Internet hosts in the wild use underscore, so we have to allow it.
				3416	// http://code.google.com/p/android/issues/detail?id=37577
				3417	// http://b/17579865
				3418	// http://b/18016625
				3419	// http://b/18023709
				3420
				3421	// domainlabel = alphanum [ *( alphanum \| "-" \| "_" ) alphanum ]
				3422	q = scan(p, n, L_ALPHANUM, H_ALPHANUM);
				3423	if (q <= p)
				3424	break;
				3425	l = p;
				3426	if (q > p) {
				3427	p = q;
				3428	// Android-changed: Allow underscore in hostname.
				3429	// q = scan(p, n, L_ALPHANUM \| L_DASH, H_ALPHANUM \| H_DASH);
				3430	q = scan(p, n, L_ALPHANUM \| L_DASH \| L_UNDERSCORE, H_ALPHANUM \| H_DASH \| H_UNDERSCORE);
				3431	if (q > p) {
				3432	if (charAt(q - 1) == '-')
				3433	fail("Illegal character in hostname", q - 1);
				3434	p = q;
				3435	}
				3436	}
				3437	q = scan(p, n, '.');
				3438	if (q <= p)
				3439	break;
				3440	p = q;
				3441	} while (p < n);
				3442
				3443	if ((p < n) && !at(p, n, ':'))
				3444	fail("Illegal character in hostname", p);
				3445
				3446	if (l < 0)
				3447	failExpecting("hostname", start);
				3448
				3449	// for a fully qualified hostname check that the rightmost
				3450	// label starts with an alpha character.
				3451	if (l > start && !match(charAt(l), L_ALPHA, H_ALPHA)) {
				3452	fail("Illegal character in hostname", l);
				3453	}
				3454
				3455	host = substring(start, p);
				3456	return p;
				3457	}
				3458
				3459
				3460	// IPv6 address parsing, from RFC2373: IPv6 Addressing Architecture
				3461	//
				3462	// Bug: The grammar in RFC2373 Appendix B does not allow addresses of
				3463	// the form ::12.34.56.78, which are clearly shown in the examples
				3464	// earlier in the document. Here is the original grammar:
				3465	//
				3466	// IPv6address = hexpart [ ":" IPv4address ]
				3467	// hexpart = hexseq | hexseq "::" [ hexseq ] | "::" [ hexseq ]
				3468	// hexseq = hex4 *( ":" hex4)
				3469	// hex4 = 1*4HEXDIG
				3470	//
				3471	// We therefore use the following revised grammar:
				3472	//
				3473	// IPv6address = hexseq [ ":" IPv4address ]
				3474	// \| hexseq [ "::" [ hexpost ] ]
				3475	// \| "::" [ hexpost ]
				3476	// hexpost = hexseq \| hexseq ":" IPv4address \| IPv4address
				3477	// hexseq = hex4 *( ":" hex4)
				3478	// hex4 = 1*4HEXDIG
				3479	//
				3480	// This covers all and only the following cases:
				3481	//
				3482	// hexseq
				3483	// hexseq : IPv4address
				3484	// hexseq ::
				3485	// hexseq :: hexseq
				3486	// hexseq :: hexseq : IPv4address
				3487	// hexseq :: IPv4address
				3488	// :: hexseq
				3489	// :: hexseq : IPv4address
				3490	// :: IPv4address
				3491	// ::
				3492	//
				3493	// Additionally we constrain the IPv6 address as follows :-
				3494	//
				3495	// i. IPv6 addresses without compressed zeros should contain
				3496	// exactly 16 bytes.
				3497	//
				3498	// ii. IPv6 addresses with compressed zeros should contain
				3499	// fewer than 16 bytes.
				3500
				3501	private int ipv6byteCount = 0;
				3502
				3503	private int parseIPv6Reference(int start, int n)
				3504	throws URISyntaxException
				3505	{
				3506	int p = start;
				3507	int q;
				3508	boolean compressedZeros = false;
				3509
				3510	q = scanHexSeq(p, n);
				3511
				3512	if (q > p) {
				3513	p = q;
				3514	if (at(p, n, "::")) {
				3515	compressedZeros = true;
				3516	p = scanHexPost(p + 2, n);
				3517	} else if (at(p, n, ':')) {
				3518	p = takeIPv4Address(p + 1, n, "IPv4 address");
				3519	ipv6byteCount += 4;
				3520	}
				3521	} else if (at(p, n, "::")) {
				3522	compressedZeros = true;
				3523	p = scanHexPost(p + 2, n);
				3524	}
				3525	if (p < n)
				3526	fail("Malformed IPv6 address", start);
				3527	if (ipv6byteCount > 16)
				3528	fail("IPv6 address too long", start);
				3529	if (!compressedZeros && ipv6byteCount < 16)
				3530	fail("IPv6 address too short", start);
				3531	if (compressedZeros && ipv6byteCount == 16)
				3532	fail("Malformed IPv6 address", start);
				3533
				3534	return p;
				3535	}
				3536
				3537	private int scanHexPost(int start, int n)
				3538	throws URISyntaxException
				3539	{
				3540	int p = start;
				3541	int q;
				3542
				3543	if (p == n)
				3544	return p;
				3545
				3546	q = scanHexSeq(p, n);
				3547	if (q > p) {
				3548	p = q;
				3549	if (at(p, n, ':')) {
				3550	p++;
				3551	p = takeIPv4Address(p, n, "hex digits or IPv4 address");
				3552	ipv6byteCount += 4;
				3553	}
				3554	} else {
				3555	p = takeIPv4Address(p, n, "hex digits or IPv4 address");
				3556	ipv6byteCount += 4;
				3557	}
				3558	return p;
				3559	}
				3560
				3561	// Scan a hex sequence; return -1 if one could not be scanned
				3562	//
				3563	private int scanHexSeq(int start, int n)
				3564	throws URISyntaxException
				3565	{
				3566	int p = start;
				3567	int q;
				3568
				3569	q = scan(p, n, L_HEX, H_HEX);
				3570	if (q <= p)
				3571	return -1;
				3572	if (at(q, n, '.')) // Beginning of IPv4 address
				3573	return -1;
				3574	if (q > p + 4)
				3575	fail("IPv6 hexadecimal digit sequence too long", p);
				3576	ipv6byteCount += 2;
				3577	p = q;
				3578	while (p < n) {
				3579	if (!at(p, n, ':'))
				3580	break;
				3581	if (at(p + 1, n, ':'))
				3582	break; // "::"
				3583	p++;
				3584	q = scan(p, n, L_HEX, H_HEX);
				3585	if (q <= p)
				3586	failExpecting("digits for an IPv6 address", p);
				3587	if (at(q, n, '.')) { // Beginning of IPv4 address
				3588	p--;
				3589	break;
				3590	}
				3591	if (q > p + 4)
				3592	fail("IPv6 hexadecimal digit sequence too long", p);
				3593	ipv6byteCount += 2;
				3594	p = q;
				3595	}
				3596
				3597	return p;
				3598	}
				3599
				3600	}
				3601
				3602	}