Blame - java/net/IDN.java - platform/prebuilts/fullsdk/sources/android-29

blob: a18c3a8daceff374e56df78e2c40cd1945fe617f [file] [log] [blame]

Rahul Ravikumar	0533600	2019-10-14 15:04:32 -0700	[diff] [blame]	1	/*
				2	* Copyright (C) 2014 The Android Open Source Project
				3	* Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
				4	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
				5	*
				6	* This code is free software; you can redistribute it and/or modify it
				7	* under the terms of the GNU General Public License version 2 only, as
				8	* published by the Free Software Foundation. Oracle designates this
				9	* particular file as subject to the "Classpath" exception as provided
				10	* by Oracle in the LICENSE file that accompanied this code.
				11	*
				12	* This code is distributed in the hope that it will be useful, but WITHOUT
				13	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				14	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				15	* version 2 for more details (a copy is included in the LICENSE file that
				16	* accompanied this code).
				17	*
				18	* You should have received a copy of the GNU General Public License version
				19	* 2 along with this work; if not, write to the Free Software Foundation,
				20	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
				21	*
				22	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
				23	* or visit www.oracle.com if you need additional information or have any
				24	* questions.
				25	*/
				26	package java.net;
				27
				28	import android.icu.text.IDNA;
				29
				30	/**
				31	* Provides methods to convert internationalized domain names (IDNs) between
				32	* a normal Unicode representation and an ASCII Compatible Encoding (ACE) representation.
				33	* Internationalized domain names can use characters from the entire range of
				34	* Unicode, while traditional domain names are restricted to ASCII characters.
				35	* ACE is an encoding of Unicode strings that uses only ASCII characters and
				36	* can be used with software (such as the Domain Name System) that only
				37	* understands traditional domain names.
				38	*
				39	* <p>Internationalized domain names are defined in <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
				40	* RFC 3490 defines two operations: ToASCII and ToUnicode. These two operations employ the
				41	* <a href="http://www.ietf.org/rfc/rfc3491.txt">Nameprep</a> algorithm, which is a
				42	* profile of <a href="http://www.ietf.org/rfc/rfc3454.txt">Stringprep</a>, and
				43	* the <a href="http://www.ietf.org/rfc/rfc3492.txt">Punycode</a> algorithm to convert
				44	* domain name strings back and forth.
				45	*
				46	* <p>The behavior of aforementioned conversion process can be adjusted by various flags:
				47	* <ul>
				48	* <li>If the ALLOW_UNASSIGNED flag is used, the domain name string to be converted
				49	* can contain code points that are unassigned in Unicode 3.2, which is the
				50	* Unicode version on which IDN conversion is based. If the flag is not used,
				51	* the presence of such unassigned code points is treated as an error.
				52	* <li>If the USE_STD3_ASCII_RULES flag is used, ASCII strings are checked against <a href="http://www.ietf.org/rfc/rfc1122.txt">RFC 1122</a> and <a href="http://www.ietf.org/rfc/rfc1123.txt">RFC 1123</a>.
				53	* It is an error if they don't meet the requirements.
				54	* </ul>
				55	* These flags can be logically OR'ed together.
				56	*
				57	* <p>Security considerations are important with respect to internationalized
				58	* domain name support. For example, English domain names may be <i>homographed</i>
				59	* - maliciously misspelled by substitution of non-Latin letters.
				60	* <a href="http://www.unicode.org/reports/tr36/">Unicode Technical Report #36</a>
				61	* discusses security issues of IDN support as well as possible solutions.
				62	* Applications are responsible for taking adequate security measures when using
				63	* internationalized domain names.
				64	*
				65	* @author Edward Wang
				66	* @since 1.6
				67	*
				68	*/
				69	public final class IDN {
				70	/**
				71	* Flag to allow processing of unassigned code points
				72	*/
				73	public static final int ALLOW_UNASSIGNED = 0x01;
				74
				75	/**
				76	* Flag to turn on the check against STD-3 ASCII rules
				77	*/
				78	public static final int USE_STD3_ASCII_RULES = 0x02;
				79
				80
				81	/**
				82	* Translates a string from Unicode to ASCII Compatible Encoding (ACE),
				83	* as defined by the ToASCII operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
				84	*
				85	* <p>ToASCII operation can fail. ToASCII fails if any step of it fails.
				86	* If ToASCII operation fails, an IllegalArgumentException will be thrown.
				87	* In this case, the input string should not be used in an internationalized domain name.
				88	*
				89	* <p> A label is an individual part of a domain name. The original ToASCII operation,
				90	* as defined in RFC 3490, only operates on a single label. This method can handle
				91	* both label and entire domain name, by assuming that labels in a domain name are
				92	* always separated by dots. The following characters are recognized as dots:
				93	* \u002E (full stop), \u3002 (ideographic full stop), \uFF0E (fullwidth full stop),
				94	* and \uFF61 (halfwidth ideographic full stop). if dots are
				95	* used as label separators, this method also changes all of them to \u002E (full stop)
				96	* in output translated string.
				97	*
				98	* @param input the string to be processed
				99	* @param flag process flag; can be 0 or any logical OR of possible flags
				100	*
				101	* @return the translated {@code String}
				102	*
				103	* @throws IllegalArgumentException if the input string doesn't conform to RFC 3490 specification
				104	*/
				105	public static String toASCII(String input, int flag) {
				106	// BEGIN Android-changed: Use ICU4J implementation
				107	try {
				108	return IDNA.convertIDNToASCII(input, flag).toString();
				109	} catch (android.icu.text.StringPrepParseException e) {
				110	// b/113787610: "." is a valid IDN but is rejected by ICU.
				111	// Usage is relatively uncommon, so only check for it if ICU throws.
				112	if (".".equals(input)) {
				113	return input;
				114	}
				115	throw new IllegalArgumentException("Invalid input to toASCII: " + input, e);
				116	}
				117	// END Android-changed: Use ICU4J implementation
				118	}
				119
				120
				121	/**
				122	* Translates a string from Unicode to ASCII Compatible Encoding (ACE),
				123	* as defined by the ToASCII operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
				124	*
				125	* <p> This convenience method works as if by invoking the
				126	* two-argument counterpart as follows:
				127	* <blockquote>
				128	* {@link #toASCII(String, int) toASCII}(input, 0);
				129	* </blockquote>
				130	*
				131	* @param input the string to be processed
				132	*
				133	* @return the translated {@code String}
				134	*
				135	* @throws IllegalArgumentException if the input string doesn't conform to RFC 3490 specification
				136	*/
				137	public static String toASCII(String input) {
				138	return toASCII(input, 0);
				139	}
				140
				141
				142	/**
				143	* Translates a string from ASCII Compatible Encoding (ACE) to Unicode,
				144	* as defined by the ToUnicode operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
				145	*
				146	* <p>ToUnicode never fails. In case of any error, the input string is returned unmodified.
				147	*
				148	* <p> A label is an individual part of a domain name. The original ToUnicode operation,
				149	* as defined in RFC 3490, only operates on a single label. This method can handle
				150	* both label and entire domain name, by assuming that labels in a domain name are
				151	* always separated by dots. The following characters are recognized as dots:
				152	* \u002E (full stop), \u3002 (ideographic full stop), \uFF0E (fullwidth full stop),
				153	* and \uFF61 (halfwidth ideographic full stop).
				154	*
				155	* @param input the string to be processed
				156	* @param flag process flag; can be 0 or any logical OR of possible flags
				157	*
				158	* @return the translated {@code String}
				159	*/
				160	public static String toUnicode(String input, int flag) {
				161	// BEGIN Android-changed: Use ICU4J implementation
				162	try {
				163	// ICU only translates separators to ASCII for toASCII.
				164	// Java expects the translation for toUnicode too.
				165	return convertFullStop(IDNA.convertIDNToUnicode(input, flag)).toString();
				166	} catch (android.icu.text.StringPrepParseException e) {
				167	// The RI documentation explicitly states that if the conversion was unsuccessful
				168	// the original string is returned.
				169	return input;
				170	}
				171	// END Android-changed: Use ICU4J implementation
				172	}
				173
				174	// BEGIN Android-added: Use ICU4J implementation
				175	private static boolean isLabelSeperator(char c) {
				176	return (c == '\u3002' \|\| c == '\uff0e' \|\| c == '\uff61');
				177	}
				178
				179	private static StringBuffer convertFullStop(StringBuffer input) {
				180	for (int i = 0; i < input.length(); i++) {
				181	if (isLabelSeperator(input.charAt(i))) {
				182	input.setCharAt(i, '.');
				183	}
				184	}
				185	return input;
				186	}
				187	// END Android-added: Use ICU4J implementation
				188
				189	/**
				190	* Translates a string from ASCII Compatible Encoding (ACE) to Unicode,
				191	* as defined by the ToUnicode operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
				192	*
				193	* <p> This convenience method works as if by invoking the
				194	* two-argument counterpart as follows:
				195	* <blockquote>
				196	* {@link #toUnicode(String, int) toUnicode}(input, 0);
				197	* </blockquote>
				198	*
				199	* @param input the string to be processed
				200	*
				201	* @return the translated {@code String}
				202	*/
				203	public static String toUnicode(String input) {
				204	return toUnicode(input, 0);
				205	}
				206
				207
				208	/* ---------------- Private members -------------- */
				209
				210	// Android-removed: Private helper methods, unused because we use ICU.
				211	/*
				212	// ACE Prefix is "xn--"
				213	private static final String ACE_PREFIX = "xn--";
				214	private static final int ACE_PREFIX_LENGTH = ACE_PREFIX.length();
				215
				216	private static final int MAX_LABEL_LENGTH = 63;
				217
				218	// single instance of nameprep
				219	private static StringPrep namePrep = null;
				220
				221	static {
				222	InputStream stream = null;
				223
				224	try {
				225	final String IDN_PROFILE = "uidna.spp";
				226	if (System.getSecurityManager() != null) {
				227	stream = AccessController.doPrivileged(new PrivilegedAction<InputStream>() {
				228	public InputStream run() {
				229	return StringPrep.class.getResourceAsStream(IDN_PROFILE);
				230	}
				231	});
				232	} else {
				233	stream = StringPrep.class.getResourceAsStream(IDN_PROFILE);
				234	}
				235
				236	namePrep = new StringPrep(stream);
				237	stream.close();
				238	} catch (IOException e) {
				239	// should never reach here
				240	assert false;
				241	}
				242	}
				243	*/
				244
				245	/* ---------------- Private operations -------------- */
				246
				247
				/**
				 * Suppresses the default zero-argument constructor; the methods of this class
				 * are all static.
				 */
				private IDN() {
				}
				252
				253	// Android-removed: Private helper methods, unused because we use ICU.
				254	/*
				255	//
				256	// toASCII operation; should only apply to a single label
				257	//
				258	private static String toASCIIInternal(String label, int flag)
				259	{
				260	// step 1
				261	// Check if the string contains code points outside the ASCII range 0..0x7f.
				262	boolean isASCII = isAllASCII(label);
				263	StringBuffer dest;
				264
				265	// step 2
				266	// perform the nameprep operation; flag ALLOW_UNASSIGNED is used here
				267	if (!isASCII) {
				268	UCharacterIterator iter = UCharacterIterator.getInstance(label);
				269	try {
				270	dest = namePrep.prepare(iter, flag);
				271	} catch (java.text.ParseException e) {
				272	throw new IllegalArgumentException(e);
				273	}
				274	} else {
				275	dest = new StringBuffer(label);
				276	}
				277
				278	// step 8, move forward to check the smallest number of the code points
				279	// the length must be inside 1..63
				280	if (dest.length() == 0) {
				281	throw new IllegalArgumentException(
				282	"Empty label is not a legal name");
				283	}
				284
				285	// step 3
				286	// Verify the absence of non-LDH ASCII code points
				287	// 0..0x2c, 0x2e..0x2f, 0x3a..0x40, 0x5b..0x60, 0x7b..0x7f
				288	// Verify the absence of leading and trailing hyphen
				289	boolean useSTD3ASCIIRules = ((flag & USE_STD3_ASCII_RULES) != 0);
				290	if (useSTD3ASCIIRules) {
				291	for (int i = 0; i < dest.length(); i++) {
				292	int c = dest.charAt(i);
				293	if (isNonLDHAsciiCodePoint(c)) {
				294	throw new IllegalArgumentException(
				295	"Contains non-LDH ASCII characters");
				296	}
				297	}
				298
				299	if (dest.charAt(0) == '-' ||
				300	dest.charAt(dest.length() - 1) == '-') {
				301
				302	throw new IllegalArgumentException(
				303	"Has leading or trailing hyphen");
				304	}
				305	}
				306
				307	if (!isASCII) {
				308	// step 4
				309	// If all code points are inside 0..0x7f, skip to step 8
				310	if (!isAllASCII(dest.toString())) {
				311	// step 5
				312	// verify the sequence does not begin with ACE prefix
				313	if(!startsWithACEPrefix(dest)){
				314
				315	// step 6
				316	// encode the sequence with punycode
				317	try {
				318	dest = Punycode.encode(dest, null);
				319	} catch (java.text.ParseException e) {
				320	throw new IllegalArgumentException(e);
				321	}
				322
				323	dest = toASCIILower(dest);
				324
				325	// step 7
				326	// prepend the ACE prefix
				327	dest.insert(0, ACE_PREFIX);
				328	} else {
				329	throw new IllegalArgumentException("The input starts with the ACE Prefix");
				330	}
				331
				332	}
				333	}
				334
				335	// step 8
				336	// the length must be inside 1..63
				337	if (dest.length() > MAX_LABEL_LENGTH) {
				338	throw new IllegalArgumentException("The label in the input is too long");
				339	}
				340
				341	return dest.toString();
				342	}
				343
				344	//
				345	// toUnicode operation; should only apply to a single label
				346	//
				347	private static String toUnicodeInternal(String label, int flag) {
				348	boolean[] caseFlags = null;
				349	StringBuffer dest;
				350
				351	// step 1
				352	// find out if all the codepoints in input are ASCII
				353	boolean isASCII = isAllASCII(label);
				354
				355	if(!isASCII){
				356	// step 2
				357	// perform the nameprep operation; flag ALLOW_UNASSIGNED is used here
				358	try {
				359	UCharacterIterator iter = UCharacterIterator.getInstance(label);
				360	dest = namePrep.prepare(iter, flag);
				361	} catch (Exception e) {
				362	// toUnicode never fails; if any step fails, return the input string
				363	return label;
				364	}
				365	} else {
				366	dest = new StringBuffer(label);
				367	}
				368
				369	// step 3
				370	// verify ACE Prefix
				371	if(startsWithACEPrefix(dest)) {
				372
				373	// step 4
				374	// Remove the ACE Prefix
				375	String temp = dest.substring(ACE_PREFIX_LENGTH, dest.length());
				376
				377	try {
				378	// step 5
				379	// Decode using punycode
				380	StringBuffer decodeOut = Punycode.decode(new StringBuffer(temp), null);
				381
				382	// step 6
				383	// Apply toASCII
				384	String toASCIIOut = toASCII(decodeOut.toString(), flag);
				385
				386	// step 7
				387	// verify
				388	if (toASCIIOut.equalsIgnoreCase(dest.toString())) {
				389	// step 8
				390	// return output of step 5
				391	return decodeOut.toString();
				392	}
				393	} catch (Exception ignored) {
				394	// no-op
				395	}
				396	}
				397
				398	// just return the input
				399	return label;
				400	}
				401
				402
				403	//
				404	// LDH stands for "letter/digit/hyphen", with characters restricted to the
				405	// 26-letter Latin alphabet <A-Z a-z>, the digits <0-9>, and the hyphen
				406	// <->.
				407	// Non LDH refers to characters in the ASCII range, but which are not
				408	// letters, digits or the hyphen.
				409	//
				410	// non-LDH = 0..0x2C, 0x2E..0x2F, 0x3A..0x40, 0x5B..0x60, 0x7B..0x7F
				411	//
				412	private static boolean isNonLDHAsciiCodePoint(int ch){
				413	return (0x0000 <= ch && ch <= 0x002C) ||
				414	(0x002E <= ch && ch <= 0x002F) ||
				415	(0x003A <= ch && ch <= 0x0040) ||
				416	(0x005B <= ch && ch <= 0x0060) ||
				417	(0x007B <= ch && ch <= 0x007F);
				418	}
				419
				420	//
				421	// search for a dot in a string and return the index of that character;
				422	// or if there are no dots, return the length of the input string
				423	// dots might be: \u002E (full stop), \u3002 (ideographic full stop), \uFF0E (fullwidth full stop),
				424	// and \uFF61 (halfwidth ideographic full stop).
				425	//
				426	private static int searchDots(String s, int start) {
				427	int i;
				428	for (i = start; i < s.length(); i++) {
				429	if (isLabelSeparator(s.charAt(i))) {
				430	break;
				431	}
				432	}
				433
				434	return i;
				435	}
				436
				437	//
				438	// to check if a string is a root label, ".".
				439	//
				440	private static boolean isRootLabel(String s) {
				441	return (s.length() == 1 && isLabelSeparator(s.charAt(0)));
				442	}
				443
				444	//
				445	// to check if a character is a label separator, i.e. a dot character.
				446	//
				447	private static boolean isLabelSeparator(char c) {
				448	return (c == '.' || c == '\u3002' || c == '\uFF0E' || c == '\uFF61');
				449	}
				450
				451	//
				452	// to check if a string only contains US-ASCII code point
				453	//
				454	private static boolean isAllASCII(String input) {
				455	boolean isASCII = true;
				456	for (int i = 0; i < input.length(); i++) {
				457	int c = input.charAt(i);
				458	if (c > 0x7F) {
				459	isASCII = false;
				460	break;
				461	}
				462	}
				463	return isASCII;
				464	}
				465
				466	//
				467	// to check if a string starts with ACE-prefix
				468	//
				469	private static boolean startsWithACEPrefix(StringBuffer input){
				470	boolean startsWithPrefix = true;
				471
				472	if(input.length() < ACE_PREFIX_LENGTH){
				473	return false;
				474	}
				475	for(int i = 0; i < ACE_PREFIX_LENGTH; i++){
				476	if(toASCIILower(input.charAt(i)) != ACE_PREFIX.charAt(i)){
				477	startsWithPrefix = false;
				478	}
				479	}
				480	return startsWithPrefix;
				481	}
				482
				483	private static char toASCIILower(char ch){
				484	if('A' <= ch && ch <= 'Z'){
				485	return (char)(ch + 'a' - 'A');
				486	}
				487	return ch;
				488	}
				489
				490	private static StringBuffer toASCIILower(StringBuffer input){
				491	StringBuffer dest = new StringBuffer();
				492	for(int i = 0; i < input.length();i++){
				493	dest.append(toASCIILower(input.charAt(i)));
				494	}
				495	return dest;
				496	}
				497	*/
				498	}