Add SDK 29 sources.
Test: N/A
Change-Id: Iedb7a31029e003928eb16f7e69ed147e72bb6235
diff --git a/java/net/IDN.java b/java/net/IDN.java
new file mode 100644
index 0000000..a18c3a8
--- /dev/null
+++ b/java/net/IDN.java
@@ -0,0 +1,498 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation. Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package java.net;
+
+import android.icu.text.IDNA;
+
+/**
+ * Provides methods to convert internationalized domain names (IDNs) between
+ * a normal Unicode representation and an ASCII Compatible Encoding (ACE) representation.
+ * Internationalized domain names can use characters from the entire range of
+ * Unicode, while traditional domain names are restricted to ASCII characters.
+ * ACE is an encoding of Unicode strings that uses only ASCII characters and
+ * can be used with software (such as the Domain Name System) that only
+ * understands traditional domain names.
+ *
+ * <p>Internationalized domain names are defined in <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
+ * RFC 3490 defines two operations: ToASCII and ToUnicode. These two operations employ
+ * the <a href="http://www.ietf.org/rfc/rfc3491.txt">Nameprep</a> algorithm, which is a
+ * profile of <a href="http://www.ietf.org/rfc/rfc3454.txt">Stringprep</a>, and the
+ * <a href="http://www.ietf.org/rfc/rfc3492.txt">Punycode</a> algorithm to convert
+ * domain name strings back and forth.
+ *
+ * <p>The behavior of aforementioned conversion process can be adjusted by various flags:
+ * <ul>
+ * <li>If the ALLOW_UNASSIGNED flag is used, the domain name string to be converted
+ * can contain code points that are unassigned in Unicode 3.2, which is the
+ * Unicode version on which IDN conversion is based. If the flag is not used,
+ * the presence of such unassigned code points is treated as an error.
+ * <li>If the USE_STD3_ASCII_RULES flag is used, ASCII strings are checked against <a href="http://www.ietf.org/rfc/rfc1122.txt">RFC 1122</a> and <a href="http://www.ietf.org/rfc/rfc1123.txt">RFC 1123</a>.
+ * It is an error if they don't meet the requirements.
+ * </ul>
+ * These flags can be logically OR'ed together.
+ *
+ * <p>Security considerations are important with respect to internationalized
+ * domain name support. For example, English domain names may be <i>homographed</i>
+ * - maliciously misspelled by substitution of non-Latin letters.
+ * <a href="http://www.unicode.org/reports/tr36/">Unicode Technical Report #36</a>
+ * discusses security issues of IDN support as well as possible solutions.
+ * Applications are responsible for taking adequate security measures when using
+ * international domain names.
+ *
+ * @author Edward Wang
+ * @since 1.6
+ *
+ */
+public final class IDN {
+ /**
+ * Flag to allow processing of code points that are unassigned in Unicode 3.2.
+ */
+ public static final int ALLOW_UNASSIGNED = 0x01;
+
+ /**
+ * Flag to turn on the check of ASCII strings against the STD-3 ASCII rules (RFC 1122 and RFC 1123).
+ */
+ public static final int USE_STD3_ASCII_RULES = 0x02;
+
+
+ /**
+ * Translates a string from Unicode to ASCII Compatible Encoding (ACE),
+ * as defined by the ToASCII operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
+ *
+ * <p>ToASCII operation can fail. ToASCII fails if any step of it fails.
+ * If ToASCII operation fails, an IllegalArgumentException will be thrown.
+ * In this case, the input string should not be used in an internationalized domain name.
+ *
+ * <p> A label is an individual part of a domain name. The original ToASCII operation,
+ * as defined in RFC 3490, only operates on a single label. This method can handle
+ * both label and entire domain name, by assuming that labels in a domain name are
+ * always separated by dots. The following characters are recognized as dots:
+ * \u002E (full stop), \u3002 (ideographic full stop), \uFF0E (fullwidth full stop),
+ * and \uFF61 (halfwidth ideographic full stop). If dots are
+ * used as label separators, this method also changes all of them to \u002E (full stop)
+ * in the translated output string.
+ *
+ * @param input the string to be processed
+ * @param flag process flag; can be 0 or any logical OR of possible flags
+ *
+ * @return the translated {@code String}
+ *
+ * @throws IllegalArgumentException if the input string doesn't conform to RFC 3490 specification
+ */
+ public static String toASCII(String input, int flag) {
+ // BEGIN Android-changed: Use ICU4J implementation
+ try {
+ return IDNA.convertIDNToASCII(input, flag).toString();
+ } catch (android.icu.text.StringPrepParseException e) {
+ // b/113787610: "." is a valid IDN but is rejected by ICU.
+ // Usage is relatively uncommon, so only check for it if ICU throws.
+ if (".".equals(input)) {
+ return input;
+ }
+ throw new IllegalArgumentException("Invalid input to toASCII: " + input, e);
+ }
+ // END Android-changed: Use ICU4J implementation
+ }
+
+
+ /**
+ * Translates a string from Unicode to ASCII Compatible Encoding (ACE),
+ * as defined by the ToASCII operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
+ *
+ * <p> This convenience method works as if by invoking the
+ * two-argument counterpart as follows:
+ * <blockquote>
+ * {@link #toASCII(String, int) toASCII}(input, 0);
+ * </blockquote>
+ *
+ * @param input the string to be processed
+ *
+ * @return the translated {@code String}
+ *
+ * @throws IllegalArgumentException if the input string doesn't conform to RFC 3490 specification
+ */
+ public static String toASCII(String input) {
+ return toASCII(input, 0); // 0 = no flags: neither ALLOW_UNASSIGNED nor USE_STD3_ASCII_RULES
+ }
+
+
+ /**
+ * Translates a string from ASCII Compatible Encoding (ACE) to Unicode,
+ * as defined by the ToUnicode operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
+ *
+ * <p>ToUnicode never fails. In case of any error, the input string is returned unmodified.
+ *
+ * <p> A label is an individual part of a domain name. The original ToUnicode operation,
+ * as defined in RFC 3490, only operates on a single label. This method can handle
+ * both label and entire domain name, by assuming that labels in a domain name are
+ * always separated by dots. The following characters are recognized as dots:
+ * \u002E (full stop), \u3002 (ideographic full stop), \uFF0E (fullwidth full stop),
+ * and \uFF61 (halfwidth ideographic full stop).
+ *
+ * @param input the string to be processed
+ * @param flag process flag; can be 0 or any logical OR of possible flags
+ *
+ * @return the translated {@code String}
+ */
+ public static String toUnicode(String input, int flag) {
+ // BEGIN Android-changed: Use ICU4J implementation
+ try {
+ // ICU only translates separators to ASCII for toASCII.
+ // Java expects the translation for toUnicode too.
+ return convertFullStop(IDNA.convertIDNToUnicode(input, flag)).toString();
+ } catch (android.icu.text.StringPrepParseException e) {
+ // The RI documentation explicitly states that if the conversion was unsuccessful
+ // the original string is returned.
+ return input;
+ }
+ // END Android-changed: Use ICU4J implementation
+ }
+
+ // BEGIN Android-added: Use ICU4J implementation
+ private static boolean isNonAsciiLabelSeparator(char c) {
+ return c == '\u3002' || c == '\uff0e' || c == '\uff61'; // '.' (U+002E) is deliberately not matched
+ }
+
+ private static StringBuffer convertFullStop(StringBuffer input) {
+ for (int i = 0; i < input.length(); i++) {
+ if (isNonAsciiLabelSeparator(input.charAt(i))) {
+ input.setCharAt(i, '.'); // rewritten in place
+ }
+ }
+ return input; // the same buffer that was passed in
+ }
+ // END Android-added: Use ICU4J implementation
+
+ /**
+ * Translates a string from ASCII Compatible Encoding (ACE) to Unicode,
+ * as defined by the ToUnicode operation of <a href="http://www.ietf.org/rfc/rfc3490.txt">RFC 3490</a>.
+ *
+ * <p> This convenience method works as if by invoking the
+ * two-argument counterpart as follows:
+ * <blockquote>
+ * {@link #toUnicode(String, int) toUnicode}(input, 0);
+ * </blockquote>
+ *
+ * @param input the string to be processed
+ *
+ * @return the translated {@code String}
+ */
+ public static String toUnicode(String input) {
+ return toUnicode(input, 0); // 0 = no flags: neither ALLOW_UNASSIGNED nor USE_STD3_ASCII_RULES
+ }
+
+
+ /* ---------------- Private members -------------- */
+
+ // Android-removed: Private helper methods, unused because we use ICU.
+ /*
+ // ACE Prefix is "xn--"
+ private static final String ACE_PREFIX = "xn--";
+ private static final int ACE_PREFIX_LENGTH = ACE_PREFIX.length();
+
+ private static final int MAX_LABEL_LENGTH = 63;
+
+ // single instance of nameprep
+ private static StringPrep namePrep = null;
+
+ static {
+ InputStream stream = null;
+
+ try {
+ final String IDN_PROFILE = "uidna.spp";
+ if (System.getSecurityManager() != null) {
+ stream = AccessController.doPrivileged(new PrivilegedAction<InputStream>() {
+ public InputStream run() {
+ return StringPrep.class.getResourceAsStream(IDN_PROFILE);
+ }
+ });
+ } else {
+ stream = StringPrep.class.getResourceAsStream(IDN_PROFILE);
+ }
+
+ namePrep = new StringPrep(stream);
+ stream.close();
+ } catch (IOException e) {
+ // should never reach here
+ assert false;
+ }
+ }
+ */
+
+ /* ---------------- Private operations -------------- */
+
+
+ //
+ // Private constructor: this class only exposes static members and must not be instantiated.
+ //
+ private IDN() {}
+
+ // Android-removed: Private helper methods, unused because we use ICU.
+ /*
+ //
+ // toASCII operation; should only apply to a single label
+ //
+ private static String toASCIIInternal(String label, int flag)
+ {
+ // step 1
+ // Check if the string contains code points outside the ASCII range 0..0x7F.
+ boolean isASCII = isAllASCII(label);
+ StringBuffer dest;
+
+ // step 2
+ // perform the nameprep operation; flag ALLOW_UNASSIGNED is used here
+ if (!isASCII) {
+ UCharacterIterator iter = UCharacterIterator.getInstance(label);
+ try {
+ dest = namePrep.prepare(iter, flag);
+ } catch (java.text.ParseException e) {
+ throw new IllegalArgumentException(e);
+ }
+ } else {
+ dest = new StringBuffer(label);
+ }
+
+ // step 8, move forward to check the smallest number of the code points
+ // the length must be inside 1..63
+ if (dest.length() == 0) {
+ throw new IllegalArgumentException(
+ "Empty label is not a legal name");
+ }
+
+ // step 3
+ // Verify the absence of non-LDH ASCII code points
+ // 0..0x2c, 0x2e..0x2f, 0x3a..0x40, 0x5b..0x60, 0x7b..0x7f
+ // Verify the absence of leading and trailing hyphen
+ boolean useSTD3ASCIIRules = ((flag & USE_STD3_ASCII_RULES) != 0);
+ if (useSTD3ASCIIRules) {
+ for (int i = 0; i < dest.length(); i++) {
+ int c = dest.charAt(i);
+ if (isNonLDHAsciiCodePoint(c)) {
+ throw new IllegalArgumentException(
+ "Contains non-LDH ASCII characters");
+ }
+ }
+
+ if (dest.charAt(0) == '-' ||
+ dest.charAt(dest.length() - 1) == '-') {
+
+ throw new IllegalArgumentException(
+ "Has leading or trailing hyphen");
+ }
+ }
+
+ if (!isASCII) {
+ // step 4
+ // If all code points are inside 0..0x7f, skip to step 8
+ if (!isAllASCII(dest.toString())) {
+ // step 5
+ // verify the sequence does not begin with ACE prefix
+ if(!startsWithACEPrefix(dest)){
+
+ // step 6
+ // encode the sequence with punycode
+ try {
+ dest = Punycode.encode(dest, null);
+ } catch (java.text.ParseException e) {
+ throw new IllegalArgumentException(e);
+ }
+
+ dest = toASCIILower(dest);
+
+ // step 7
+ // prepend the ACE prefix
+ dest.insert(0, ACE_PREFIX);
+ } else {
+ throw new IllegalArgumentException("The input starts with the ACE Prefix");
+ }
+
+ }
+ }
+
+ // step 8
+ // the length must be inside 1..63
+ if (dest.length() > MAX_LABEL_LENGTH) {
+ throw new IllegalArgumentException("The label in the input is too long");
+ }
+
+ return dest.toString();
+ }
+
+ //
+ // toUnicode operation; should only apply to a single label
+ //
+ private static String toUnicodeInternal(String label, int flag) {
+ boolean[] caseFlags = null;
+ StringBuffer dest;
+
+ // step 1
+ // find out if all the codepoints in input are ASCII
+ boolean isASCII = isAllASCII(label);
+
+ if(!isASCII){
+ // step 2
+ // perform the nameprep operation; flag ALLOW_UNASSIGNED is used here
+ try {
+ UCharacterIterator iter = UCharacterIterator.getInstance(label);
+ dest = namePrep.prepare(iter, flag);
+ } catch (Exception e) {
+ // toUnicode never fails; if any step fails, return the input string
+ return label;
+ }
+ } else {
+ dest = new StringBuffer(label);
+ }
+
+ // step 3
+ // verify ACE Prefix
+ if(startsWithACEPrefix(dest)) {
+
+ // step 4
+ // Remove the ACE Prefix
+ String temp = dest.substring(ACE_PREFIX_LENGTH, dest.length());
+
+ try {
+ // step 5
+ // Decode using punycode
+ StringBuffer decodeOut = Punycode.decode(new StringBuffer(temp), null);
+
+ // step 6
+ // Apply toASCII
+ String toASCIIOut = toASCII(decodeOut.toString(), flag);
+
+ // step 7
+ // verify
+ if (toASCIIOut.equalsIgnoreCase(dest.toString())) {
+ // step 8
+ // return output of step 5
+ return decodeOut.toString();
+ }
+ } catch (Exception ignored) {
+ // no-op
+ }
+ }
+
+ // just return the input
+ return label;
+ }
+
+
+ //
+ // LDH stands for "letter/digit/hyphen", with characters restricted to the
+ // 26-letter Latin alphabet <A-Z a-z>, the digits <0-9>, and the hyphen
+ // <->.
+ // Non-LDH refers to characters in the ASCII range that are not
+ // letters, digits or the hyphen.
+ //
+ // non-LDH = 0..0x2C, 0x2E..0x2F, 0x3A..0x40, 0x5B..0x60, 0x7B..0x7F
+ //
+ private static boolean isNonLDHAsciiCodePoint(int ch){
+ return (0x0000 <= ch && ch <= 0x002C) ||
+ (0x002E <= ch && ch <= 0x002F) ||
+ (0x003A <= ch && ch <= 0x0040) ||
+ (0x005B <= ch && ch <= 0x0060) ||
+ (0x007B <= ch && ch <= 0x007F);
+ }
+
+ //
+ // search for a dot in a string, starting at index start, and return its index;
+ // or if there are no dots, return the length of the input string
+ // dots might be: \u002E (full stop), \u3002 (ideographic full stop), \uFF0E (fullwidth full stop),
+ // and \uFF61 (halfwidth ideographic full stop).
+ //
+ private static int searchDots(String s, int start) {
+ int i;
+ for (i = start; i < s.length(); i++) {
+ if (isLabelSeparator(s.charAt(i))) {
+ break;
+ }
+ }
+
+ return i;
+ }
+
+ //
+ // to check if a string is a root label, ".".
+ //
+ private static boolean isRootLabel(String s) {
+ return (s.length() == 1 && isLabelSeparator(s.charAt(0)));
+ }
+
+ //
+ // to check if a character is a label separator, i.e. a dot character.
+ //
+ private static boolean isLabelSeparator(char c) {
+ return (c == '.' || c == '\u3002' || c == '\uFF0E' || c == '\uFF61');
+ }
+
+ //
+ // to check if a string only contains US-ASCII code point
+ //
+ private static boolean isAllASCII(String input) {
+ boolean isASCII = true;
+ for (int i = 0; i < input.length(); i++) {
+ int c = input.charAt(i);
+ if (c > 0x7F) {
+ isASCII = false;
+ break;
+ }
+ }
+ return isASCII;
+ }
+
+ //
+ // to check if a string starts with ACE-prefix
+ //
+ private static boolean startsWithACEPrefix(StringBuffer input){
+ boolean startsWithPrefix = true;
+
+ if(input.length() < ACE_PREFIX_LENGTH){
+ return false;
+ }
+ for(int i = 0; i < ACE_PREFIX_LENGTH; i++){
+ if(toASCIILower(input.charAt(i)) != ACE_PREFIX.charAt(i)){
+ startsWithPrefix = false;
+ }
+ }
+ return startsWithPrefix;
+ }
+
+ private static char toASCIILower(char ch){
+ if('A' <= ch && ch <= 'Z'){
+ return (char)(ch + 'a' - 'A');
+ }
+ return ch;
+ }
+
+ private static StringBuffer toASCIILower(StringBuffer input){
+ StringBuffer dest = new StringBuffer();
+ for(int i = 0; i < input.length();i++){
+ dest.append(toASCIILower(input.charAt(i)));
+ }
+ return dest;
+ }
+ */
+}