Jeff Vander Stoep | a4352c1 | 2020-12-17 22:14:52 +0100 | [diff] [blame] | 1 | // Copyright 2016 The rust-url developers. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 4 | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 5 | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| 6 | // option. This file may not be copied, modified, or distributed |
| 7 | // except according to those terms. |
| 8 | |
//! This Rust crate implements IDNA
//! [per the WHATWG URL Standard](https://url.spec.whatwg.org/#idna).
//!
//! It also exposes the underlying algorithms from [*Unicode IDNA Compatibility Processing*
//! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/)
//! and [Punycode (RFC 3492)](https://tools.ietf.org/html/rfc3492).
//!
//! Quoting from [UTS #46’s introduction](http://www.unicode.org/reports/tr46/#Introduction):
//!
//! > Initially, domain names were restricted to ASCII characters.
//! > A system was introduced in 2003 for internationalized domain names (IDN).
//! > This system is called Internationalizing Domain Names for Applications,
//! > or IDNA2003 for short.
//! > This mechanism supports IDNs by means of a client software transformation
//! > into a format known as Punycode.
//! > A revision of IDNA was approved in 2010 (IDNA2008).
//! > This revision has a number of incompatibilities with IDNA2003.
//! >
//! > The incompatibilities force implementers of client software,
//! > such as browsers and emailers,
//! > to face difficult choices during the transition period
//! > as registries shift from IDNA2003 to IDNA2008.
//! > This document specifies a mechanism
//! > that minimizes the impact of this transition for client software,
//! > allowing client software to access domains that are valid under either system.
Jeff Vander Stoep | ad390ea | 2024-02-02 10:35:16 +0100 | [diff] [blame] | 34 | #![no_std] |
| 35 | |
| 36 | // For forwards compatibility |
| 37 | #[cfg(feature = "std")] |
| 38 | extern crate std; |
| 39 | |
| 40 | extern crate alloc; |
| 41 | |
| 42 | #[cfg(not(feature = "alloc"))] |
| 43 | compile_error!("the `alloc` feature must be enabled"); |
Jeff Vander Stoep | a4352c1 | 2020-12-17 22:14:52 +0100 | [diff] [blame] | 44 | |
Jeff Vander Stoep | 2e710d1 | 2022-12-12 12:55:08 +0100 | [diff] [blame] | 45 | #[cfg(test)] |
Jeff Vander Stoep | a4352c1 | 2020-12-17 22:14:52 +0100 | [diff] [blame] | 46 | #[macro_use] |
Jeff Vander Stoep | 2e710d1 | 2022-12-12 12:55:08 +0100 | [diff] [blame] | 47 | extern crate assert_matches; |
Jeff Vander Stoep | a4352c1 | 2020-12-17 22:14:52 +0100 | [diff] [blame] | 48 | |
Jeff Vander Stoep | ad390ea | 2024-02-02 10:35:16 +0100 | [diff] [blame] | 49 | use alloc::string::String; |
| 50 | |
Jeff Vander Stoep | a4352c1 | 2020-12-17 22:14:52 +0100 | [diff] [blame] | 51 | pub mod punycode; |
| 52 | mod uts46; |
| 53 | |
Haibo Huang | a03885d | 2021-02-09 17:23:28 -0800 | [diff] [blame] | 54 | pub use crate::uts46::{Config, Errors, Idna}; |
Jeff Vander Stoep | a4352c1 | 2020-12-17 22:14:52 +0100 | [diff] [blame] | 55 | |
| 56 | /// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm. |
| 57 | /// |
| 58 | /// Return the ASCII representation a domain name, |
| 59 | /// normalizing characters (upper-case to lower-case and other kinds of equivalence) |
| 60 | /// and using Punycode as necessary. |
| 61 | /// |
| 62 | /// This process may fail. |
| 63 | pub fn domain_to_ascii(domain: &str) -> Result<String, uts46::Errors> { |
| 64 | Config::default().to_ascii(domain) |
| 65 | } |
| 66 | |
| 67 | /// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm, |
| 68 | /// with the `beStrict` flag set. |
| 69 | pub fn domain_to_ascii_strict(domain: &str) -> Result<String, uts46::Errors> { |
| 70 | Config::default() |
| 71 | .use_std3_ascii_rules(true) |
| 72 | .verify_dns_length(true) |
| 73 | .to_ascii(domain) |
| 74 | } |
| 75 | |
| 76 | /// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm. |
| 77 | /// |
| 78 | /// Return the Unicode representation of a domain name, |
| 79 | /// normalizing characters (upper-case to lower-case and other kinds of equivalence) |
| 80 | /// and decoding Punycode as necessary. |
| 81 | /// |
| 82 | /// This may indicate [syntax violations](https://url.spec.whatwg.org/#syntax-violation) |
| 83 | /// but always returns a string for the mapped domain. |
| 84 | pub fn domain_to_unicode(domain: &str) -> (String, Result<(), uts46::Errors>) { |
| 85 | Config::default().to_unicode(domain) |
| 86 | } |