// Copyright 2016 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! This Rust crate implements IDNA
//! [per the WHATWG URL Standard](https://url.spec.whatwg.org/#idna).
//!
//! It also exposes the underlying algorithms from [*Unicode IDNA Compatibility Processing*
//! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/)
//! and [Punycode (RFC 3492)](https://tools.ietf.org/html/rfc3492).
//!
//! Quoting from [UTS #46’s introduction](http://www.unicode.org/reports/tr46/#Introduction):
//!
//! > Initially, domain names were restricted to ASCII characters.
//! > A system was introduced in 2003 for internationalized domain names (IDN).
//! > This system is called Internationalizing Domain Names for Applications,
//! > or IDNA2003 for short.
//! > This mechanism supports IDNs by means of a client software transformation
//! > into a format known as Punycode.
//! > A revision of IDNA was approved in 2010 (IDNA2008).
//! > This revision has a number of incompatibilities with IDNA2003.
//! >
//! > The incompatibilities force implementers of client software,
//! > such as browsers and emailers,
//! > to face difficult choices during the transition period
//! > as registries shift from IDNA2003 to IDNA2008.
//! > This document specifies a mechanism
//! > that minimizes the impact of this transition for client software,
//! > allowing client software to access domains that are valid under either system.
Jeff Vander Stoepad390ea2024-02-02 10:35:16 +010034#![no_std]
35
36// For forwards compatibility
37#[cfg(feature = "std")]
38extern crate std;
39
40extern crate alloc;
41
42#[cfg(not(feature = "alloc"))]
43compile_error!("the `alloc` feature must be enabled");
Jeff Vander Stoepa4352c12020-12-17 22:14:52 +010044
Jeff Vander Stoep2e710d12022-12-12 12:55:08 +010045#[cfg(test)]
Jeff Vander Stoepa4352c12020-12-17 22:14:52 +010046#[macro_use]
Jeff Vander Stoep2e710d12022-12-12 12:55:08 +010047extern crate assert_matches;
Jeff Vander Stoepa4352c12020-12-17 22:14:52 +010048
Jeff Vander Stoepad390ea2024-02-02 10:35:16 +010049use alloc::string::String;
50
Jeff Vander Stoepa4352c12020-12-17 22:14:52 +010051pub mod punycode;
52mod uts46;
53
Haibo Huanga03885d2021-02-09 17:23:28 -080054pub use crate::uts46::{Config, Errors, Idna};
Jeff Vander Stoepa4352c12020-12-17 22:14:52 +010055
56/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm.
57///
58/// Return the ASCII representation a domain name,
59/// normalizing characters (upper-case to lower-case and other kinds of equivalence)
60/// and using Punycode as necessary.
61///
62/// This process may fail.
63pub fn domain_to_ascii(domain: &str) -> Result<String, uts46::Errors> {
64 Config::default().to_ascii(domain)
65}
66
67/// The [domain to ASCII](https://url.spec.whatwg.org/#concept-domain-to-ascii) algorithm,
68/// with the `beStrict` flag set.
69pub fn domain_to_ascii_strict(domain: &str) -> Result<String, uts46::Errors> {
70 Config::default()
71 .use_std3_ascii_rules(true)
72 .verify_dns_length(true)
73 .to_ascii(domain)
74}
75
76/// The [domain to Unicode](https://url.spec.whatwg.org/#concept-domain-to-unicode) algorithm.
77///
78/// Return the Unicode representation of a domain name,
79/// normalizing characters (upper-case to lower-case and other kinds of equivalence)
80/// and decoding Punycode as necessary.
81///
82/// This may indicate [syntax violations](https://url.spec.whatwg.org/#syntax-violation)
83/// but always returns a string for the mapped domain.
84pub fn domain_to_unicode(domain: &str) -> (String, Result<(), uts46::Errors>) {
85 Config::default().to_unicode(domain)
86}