vendor/fluent-syntax-0.11.1/src/unicode.rs - toolchain/rustc - Git at Google

 //! A set of helper functions for unescaping Fluent unicode escape sequences.
 //!
 //! # Unicode
 //!
 //! Fluent supports UTF-8 in all FTL resources, but it also allows
 //! unicode sequences to be escaped in [`String
 //! Literals`](super::ast::InlineExpression::StringLiteral).
 //!
 //! An escape written with `\u` is followed by exactly four hexadecimal
 //! digits, and an escape written with `\U` by exactly six.
 //! ## Example
 //!
 //! ```
 //! use fluent_syntax::unicode::unescape_unicode_to_string;
 //!
 //! assert_eq!(
 //!     unescape_unicode_to_string("Foo \\u5bd2 Bar"),
 //!     "Foo 寒 Bar"
 //! );
 //!
 //! assert_eq!(
 //!     unescape_unicode_to_string("Foo \\U01F68A Bar"),
 //!     "Foo 🚊 Bar"
 //! );
 //! ```
 //!
 //! # Other unescapes
 //!
 //! This also allows for a char `"` to be present inside an FTL string literal,
 //! and for `\` itself to be escaped.
 //!
 //! ## Example
 //!
 //! ```
 //! use fluent_syntax::unicode::unescape_unicode_to_string;
 //!
 //! assert_eq!(
 //!     unescape_unicode_to_string("Foo \\\" Bar"),
 //!     "Foo \" Bar"
 //! );
 //! assert_eq!(
 //!     unescape_unicode_to_string("Foo \\\\ Bar"),
 //!     "Foo \\ Bar"
 //! );
 //! ```
 use std::borrow::Cow;
 use std::char;
 use std::fmt;

 /// Character emitted in place of any escape sequence that cannot be decoded
 /// (U+FFFD REPLACEMENT CHARACTER).
 const UNKNOWN_CHAR: char = '\u{FFFD}';

 /// Decodes the hexadecimal digits of a unicode escape into a `char`.
 ///
 /// Returns `UNKNOWN_CHAR` when `s` is `None`, is empty, contains anything
 /// other than ASCII hexadecimal digits, or names a value that is not a
 /// Unicode scalar value (a surrogate, or anything above `10FFFF`).
 fn encode_unicode(s: Option<&str>) -> char {
     // `u32::from_str_radix` tolerates a leading `+`, which is not part of an
     // escape sequence, so the digits are validated before they are parsed.
     s.filter(|digits| digits.bytes().all(|b| b.is_ascii_hexdigit()))
         .and_then(|digits| u32::from_str_radix(digits, 16).ok())
         .and_then(char::from_u32)
         .unwrap_or(UNKNOWN_CHAR)
 }

 /// Unescapes `input` into the writer `w` without allocating.
 ///
 /// Text between escape sequences is forwarded to `w` as borrowed slices of
 /// `input`. The escapes `\\` and `\"` produce the escaped character, and
 /// `\uHHHH` / `\UHHHHHH` produce the character with that hexadecimal code
 /// point. Anything else that follows a backslash (an unknown escape, a
 /// truncated or undecodable unicode escape, or a backslash at the very end
 /// of `input`) produces `UNKNOWN_CHAR`.
 ///
 /// A malformed escape never causes a panic: if skipping it would stop in the
 /// middle of a multi-byte character, the rest of that character is skipped
 /// as well.
 ///
 /// # Errors
 ///
 /// Returns an error only when writing to `w` fails.
 ///
 /// ## Example
 ///
 /// ```
 /// use fluent_syntax::unicode::unescape_unicode;
 ///
 /// let mut s = String::new();
 /// unescape_unicode(&mut s, "Foo \\U01F60A Bar").unwrap();
 /// assert_eq!(s, "Foo 😊 Bar");
 /// ```
 pub fn unescape_unicode<W>(w: &mut W, input: &str) -> fmt::Result
 where
     W: fmt::Write,
 {
     let bytes = input.as_bytes();

     // `start` is the first byte that has not been written yet; `ptr` is the
     // scan cursor. Both always sit on a char boundary, so slicing is safe.
     let mut start = 0;
     let mut ptr = 0;

     while let Some(b) = bytes.get(ptr) {
         // A backslash is ASCII and can never be part of a multi-byte UTF-8
         // sequence, so scanning byte by byte is sound.
         if b != &b'\\' {
             ptr += 1;
             continue;
         }
         // Flush the literal text that precedes this escape.
         if start != ptr {
             w.write_str(&input[start..ptr])?;
         }

         ptr += 1;

         let new_char = match bytes.get(ptr) {
             Some(b'\\') => '\\',
             Some(b'"') => '"',
             Some(u @ b'u') | Some(u @ b'U') => {
                 let seq_start = ptr + 1;
                 let len = if u == &b'u' { 4 } else { 6 };
                 ptr += len;
                 // `get` yields `None` when the digits are truncated or the
                 // fixed-width window would split a multi-byte character.
                 encode_unicode(input.get(seq_start..seq_start + len))
             }
             _ => UNKNOWN_CHAR,
         };
         w.write_char(new_char)?;

         // Step past the last byte of the escape. A malformed escape may
         // leave the cursor beyond the end of the input or inside a
         // multi-byte character; move it to the next valid boundary so the
         // slices taken from `start` cannot panic.
         ptr = (ptr + 1).min(bytes.len());
         while !input.is_char_boundary(ptr) {
             ptr += 1;
         }
         start = ptr;
     }
     // Flush whatever literal text follows the last escape.
     if start != ptr {
         w.write_str(&input[start..ptr])?;
     }
     Ok(())
 }

 /// Unescapes to a `Cow<str>` optionally allocating.
 ///
 /// ## Example
 ///
 /// ```
 /// use fluent_syntax::unicode::unescape_unicode_to_string;
 ///
 /// assert_eq!(
 ///     unescape_unicode_to_string("Foo \\U01F60A Bar"),
 ///     "Foo 😊 Bar"
 /// );
 /// ```
 pub fn unescape_unicode_to_string(input: &str) -> Cow<str> {
     let bytes = input.as_bytes();
     let mut result = Cow::from(input);

     let mut ptr = 0;

     while let Some(b) = bytes.get(ptr) {
         if b != &b'\\' {
             if let Cow::Owned(ref mut s) = result {
                 s.push(*b as char);
             }
             ptr += 1;
             continue;
         }

         if let Cow::Borrowed(_) = result {
             result = Cow::from(&input[0..ptr]);
         }

         ptr += 1;

         let new_char = match bytes.get(ptr) {
             Some(b'\\') => '\\',
             Some(b'"') => '"',
             Some(u @ b'u') | Some(u @ b'U') => {
                 let start = ptr + 1;
                 let len = if u == &b'u' { 4 } else { 6 };
                 ptr += len;
                 input
                     .get(start..(start + len))
                     .map_or(UNKNOWN_CHAR, |slice| encode_unicode(Some(slice)))
             }
             _ => UNKNOWN_CHAR,
         };
         result.to_mut().push(new_char);
         ptr += 1;
     }
     result
 }
	//! A set of helper functions for unescaping Fluent unicode escape sequences.
	//!
	//! # Unicode
	//!
	//! Fluent supports UTF-8 in all FTL resources, but it also allows
	//! unicode sequences to be escaped in [`String
	//! Literals`](super::ast::InlineExpression::StringLiteral).
	//!
	//! An escape written with `\u` is followed by exactly four hexadecimal
	//! digits, and an escape written with `\U` by exactly six.
	//! ## Example
	//!
	//! ```
	//! use fluent_syntax::unicode::unescape_unicode_to_string;
	//!
	//! assert_eq!(
	//! unescape_unicode_to_string("Foo \\u5bd2 Bar"),
	//! "Foo 寒 Bar"
	//! );
	//!
	//! assert_eq!(
	//! unescape_unicode_to_string("Foo \\U01F68A Bar"),
	//! "Foo 🚊 Bar"
	//! );
	//! ```
	//!
	//! # Other unescapes
	//!
	//! This also allows for a char `"` to be present inside an FTL string literal,
	//! and for `\` itself to be escaped.
	//!
	//! ## Example
	//!
	//! ```
	//! use fluent_syntax::unicode::unescape_unicode_to_string;
	//!
	//! assert_eq!(
	//! unescape_unicode_to_string("Foo \\\" Bar"),
	//! "Foo \" Bar"
	//! );
	//! assert_eq!(
	//! unescape_unicode_to_string("Foo \\\\ Bar"),
	//! "Foo \\ Bar"
	//! );
	//! ```
	use std::borrow::Cow;
	use std::char;
	use std::fmt;

	/// Character emitted in place of any escape sequence that cannot be decoded
	/// (U+FFFD REPLACEMENT CHARACTER).
	const UNKNOWN_CHAR: char = '\u{FFFD}';

	/// Decodes the hexadecimal digits of a unicode escape into a `char`.
	///
	/// Returns `UNKNOWN_CHAR` when `s` is `None`, is empty, contains anything
	/// other than ASCII hexadecimal digits, or names a value that is not a
	/// Unicode scalar value (a surrogate, or anything above `10FFFF`).
	fn encode_unicode(s: Option<&str>) -> char {
	    // `u32::from_str_radix` tolerates a leading `+`, which is not part of an
	    // escape sequence, so the digits are validated before they are parsed.
	    s.filter(|digits| digits.bytes().all(|b| b.is_ascii_hexdigit()))
	        .and_then(|digits| u32::from_str_radix(digits, 16).ok())
	        .and_then(char::from_u32)
	        .unwrap_or(UNKNOWN_CHAR)
	}

	/// Unescapes `input` into the writer `w` without allocating.
	///
	/// Text between escape sequences is forwarded to `w` as borrowed slices of
	/// `input`. The escapes `\\` and `\"` produce the escaped character, and
	/// `\uHHHH` / `\UHHHHHH` produce the character with that hexadecimal code
	/// point. Anything else that follows a backslash (an unknown escape, a
	/// truncated or undecodable unicode escape, or a backslash at the very end
	/// of `input`) produces `UNKNOWN_CHAR`.
	///
	/// A malformed escape never causes a panic: if skipping it would stop in the
	/// middle of a multi-byte character, the rest of that character is skipped
	/// as well.
	///
	/// # Errors
	///
	/// Returns an error only when writing to `w` fails.
	///
	/// ## Example
	///
	/// ```
	/// use fluent_syntax::unicode::unescape_unicode;
	///
	/// let mut s = String::new();
	/// unescape_unicode(&mut s, "Foo \\U01F60A Bar").unwrap();
	/// assert_eq!(s, "Foo 😊 Bar");
	/// ```
	pub fn unescape_unicode<W>(w: &mut W, input: &str) -> fmt::Result
	where
	    W: fmt::Write,
	{
	    let bytes = input.as_bytes();

	    // `start` is the first byte that has not been written yet; `ptr` is the
	    // scan cursor. Both always sit on a char boundary, so slicing is safe.
	    let mut start = 0;
	    let mut ptr = 0;

	    while let Some(b) = bytes.get(ptr) {
	        // A backslash is ASCII and can never be part of a multi-byte UTF-8
	        // sequence, so scanning byte by byte is sound.
	        if b != &b'\\' {
	            ptr += 1;
	            continue;
	        }
	        // Flush the literal text that precedes this escape.
	        if start != ptr {
	            w.write_str(&input[start..ptr])?;
	        }

	        ptr += 1;

	        let new_char = match bytes.get(ptr) {
	            Some(b'\\') => '\\',
	            Some(b'"') => '"',
	            Some(u @ b'u') | Some(u @ b'U') => {
	                let seq_start = ptr + 1;
	                let len = if u == &b'u' { 4 } else { 6 };
	                ptr += len;
	                // `get` yields `None` when the digits are truncated or the
	                // fixed-width window would split a multi-byte character.
	                encode_unicode(input.get(seq_start..seq_start + len))
	            }
	            _ => UNKNOWN_CHAR,
	        };
	        w.write_char(new_char)?;

	        // Step past the last byte of the escape. A malformed escape may
	        // leave the cursor beyond the end of the input or inside a
	        // multi-byte character; move it to the next valid boundary so the
	        // slices taken from `start` cannot panic.
	        ptr = (ptr + 1).min(bytes.len());
	        while !input.is_char_boundary(ptr) {
	            ptr += 1;
	        }
	        start = ptr;
	    }
	    // Flush whatever literal text follows the last escape.
	    if start != ptr {
	        w.write_str(&input[start..ptr])?;
	    }
	    Ok(())
	}

	/// Unescapes to a `Cow<str>` optionally allocating.
	///
	/// ## Example
	///
	/// ```
	/// use fluent_syntax::unicode::unescape_unicode_to_string;
	///
	/// assert_eq!(
	/// unescape_unicode_to_string("Foo \\U01F60A Bar"),
	/// "Foo 😊 Bar"
	/// );
	/// ```
	pub fn unescape_unicode_to_string(input: &str) -> Cow<str> {
	let bytes = input.as_bytes();
	let mut result = Cow::from(input);

	let mut ptr = 0;

	while let Some(b) = bytes.get(ptr) {
	if b != &b'\\' {
	if let Cow::Owned(ref mut s) = result {
	s.push(*b as char);
	}
	ptr += 1;
	continue;
	}

	if let Cow::Borrowed(_) = result {
	result = Cow::from(&input[0..ptr]);
	}

	ptr += 1;

	let new_char = match bytes.get(ptr) {
	Some(b'\\') => '\\',
	Some(b'"') => '"',
	Some(u @ b'u') \| Some(u @ b'U') => {
	let start = ptr + 1;
	let len = if u == &b'u' { 4 } else { 6 };
	ptr += len;
	input
	.get(start..(start + len))
	.map_or(UNKNOWN_CHAR, \|slice\| encode_unicode(Some(slice)))
	}
	_ => UNKNOWN_CHAR,
	};
	result.to_mut().push(new_char);
	ptr += 1;
	}
	result
	}