| // Copyright 2013-2014 The rust-url developers. |
| // |
| // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
| // option. This file may not be copied, modified, or distributed |
| // except according to those terms. |
| |
| |
| //! Abstraction that conditionally compiles either to rust-encoding, |
| //! or to only support UTF-8. |
| |
| #[cfg(feature = "query_encoding")] extern crate encoding; |
| |
| use std::borrow::Cow; |
| #[cfg(feature = "query_encoding")] use std::fmt::{self, Debug, Formatter}; |
| |
| #[cfg(feature = "query_encoding")] use self::encoding::types::{DecoderTrap, EncoderTrap}; |
| #[cfg(feature = "query_encoding")] use self::encoding::label::encoding_from_whatwg_label; |
| #[cfg(feature = "query_encoding")] pub use self::encoding::types::EncodingRef; |
| |
/// An optional override of the default UTF-8 encoding, backed by rust-encoding.
///
/// UTF-8 is stored as `None` rather than as an explicit encoding reference.
#[cfg(feature = "query_encoding")]
#[derive(Clone, Copy)]
pub struct EncodingOverride {
    /// `None` means UTF-8.
    encoding: Option<EncodingRef>,
}
| |
#[cfg(feature = "query_encoding")]
impl EncodingOverride {
    /// Builds an override from an optional encoding; `None` selects UTF-8.
    pub fn from_opt_encoding(encoding: Option<EncodingRef>) -> Self {
        match encoding {
            Some(chosen) => Self::from_encoding(chosen),
            None => Self::utf8(),
        }
    }

    /// Wraps `encoding`, storing an encoding named `"utf-8"` as `None`.
    pub fn from_encoding(encoding: EncodingRef) -> Self {
        let is_utf8 = encoding.name() == "utf-8";
        EncodingOverride {
            encoding: if is_utf8 { None } else { Some(encoding) },
        }
    }

    /// Returns the override that stands for UTF-8.
    #[inline]
    pub fn utf8() -> Self {
        EncodingOverride { encoding: None }
    }

    /// Resolves a WHATWG encoding label given as raw bytes.
    ///
    /// Returns `None` when `label` is not valid UTF-8 or is not a known label.
    pub fn lookup(label: &[u8]) -> Option<Self> {
        // Don't use String::from_utf8_lossy since no encoding label contains U+FFFD
        // https://encoding.spec.whatwg.org/#names-and-labels
        let label = ::std::str::from_utf8(label).ok()?;
        encoding_from_whatwg_label(label).map(Self::from_encoding)
    }

    /// https://encoding.spec.whatwg.org/#get-an-output-encoding
    ///
    /// Maps UTF-16LE and UTF-16BE to UTF-8 and leaves every other encoding unchanged.
    pub fn to_output_encoding(self) -> Self {
        match self.encoding {
            Some(current) if matches!(current.name(), "utf-16le" | "utf-16be") => Self::utf8(),
            _ => self,
        }
    }

    /// Returns `true` when this override stands for UTF-8.
    pub fn is_utf8(&self) -> bool {
        self.encoding.is_none()
    }

    /// Returns the name of the selected encoding, or `"utf-8"` when none is set.
    pub fn name(&self) -> &'static str {
        self.encoding.map_or("utf-8", |chosen| chosen.name())
    }

    /// Decodes `input` with the selected encoding, falling back to lossy UTF-8
    /// decoding when no encoding is set.
    pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> {
        let encoding = match self.encoding {
            Some(encoding) => encoding,
            None => return decode_utf8_lossy(input),
        };
        // `encoding.decode` never returns `Err` when called with `DecoderTrap::Replace`
        let text = encoding.decode(&input, DecoderTrap::Replace).unwrap();
        Cow::Owned(text)
    }

    /// Encodes `input` with the selected encoding, falling back to plain UTF-8
    /// bytes when no encoding is set.
    pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> {
        let encoding = match self.encoding {
            Some(encoding) => encoding,
            None => return encode_utf8(input),
        };
        // `encoding.encode` never returns `Err` when called with `EncoderTrap::NcrEscape`
        let bytes = encoding.encode(&input, EncoderTrap::NcrEscape).unwrap();
        Cow::Owned(bytes)
    }
}
| |
#[cfg(feature = "query_encoding")]
impl Debug for EncodingOverride {
    /// Writes `EncodingOverride { encoding: <name> }`, where `<name>` is the
    /// encoding's name, or the literal `None` when no encoding is set.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let shown = match self.encoding {
            Some(e) => e.name(),
            None => "None",
        };
        write!(f, "EncodingOverride {{ encoding: {} }}", shown)
    }
}
| |
/// UTF-8-only stand-in used when the `query_encoding` feature is disabled.
#[cfg(not(feature = "query_encoding"))]
#[derive(Clone, Copy, Debug)]
pub struct EncodingOverride;
| |
| #[cfg(not(feature = "query_encoding"))] |
| impl EncodingOverride { |
| #[inline] |
| pub fn utf8() -> Self { |
| EncodingOverride |
| } |
| |
| pub fn decode<'a>(&self, input: Cow<'a, [u8]>) -> Cow<'a, str> { |
| decode_utf8_lossy(input) |
| } |
| |
| pub fn encode<'a>(&self, input: Cow<'a, str>) -> Cow<'a, [u8]> { |
| encode_utf8(input) |
| } |
| } |
| |
/// Decodes `input` as UTF-8, replacing invalid sequences with U+FFFD.
///
/// Borrowed input is handed to `String::from_utf8_lossy`, so valid input stays
/// borrowed. Owned input that is already valid UTF-8 is converted in place,
/// reusing the existing `Vec` allocation; only invalid owned input produces a
/// newly allocated, repaired `String`.
pub fn decode_utf8_lossy(input: Cow<[u8]>) -> Cow<str> {
    match input {
        Cow::Borrowed(bytes) => String::from_utf8_lossy(bytes),
        Cow::Owned(bytes) => match String::from_utf8(bytes) {
            // Valid UTF-8: `from_utf8` takes over the `Vec`'s buffer without copying.
            Ok(valid) => Cow::Owned(valid),
            // Invalid UTF-8: the error still holds the original bytes, so run the
            // lossy conversion on them to get the repaired text.
            Err(invalid) => Cow::Owned(String::from_utf8_lossy(invalid.as_bytes()).into_owned()),
        },
    }
}
| |
/// Converts a string into its UTF-8 bytes without copying.
///
/// A borrowed string yields a borrowed byte slice; an owned `String` is
/// turned into its underlying `Vec<u8>`.
pub fn encode_utf8(input: Cow<str>) -> Cow<[u8]> {
    match input {
        Cow::Owned(text) => Cow::from(text.into_bytes()),
        Cow::Borrowed(text) => Cow::from(text.as_bytes()),
    }
}