| use std::{fmt, str::FromStr}; |
| |
| use crate::{ |
| Buffer, ParseError, |
| err::{perr, ParseErrorKind::*}, |
| parse::{first_byte_or_empty, hex_digit_value, check_suffix}, |
| }; |
| |
| |
| /// An integer literal, e.g. `27`, `0x7F`, `0b101010u8` or `5_000_000i64`. |
| /// |
| /// An integer literal consists of an optional base prefix (`0b`, `0o`, `0x`), |
| /// the main part (digits and underscores), and an optional type suffix |
| /// (e.g. `u64` or `i8`). See [the reference][ref] for more information. |
| /// |
| /// Note that integer literals are always positive: the grammar does not contain |
| /// the minus sign at all. The minus sign is just the unary negate operator, |
| /// not part of the literal. Which is interesting for cases like `- 128i8`: |
| /// here, the literal itself would overflow the specified type (`i8` cannot |
| /// represent 128). That's why in rustc, the literal overflow check is |
| /// performed as a lint after parsing, not during the lexing stage. Similarly, |
| /// [`IntegerLit::parse`] does not perform an overflow check. |
| /// |
| /// [ref]: https://doc.rust-lang.org/reference/tokens.html#integer-literals |
| #[derive(Debug, Clone, Copy, PartialEq, Eq)] |
| #[non_exhaustive] |
| pub struct IntegerLit<B: Buffer> { |
| /// The raw literal. Grammar: `<prefix?><main part><suffix?>`. |
| raw: B, |
| /// First index of the main number part (after the base prefix). |
| start_main_part: usize, |
| /// First index not part of the main number part. |
| end_main_part: usize, |
| /// Parsed `raw[..start_main_part]`. |
| base: IntegerBase, |
| } |
| |
| impl<B: Buffer> IntegerLit<B> { |
| /// Parses the input as an integer literal. Returns an error if the input is |
| /// invalid or represents a different kind of literal. |
| pub fn parse(input: B) -> Result<Self, ParseError> { |
| match first_byte_or_empty(&input)? { |
| digit @ b'0'..=b'9' => { |
| // TODO: simplify once RFC 2528 is stabilized |
| let IntegerLit { |
| start_main_part, |
| end_main_part, |
| base, |
| .. |
| } = parse_impl(&input, digit)?; |
| |
| Ok(Self { raw: input, start_main_part, end_main_part, base }) |
| }, |
| _ => Err(perr(0, DoesNotStartWithDigit)), |
| } |
| } |
| |
| /// Performs the actual string to int conversion to obtain the integer |
| /// value. The optional type suffix of the literal **is ignored by this |
| /// method**. This means `N` does not need to match the type suffix! |
| /// |
| /// Returns `None` if the literal overflows `N`. |
| /// |
| /// Hint: `u128` can represent all possible values integer literal values, |
| /// as there are no negative literals (see type docs). Thus you can, for |
| /// example, safely use `lit.value::<u128>().to_string()` to get a decimal |
| /// string. (Technically, Rust integer literals can represent arbitrarily |
| /// large numbers, but those would be rejected at a later stage by the Rust |
| /// compiler). |
| pub fn value<N: FromIntegerLiteral>(&self) -> Option<N> { |
| let base = N::from_small_number(self.base.value()); |
| |
| let mut acc = N::from_small_number(0); |
| for digit in self.raw_main_part().bytes() { |
| if digit == b'_' { |
| continue; |
| } |
| |
| // We don't actually need the base here: we already know this main |
| // part only contains digits valid for the specified base. |
| let digit = hex_digit_value(digit) |
| .unwrap_or_else(|| unreachable!("bug: integer main part contains non-digit")); |
| |
| acc = acc.checked_mul(base)?; |
| acc = acc.checked_add(N::from_small_number(digit))?; |
| } |
| |
| Some(acc) |
| } |
| |
| /// The base of this integer literal. |
| pub fn base(&self) -> IntegerBase { |
| self.base |
| } |
| |
| /// The main part containing the digits and potentially `_`. Do not try to |
| /// parse this directly as that would ignore the base! |
| pub fn raw_main_part(&self) -> &str { |
| &(*self.raw)[self.start_main_part..self.end_main_part] |
| } |
| |
| /// The optional suffix. Returns `""` if the suffix is empty/does not exist. |
| /// |
| /// If you want the type, try `IntegerType::from_suffix(lit.suffix())`. |
| pub fn suffix(&self) -> &str { |
| &(*self.raw)[self.end_main_part..] |
| } |
| |
| /// Returns the raw input that was passed to `parse`. |
| pub fn raw_input(&self) -> &str { |
| &self.raw |
| } |
| |
| /// Returns the raw input that was passed to `parse`, potentially owned. |
| pub fn into_raw_input(self) -> B { |
| self.raw |
| } |
| } |
| |
| impl IntegerLit<&str> { |
| /// Makes a copy of the underlying buffer and returns the owned version of |
| /// `Self`. |
| pub fn to_owned(&self) -> IntegerLit<String> { |
| IntegerLit { |
| raw: self.raw.to_owned(), |
| start_main_part: self.start_main_part, |
| end_main_part: self.end_main_part, |
| base: self.base, |
| } |
| } |
| } |
| |
| impl<B: Buffer> fmt::Display for IntegerLit<B> { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| write!(f, "{}", &*self.raw) |
| } |
| } |
| |
| /// Integer literal types. *Implementation detail*. |
| /// |
| /// Implemented for all integer literal types. This trait is sealed and cannot |
| /// be implemented outside of this crate. The trait's methods are implementation |
| /// detail of this library and are not subject to semver. |
| pub trait FromIntegerLiteral: self::sealed::Sealed + Copy { |
| /// Creates itself from the given number. `n` is guaranteed to be `<= 16`. |
| #[doc(hidden)] |
| fn from_small_number(n: u8) -> Self; |
| |
| #[doc(hidden)] |
| fn checked_add(self, rhs: Self) -> Option<Self>; |
| |
| #[doc(hidden)] |
| fn checked_mul(self, rhs: Self) -> Option<Self>; |
| |
| #[doc(hidden)] |
| fn ty() -> IntegerType; |
| } |
| |
| macro_rules! impl_from_int_literal { |
| ($( $ty:ty => $variant:ident ,)* ) => { |
| $( |
| impl self::sealed::Sealed for $ty {} |
| impl FromIntegerLiteral for $ty { |
| fn from_small_number(n: u8) -> Self { |
| n as Self |
| } |
| fn checked_add(self, rhs: Self) -> Option<Self> { |
| self.checked_add(rhs) |
| } |
| fn checked_mul(self, rhs: Self) -> Option<Self> { |
| self.checked_mul(rhs) |
| } |
| fn ty() -> IntegerType { |
| IntegerType::$variant |
| } |
| } |
| )* |
| }; |
| } |
| |
| impl_from_int_literal!( |
| u8 => U8, u16 => U16, u32 => U32, u64 => U64, u128 => U128, usize => Usize, |
| i8 => I8, i16 => I16, i32 => I32, i64 => I64, i128 => I128, isize => Isize, |
| ); |
| |
| mod sealed { |
| pub trait Sealed {} |
| } |
| |
| /// Precondition: first byte of string has to be in `b'0'..=b'9'`. |
| #[inline(never)] |
| pub(crate) fn parse_impl(input: &str, first: u8) -> Result<IntegerLit<&str>, ParseError> { |
| // Figure out base and strip prefix base, if it exists. |
| let (end_prefix, base) = match (first, input.as_bytes().get(1)) { |
| (b'0', Some(b'b')) => (2, IntegerBase::Binary), |
| (b'0', Some(b'o')) => (2, IntegerBase::Octal), |
| (b'0', Some(b'x')) => (2, IntegerBase::Hexadecimal), |
| |
| // Everything else is treated as decimal. Several cases are caught |
| // by this: |
| // - "123" |
| // - "0" |
| // - "0u8" |
| // - "0r" -> this will error later |
| _ => (0, IntegerBase::Decimal), |
| }; |
| let without_prefix = &input[end_prefix..]; |
| |
| |
| // Scan input to find the first character that's not a valid digit. |
| let is_valid_digit = match base { |
| IntegerBase::Binary => |b| matches!(b, b'0' | b'1' | b'_'), |
| IntegerBase::Octal => |b| matches!(b, b'0'..=b'7' | b'_'), |
| IntegerBase::Decimal => |b| matches!(b, b'0'..=b'9' | b'_'), |
| IntegerBase::Hexadecimal => |b| matches!(b, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' | b'_'), |
| }; |
| let end_main = without_prefix.bytes() |
| .position(|b| !is_valid_digit(b)) |
| .unwrap_or(without_prefix.len()); |
| let (main_part, suffix) = without_prefix.split_at(end_main); |
| |
| check_suffix(suffix).map_err(|kind| { |
| // This is just to have a nicer error kind for this special case. If the |
| // suffix is invalid, it is non-empty -> unwrap ok. |
| let first = suffix.as_bytes()[0]; |
| if !is_valid_digit(first) && first.is_ascii_digit() { |
| perr(end_main + end_prefix, InvalidDigit) |
| } else { |
| perr(end_main + end_prefix..input.len(), kind) |
| } |
| })?; |
| if suffix.starts_with('e') || suffix.starts_with('E') { |
| return Err(perr(end_main, IntegerSuffixStartingWithE)); |
| } |
| |
| // Make sure main number part is not empty. |
| if main_part.bytes().filter(|&b| b != b'_').count() == 0 { |
| return Err(perr(end_prefix..end_prefix + end_main, NoDigits)); |
| } |
| |
| Ok(IntegerLit { |
| raw: input, |
| start_main_part: end_prefix, |
| end_main_part: end_main + end_prefix, |
| base, |
| }) |
| } |
| |
| |
| /// The bases in which an integer can be specified. |
| #[derive(Debug, Clone, Copy, PartialEq, Eq)] |
| pub enum IntegerBase { |
| Binary, |
| Octal, |
| Decimal, |
| Hexadecimal, |
| } |
| |
| impl IntegerBase { |
| /// Returns the literal prefix that indicates this base, i.e. `"0b"`, |
| /// `"0o"`, `""` and `"0x"`. |
| pub fn prefix(self) -> &'static str { |
| match self { |
| Self::Binary => "0b", |
| Self::Octal => "0o", |
| Self::Decimal => "", |
| Self::Hexadecimal => "0x", |
| } |
| } |
| |
| /// Returns the base value, i.e. 2, 8, 10 or 16. |
| pub fn value(self) -> u8 { |
| match self { |
| Self::Binary => 2, |
| Self::Octal => 8, |
| Self::Decimal => 10, |
| Self::Hexadecimal => 16, |
| } |
| } |
| } |
| |
| /// All possible integer type suffixes. |
| #[derive(Debug, Clone, Copy, PartialEq, Eq)] |
| #[non_exhaustive] |
| pub enum IntegerType { |
| U8, |
| U16, |
| U32, |
| U64, |
| U128, |
| Usize, |
| I8, |
| I16, |
| I32, |
| I64, |
| I128, |
| Isize, |
| } |
| |
| impl IntegerType { |
| /// Returns the type corresponding to the given suffix (e.g. `"u8"` is |
| /// mapped to `Self::U8`). If the suffix is not a valid integer type, |
| /// `None` is returned. |
| pub fn from_suffix(suffix: &str) -> Option<Self> { |
| match suffix { |
| "u8" => Some(Self::U8), |
| "u16" => Some(Self::U16), |
| "u32" => Some(Self::U32), |
| "u64" => Some(Self::U64), |
| "u128" => Some(Self::U128), |
| "usize" => Some(Self::Usize), |
| "i8" => Some(Self::I8), |
| "i16" => Some(Self::I16), |
| "i32" => Some(Self::I32), |
| "i64" => Some(Self::I64), |
| "i128" => Some(Self::I128), |
| "isize" => Some(Self::Isize), |
| _ => None, |
| } |
| } |
| |
| /// Returns the suffix for this type, e.g. `"u8"` for `Self::U8`. |
| pub fn suffix(self) -> &'static str { |
| match self { |
| Self::U8 => "u8", |
| Self::U16 => "u16", |
| Self::U32 => "u32", |
| Self::U64 => "u64", |
| Self::U128 => "u128", |
| Self::Usize => "usize", |
| Self::I8 => "i8", |
| Self::I16 => "i16", |
| Self::I32 => "i32", |
| Self::I64 => "i64", |
| Self::I128 => "i128", |
| Self::Isize => "isize", |
| } |
| } |
| } |
| |
| impl FromStr for IntegerType { |
| type Err = (); |
| fn from_str(s: &str) -> Result<Self, Self::Err> { |
| Self::from_suffix(s).ok_or(()) |
| } |
| } |
| |
| impl fmt::Display for IntegerType { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| self.suffix().fmt(f) |
| } |
| } |
| |
| |
| #[cfg(test)] |
| mod tests; |