| //! Parsing and inspecting Rust literal tokens. |
| //! |
| //! This library offers functionality to parse Rust literals, i.e. tokens in the |
| //! Rust programming language that represent fixed values. The grammar for |
| //! those is defined [here][ref]. |
| //! |
| //! This kind of functionality already exists in the crate `syn`. However, as |
| //! you oftentimes don't need (nor want) the full power of `syn`, `litrs` was |
| //! built. This crate also offers a bit more flexibility compared to `syn` |
| //! (only regarding literals, of course). |
| //! |
| //! |
| //! # Quick start |
| //! |
| //! | **`StringLit::try_from(tt)?.value()`** | |
| //! | - | |
| //! |
| //! ... where `tt` is a `proc_macro::TokenTree` and where [`StringLit`] can be |
| //! replaced with [`Literal`] or other types of literals (e.g. [`FloatLit`]). |
| //! Calling `value()` returns the value that is represented by the literal. |
| //! |
| //! **Mini Example** |
| //! |
| //! ```ignore |
| //! use proc_macro::TokenStream; |
| //! |
| //! #[proc_macro] |
| //! pub fn foo(input: TokenStream) -> TokenStream { |
| //! let first_token = input.into_iter().next().unwrap(); // Do proper error handling! |
| //! let string_value = match litrs::StringLit::try_from(first_token) { |
| //! Ok(string_lit) => string_lit.value(), |
| //! Err(e) => return e.to_compile_error(), |
| //! }; |
| //! |
| //! // `string_value` is the string value with all escapes resolved. |
| //! todo!() |
| //! } |
| //! ``` |
| //! |
| //! # Overview |
| //! |
| //! The main types of this library are [`Literal`], representing any kind of |
| //! literal, and `*Lit`, like [`StringLit`] or [`FloatLit`], representing a |
| //! specific kind of literal. |
| //! |
| //! There are different ways to obtain such a literal type: |
| //! |
| //! - **`parse`**: parses a `&str` or `String` and returns `Result<_, |
| //! ParseError>`. For example: [`Literal::parse`] and |
| //! [`IntegerLit::parse`]. |
| //! |
| //! - **`From<proc_macro::Literal> for Literal`**: turns a `Literal` value from |
| //! the `proc_macro` crate into a `Literal` from this crate. |
| //! |
| //! - **`TryFrom<proc_macro::Literal> for *Lit`**: tries to turn a |
| //! `proc_macro::Literal` into a specific literal type of this crate. If |
| //! the input is a literal of a different kind, `Err(InvalidToken)` is |
| //! returned. |
| //! |
| //! - **`TryFrom<proc_macro::TokenTree>`**: attempts to turn a token tree into a |
| //! literal type of this crate. An error is returned if the token tree is |
| //! not a literal, or if you are trying to turn it into a specific kind of |
| //! literal and the token tree is a different kind of literal. |
| //! |
//! All of the `From` and `TryFrom` conversions also work for references to
//! `proc_macro` types. Additionally, if the crate feature `proc-macro2` is
//! enabled (which it is by default), all these `From` and `TryFrom` impls also
//! exist for the corresponding `proc_macro2` types.
| //! |
| //! **Note**: `true` and `false` are `Ident`s when passed to your proc macro. |
| //! The `TryFrom<TokenTree>` impls check for those two special idents and |
| //! return a [`BoolLit`] appropriately. For that reason, there is also no |
| //! `TryFrom<proc_macro::Literal>` impl for [`BoolLit`]. The `proc_macro::Literal` |
| //! simply cannot represent bool literals. |
| //! |
| //! |
| //! # Examples |
| //! |
| //! In a proc-macro: |
| //! |
| //! ```ignore |
| //! use std::convert::TryFrom; |
| //! use proc_macro::TokenStream; |
| //! use litrs::FloatLit; |
| //! |
| //! #[proc_macro] |
| //! pub fn foo(input: TokenStream) -> TokenStream { |
| //! let mut input = input.into_iter().collect::<Vec<_>>(); |
| //! if input.len() != 1 { |
| //! // Please do proper error handling in your real code! |
| //! panic!("expected exactly one token as input"); |
| //! } |
| //! let token = input.remove(0); |
| //! |
| //! match FloatLit::try_from(token) { |
| //! Ok(float_lit) => { /* do something */ } |
| //! Err(e) => return e.to_compile_error(), |
| //! } |
| //! |
| //! // Dummy output |
| //! TokenStream::new() |
| //! } |
| //! ``` |
| //! |
| //! Parsing from string: |
| //! |
| //! ``` |
| //! use litrs::{FloatLit, Literal}; |
| //! |
| //! // Parse a specific kind of literal (float in this case): |
| //! let float_lit = FloatLit::parse("3.14f32"); |
| //! assert!(float_lit.is_ok()); |
| //! assert_eq!(float_lit.unwrap().suffix(), "f32"); |
| //! assert!(FloatLit::parse("'c'").is_err()); |
| //! |
| //! // Parse any kind of literal. After parsing, you can inspect the literal |
| //! // and decide what to do in each case. |
| //! let lit = Literal::parse("0xff80").expect("failed to parse literal"); |
| //! match lit { |
| //! Literal::Integer(lit) => { /* ... */ } |
| //! Literal::Float(lit) => { /* ... */ } |
| //! Literal::Bool(lit) => { /* ... */ } |
| //! Literal::Char(lit) => { /* ... */ } |
| //! Literal::String(lit) => { /* ... */ } |
| //! Literal::Byte(lit) => { /* ... */ } |
| //! Literal::ByteString(lit) => { /* ... */ } |
| //! } |
| //! ``` |
| //! |
| //! |
| //! |
| //! # Crate features |
| //! |
| //! - `proc-macro2` (**default**): adds the dependency `proc_macro2`, a bunch of |
| //! `From` and `TryFrom` impls, and [`InvalidToken::to_compile_error2`]. |
| //! - `check_suffix`: if enabled, `parse` functions will exactly verify that the |
| //! literal suffix is valid. Adds the dependency `unicode-xid`. If disabled, |
| //! only an approximate check (only in ASCII range) is done. If you are |
| //! writing a proc macro, you don't need to enable this as the suffix is |
| //! already checked by the compiler. |
| //! |
| //! |
| //! [ref]: https://doc.rust-lang.org/reference/tokens.html#literals |
| //! |
| |
| #![deny(missing_debug_implementations)] |
| |
| extern crate proc_macro; |
| |
| #[cfg(test)] |
| #[macro_use] |
| mod test_util; |
| |
| #[cfg(test)] |
| mod tests; |
| |
| mod bool; |
| mod byte; |
| mod bytestr; |
| mod char; |
| mod err; |
| mod escape; |
| mod float; |
| mod impls; |
| mod integer; |
| mod parse; |
| mod string; |
| |
| |
| use std::{borrow::{Borrow, Cow}, fmt, ops::{Deref, Range}}; |
| |
| pub use self::{ |
| bool::BoolLit, |
| byte::ByteLit, |
| bytestr::ByteStringLit, |
| char::CharLit, |
| err::{InvalidToken, ParseError}, |
| float::{FloatLit, FloatType}, |
| integer::{FromIntegerLiteral, IntegerLit, IntegerBase, IntegerType}, |
| string::StringLit, |
| }; |
| |
| |
| // ============================================================================================== |
| // ===== `Literal` and type defs |
| // ============================================================================================== |
| |
| /// A literal. This is the main type of this library. |
| /// |
| /// This type is generic over the underlying buffer `B`, which can be `&str` or |
| /// `String`. |
| /// |
| /// To create this type, you have to either call [`Literal::parse`] with an |
| /// input string or use the `From<_>` impls of this type. The impls are only |
| /// available of the corresponding crate features are enabled (they are enabled |
| /// by default). |
| #[derive(Debug, Clone, PartialEq, Eq)] |
| pub enum Literal<B: Buffer> { |
| Bool(BoolLit), |
| Integer(IntegerLit<B>), |
| Float(FloatLit<B>), |
| Char(CharLit<B>), |
| String(StringLit<B>), |
| Byte(ByteLit<B>), |
| ByteString(ByteStringLit<B>), |
| } |
| |
| impl<B: Buffer> Literal<B> { |
| /// Parses the given input as a Rust literal. |
| pub fn parse(input: B) -> Result<Self, ParseError> { |
| parse::parse(input) |
| } |
| |
| /// Returns the suffix of this literal or `""` if it doesn't have one. |
| /// |
| /// Rust token grammar actually allows suffixes for all kinds of tokens. |
| /// Most Rust programmer only know the type suffixes for integer and |
| /// floats, e.g. `0u32`. And in normal Rust code, everything else causes an |
| /// error. But it is possible to pass literals with arbitrary suffixes to |
| /// proc macros, for example: |
| /// |
| /// ```ignore |
| /// some_macro!(3.14f33 16px '🦊'good_boy "toph"beifong); |
| /// ``` |
| /// |
| /// Boolean literals, not actually being literals, but idents, cannot have |
| /// suffixes and this method always returns `""` for those. |
| /// |
| /// There are some edge cases to be aware of: |
| /// - Integer suffixes must not start with `e` or `E` as that conflicts with |
| /// the exponent grammar for floats. `0e1` is a float; `0eel` is also |
| /// parsed as a float and results in an error. |
| /// - Hexadecimal integers eagerly parse digits, so `0x5abcdefgh` has a |
| /// suffix von `gh`. |
| /// - Suffixes can contain and start with `_`, but for integer and number |
| /// literals, `_` is eagerly parsed as part of the number, so `1_x` has |
| /// the suffix `x`. |
| /// - The input `55f32` is regarded as integer literal with suffix `f32`. |
| /// |
| /// # Example |
| /// |
| /// ``` |
| /// use litrs::Literal; |
| /// |
| /// assert_eq!(Literal::parse(r##"3.14f33"##).unwrap().suffix(), "f33"); |
| /// assert_eq!(Literal::parse(r##"123hackerman"##).unwrap().suffix(), "hackerman"); |
| /// assert_eq!(Literal::parse(r##"0x0fuck"##).unwrap().suffix(), "uck"); |
| /// assert_eq!(Literal::parse(r##"'🦊'good_boy"##).unwrap().suffix(), "good_boy"); |
| /// assert_eq!(Literal::parse(r##""toph"beifong"##).unwrap().suffix(), "beifong"); |
| /// ``` |
| pub fn suffix(&self) -> &str { |
| match self { |
| Literal::Bool(_) => "", |
| Literal::Integer(l) => l.suffix(), |
| Literal::Float(l) => l.suffix(), |
| Literal::Char(l) => l.suffix(), |
| Literal::String(l) => l.suffix(), |
| Literal::Byte(l) => l.suffix(), |
| Literal::ByteString(l) => l.suffix(), |
| } |
| } |
| } |
| |
| impl Literal<&str> { |
| /// Makes a copy of the underlying buffer and returns the owned version of |
| /// `Self`. |
| pub fn into_owned(self) -> Literal<String> { |
| match self { |
| Literal::Bool(l) => Literal::Bool(l.to_owned()), |
| Literal::Integer(l) => Literal::Integer(l.to_owned()), |
| Literal::Float(l) => Literal::Float(l.to_owned()), |
| Literal::Char(l) => Literal::Char(l.to_owned()), |
| Literal::String(l) => Literal::String(l.into_owned()), |
| Literal::Byte(l) => Literal::Byte(l.to_owned()), |
| Literal::ByteString(l) => Literal::ByteString(l.into_owned()), |
| } |
| } |
| } |
| |
| impl<B: Buffer> fmt::Display for Literal<B> { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| match self { |
| Literal::Bool(l) => l.fmt(f), |
| Literal::Integer(l) => l.fmt(f), |
| Literal::Float(l) => l.fmt(f), |
| Literal::Char(l) => l.fmt(f), |
| Literal::String(l) => l.fmt(f), |
| Literal::Byte(l) => l.fmt(f), |
| Literal::ByteString(l) => l.fmt(f), |
| } |
| } |
| } |
| |
| |
| // ============================================================================================== |
| // ===== Buffer |
| // ============================================================================================== |
| |
/// A string buffer that is either shared or owned. Implemented for `String`
/// and `&str`. *Implementation detail*.
///
/// This trait is an implementation detail of this library: it cannot be
/// implemented in other crates and is not subject to semantic versioning.
/// `litrs` only guarantees that this trait is implemented for `String` and
/// `for<'a> &'a str`.
pub trait Buffer: sealed::Sealed + Deref<Target = str> {
    /// This is `Cow<'static, str>` for `String`, and `Cow<'a, str>` for `&'a str`.
    type Cow: From<String> + AsRef<str> + Borrow<str> + Deref<Target = str>;

    /// This is `Cow<'static, [u8]>` for `String`, and `Cow<'a, [u8]>` for `&'a str`.
    type ByteCow: From<Vec<u8>> + AsRef<[u8]> + Borrow<[u8]> + Deref<Target = [u8]>;

    /// Converts this buffer into its [`Buffer::Cow`] form.
    #[doc(hidden)]
    fn into_cow(self) -> Self::Cow;

    /// Converts this buffer into its [`Buffer::ByteCow`] form.
    #[doc(hidden)]
    fn into_byte_cow(self) -> Self::ByteCow;

    /// Keeps only the part of the buffer inside `range`, cutting away
    /// everything before `range.start` and everything from `range.end` on.
    /// The given range has to be in bounds.
    #[doc(hidden)]
    fn cut(self, range: Range<usize>) -> Self;
}

/// Holds the supertrait of [`Buffer`]. The module is private, so other crates
/// cannot name `Sealed` and therefore cannot implement `Buffer`.
mod sealed {
    pub trait Sealed {}
}

impl<'a> sealed::Sealed for &'a str {}

/// Borrowed buffer: every operation is a cheap re-borrow of the same data.
impl<'a> Buffer for &'a str {
    type Cow = Cow<'a, str>;
    type ByteCow = Cow<'a, [u8]>;

    #[doc(hidden)]
    fn into_cow(self) -> Self::Cow {
        Cow::Borrowed(self)
    }

    #[doc(hidden)]
    fn into_byte_cow(self) -> Self::ByteCow {
        Cow::Borrowed(self.as_bytes())
    }

    #[doc(hidden)]
    fn cut(self, range: Range<usize>) -> Self {
        let Range { start, end } = range;
        &self[start..end]
    }
}

impl sealed::Sealed for String {}

/// Owned buffer: conversions hand over the existing allocation.
impl Buffer for String {
    type Cow = Cow<'static, str>;
    type ByteCow = Cow<'static, [u8]>;

    #[doc(hidden)]
    fn into_cow(self) -> Self::Cow {
        Cow::Owned(self)
    }

    #[doc(hidden)]
    fn into_byte_cow(self) -> Self::ByteCow {
        Cow::Owned(self.into_bytes())
    }

    #[doc(hidden)]
    fn cut(mut self, range: Range<usize>) -> Self {
        let Range { start, end } = range;

        // Not the most efficient approach, but it works: chop off the tail
        // first (so that `start` still refers to the same position), then
        // remove the head. `drain` removes its range even when the returned
        // iterator is dropped without being consumed.
        self.truncate(end);
        self.drain(..start);
        self
    }
}