vendor/wast-219.0.0/src/lexer.rs - toolchain/rustc - Git at Google

 //! Definition of a lexer for the WebAssembly text format.
 //!
 //! This module provides a [`Lexer`][] type which iterates over the raw
 //! tokens of a WebAssembly text file. A [`Lexer`][] accounts for every single
 //! byte in a WebAssembly text file, returning tokens even for comments and
 //! whitespace. Typically you'll ignore comments and whitespace, however.
 //!
 //! If you'd like to iterate over the tokens in a file you can do so via:
 //!
 //! ```
 //! # fn foo() -> Result<(), wast::Error> {
 //! use wast::lexer::Lexer;
 //!
 //! let wat = "(module (func $foo))";
 //! for token in Lexer::new(wat).iter(0) {
 //!     println!("{:?}", token?);
 //! }
 //! # Ok(())
 //! # }
 //! ```
 //!
 //! Note that you'll typically not use this module but will rather use
 //! [`ParseBuffer`](crate::parser::ParseBuffer) instead.
 //!
 //! [`Lexer`]: crate::lexer::Lexer

 use crate::token::Span;
 use crate::Error;
 use std::borrow::Cow;
 use std::char;
 use std::fmt;
 use std::slice;
 use std::str;
 use std::str::Utf8Error;

 /// A structure used to lex the s-expression syntax of WAT files.
 ///
 /// This structure is used to generate [`Token`] items, which should account for
 /// every single byte of the input as we iterate over it. A [`LexError`] is
 /// returned for any non-lexable text.
 #[derive(Clone)]
 pub struct Lexer<'a> {
     /// The complete source text being lexed; byte positions passed to the
     /// lexing methods index into this string.
     input: &'a str,
     /// Whether "confusing" unicode characters are tolerated in comments and
     /// string literals. Set to `false` by `Lexer::new`, in which case such
     /// characters produce a lexing error.
     allow_confusing_unicode: bool,
 }

 /// A single token parsed from a `Lexer`.
 ///
 /// A token does not hold its source text; it records only its kind and the
 /// `offset`/`len` byte range it occupies in the original input.
 #[derive(Copy, Clone, Debug, PartialEq)]
 pub struct Token {
     /// The kind of token this represents, such as whether it's whitespace, a
     /// keyword, etc.
     pub kind: TokenKind,
     /// The byte offset within the original source for where this token came
     /// from.
     pub offset: usize,
     /// The byte length of this token as it resides in the original source.
     //
     // NB: this is `u32` to enable packing `Token` into two pointers of size.
     // This does limit a single token to being at most 4G large, but that seems
     // probably ok.
     pub len: u32,
 }

 // Guards the size budget that motivates `Token::len` being a `u32`: a token
 // must fit within two 64-bit words.
 #[test]
 fn token_is_not_too_big() {
     let budget = 2 * std::mem::size_of::<u64>();
     let actual = std::mem::size_of::<Token>();
     assert!(actual <= budget);
 }

 /// Classification of what was parsed from the input stream.
 ///
 /// This enumeration contains all kinds of fragments, including comments and
 /// whitespace. Apart from the number variants, no variant carries the token's
 /// text; the text is located through the enclosing [`Token`]'s `offset` and
 /// `len`.
 #[derive(Copy, Clone, Debug, PartialEq)]
 pub enum TokenKind {
     /// A line comment, preceded with `;;`
     LineComment,

     /// A block comment, surrounded by `(;` and `;)`. Note that these can be
     /// nested.
     BlockComment,

     /// A fragment of source that represents whitespace.
     Whitespace,

     /// A left-parenthesis, `(`.
     LParen,
     /// A right-parenthesis, `)`.
     RParen,

     /// A string literal, which is actually a list of bytes.
     String,

     /// An identifier (like `$foo`).
     ///
     /// All identifiers start with `$`, either followed by `idchar`s or by a
     /// quoted string (`$"..."`).
     Id,

     /// A keyword: a run of `idchar`s that starts with a lowercase ASCII
     /// letter and is not a number.
     Keyword,

     /// An annotation (like `@foo`).
     ///
     /// All annotations start with `@`, either followed by `idchar`s or by a
     /// quoted string (`@"..."`).
     Annotation,

     /// A reserved series of `idchar` symbols and/or strings that matched no
     /// other token kind, or one of the lone reserved characters `;`, `,`,
     /// `[`, `]`, `{`, `}`. Unknown what this is meant to be used for, you'll
     /// probably generate an error about an unexpected token.
     Reserved,

     /// An integer.
     Integer(IntegerKind),

     /// A float.
     Float(FloatKind),
 }

 /// Description of the parsed integer from the source.
 #[derive(Copy, Clone, Debug, PartialEq)]
 pub struct IntegerKind {
     /// The explicit leading `+` or `-`, if the literal had one.
     sign: Option<SignToken>,
     /// Whether the literal contained `_` digit separators.
     has_underscores: bool,
     /// Whether the literal was written with a `0x` prefix.
     hex: bool,
 }

 /// Description of a parsed float from the source.
 ///
 /// In every variant `negative` is `true` only when the literal had a leading
 /// `-`, and `has_underscores` records whether `_` digit separators appeared.
 #[allow(missing_docs)]
 #[derive(Copy, Clone, Debug, PartialEq)]
 pub enum FloatKind {
     // The literal `inf`, optionally signed.
     #[doc(hidden)]
     Inf { negative: bool },
     // The literal `nan`, optionally signed.
     #[doc(hidden)]
     Nan { negative: bool },
     // A `nan:0x...` literal with an explicit hexadecimal payload, optionally
     // signed.
     #[doc(hidden)]
     NanVal {
         negative: bool,
         has_underscores: bool,
     },
     // Any other float literal; `hex` is whether it used a `0x` prefix.
     #[doc(hidden)]
     Normal { has_underscores: bool, hex: bool },
 }

 /// Internal classification of the text consumed by `Lexer::parse_reserved`,
 /// based on how many `idchar` bytes and how many string literals it contained.
 enum ReservedKind {
     /// "..." (exactly one string and no idchars)
     String,
     /// anything that's just a sequence of `idchars!()`
     Idchars,
     /// $"..." (a single `$` followed by one string)
     IdString,
     /// @"..." (a single `@` followed by one string)
     AnnotationString,
     /// everything else (a conglomeration of strings, idchars, etc)
     Reserved,
 }

 /// Errors that can be generated while lexing.
 ///
 /// All lexing errors have line/column/position information as well as a
 /// `LexError` indicating what kind of error happened while lexing.
 #[derive(Debug, Clone, PartialEq, Eq)]
 #[non_exhaustive]
 pub enum LexError {
     /// A dangling block comment was found with an unbalanced `(;` which was
     /// never terminated in the file.
     DanglingBlockComment,

     /// An unexpected character was encountered when generally parsing and
     /// looking for something else.
     Unexpected(char),

     /// An invalid `char` in a string literal was found.
     InvalidStringElement(char),

     /// An invalid string escape letter was found (the thing after the `\` in
     /// string literals)
     InvalidStringEscape(char),

     /// An invalid hexadecimal digit was found.
     InvalidHexDigit(char),

     /// An invalid base-10 digit was found.
     InvalidDigit(char),

     /// Parsing expected `wanted` but ended up finding `found` instead where the
     /// two characters aren't the same.
     Expected {
         /// The character that was expected to be found
         wanted: char,
         /// The character that was actually found
         found: char,
     },

     /// We needed to parse more but EOF (or end of the string) was encountered.
     UnexpectedEof,

     /// A number failed to parse because it was too big to fit within the target
     /// type.
     NumberTooBig,

     /// An invalid unicode value was found in a `\u{...}` escape in a string,
     /// only valid unicode scalars can be escaped that way.
     InvalidUnicodeValue(u32),

     /// A lone underscore was found when parsing a number, since underscores
     /// should always be preceded and succeeded with a digit of some form.
     LoneUnderscore,

     /// A "confusing" unicode character is present in a comment or a string
     /// literal, such as a character that changes the direction text is
     /// typically displayed in editors. This could cause the human-read
     /// version to behave differently than the compiler-visible version, so
     /// these are simply rejected for now.
     ConfusingUnicode(char),

     /// An invalid utf-8 sequence was found in a quoted identifier, such as
     /// `$"\ff"`.
     InvalidUtf8Id(Utf8Error),

     /// An empty identifier was found, or a lone `$`.
     EmptyId,

     /// An empty annotation was found, or a lone `@`.
     EmptyAnnotation,
 }

 /// A sign token for an integer.
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum SignToken {
     /// Plus sign: `+`.
     Plus,
     /// Minus sign: `-`.
     Minus,
 }

 /// A fully parsed integer from a source string with a payload ready to parse
 /// into an integral type.
 #[derive(Debug, PartialEq)]
 pub struct Integer<'a> {
     /// The explicit sign written on the literal, if any.
     sign: Option<SignToken>,
     /// The textual payload of the number.
     // NOTE(review): presumably the digits with any sign, `0x` prefix and
     // underscores removed (hence the `Cow`) -- confirm against the code that
     // constructs `Integer`, which is not visible here.
     val: Cow<'a, str>,
     /// Whether `val` is to be interpreted as hexadecimal.
     hex: bool,
 }

 /// Possible parsed float values
 #[derive(Debug, PartialEq, Eq)]
 pub enum Float<'a> {
     /// A float `NaN` representation
     Nan {
         /// The specific bits to encode for this float, optionally
         val: Option<Cow<'a, str>>,
         /// Whether or not this is a negative `NaN` or not.
         negative: bool,
     },
     /// An infinite float representation.
     Inf {
         #[allow(missing_docs)]
         negative: bool,
     },
     /// A parsed and separated floating point value
     Val {
         /// Whether or not the `integral` and `decimal` are specified in hex
         hex: bool,
         /// The float parts before the `.`
         integral: Cow<'a, str>,
         /// The float parts after the `.`
         decimal: Option<Cow<'a, str>>,
         /// The exponent to multiply this `integral.decimal` portion of the
         /// float by. If `hex` is true this is `2^exponent` and otherwise it's
         /// `10^exponent`
         exponent: Option<Cow<'a, str>>,
     },
 }

 // Expands to an or-pattern matching every ASCII byte that is an `idchar` in
 // the WebAssembly text format: digits, ASCII letters, and the punctuation
 // listed below. It is meant to be used in pattern position on a `u8`.
 //
 // https://webassembly.github.io/spec/core/text/values.html#text-idchar
 macro_rules! idchars {
     () => {
         b'0'..=b'9'
         | b'A'..=b'Z'
         | b'a'..=b'z'
         | b'!'
         | b'#'
         | b'$'
         | b'%'
         | b'&'
         | b'\''
         | b'*'
         | b'+'
         | b'-'
         | b'.'
         | b'/'
         | b':'
         | b'<'
         | b'='
         | b'>'
         | b'?'
         | b'@'
         | b'\\'
         | b'^'
         | b'_'
         | b'`'
         | b'|'
         | b'~'
     }
 }

 impl<'a> Lexer<'a> {
     /// Builds a lexer over the `input` source string.
     ///
     /// The returned lexer rejects "confusing" unicode characters until
     /// configured otherwise.
     pub fn new(input: &str) -> Lexer<'_> {
         let allow_confusing_unicode = false;
         Lexer {
             input,
             allow_confusing_unicode,
         }
     }

     /// Returns the original source input that we're lexing.
     ///
     /// The returned slice has the lifetime of the input itself rather than of
     /// this `Lexer` borrow.
     pub fn input(&self) -> &'a str {
         self.input
     }

     /// Configures whether "confusing" unicode characters are allowed while
     /// lexing.
     ///
     /// If allowed then no error will happen if these characters are found, but
     /// otherwise if disallowed a lex error will be produced when these
     /// characters are found. Confusing characters are denied by default.
     ///
     /// For now "confusing characters" are primarily related to the "trojan
     /// source" problem where it refers to characters which cause humans to read
     /// text differently than this lexer, such as characters that alter the
     /// left-to-right display of the source code.
     ///
     /// Returns `self` so that further configuration calls can be chained.
     pub fn allow_confusing_unicode(&mut self, allow: bool) -> &mut Self {
         self.allow_confusing_unicode = allow;
         self
     }

     /// Lexes the next token starting at the byte position `pos` in the input.
     ///
     /// Yields `Some(token)` when a token was recognized and `None` once `pos`
     /// is at the end of the input.
     ///
     /// On a successful parse `pos` is moved forward so that it points at the
     /// start of the following token.
     ///
     /// # Errors
     ///
     /// Returns an error if the input is malformed.
     ///
     /// # Panics
     ///
     /// Panics if the token's byte length does not fit in the `u32` used by
     /// `Token::len`.
     pub fn parse(&self, pos: &mut usize) -> Result<Option<Token>, Error> {
         let offset = *pos;
         let kind = self.parse_kind(pos)?;
         // `pos` now sits just past the token, so the distance travelled is the
         // token's length.
         Ok(kind.map(|kind| Token {
             kind,
             offset,
             len: (*pos - offset).try_into().unwrap(),
         }))
     }

     /// Lexes a single token starting at byte offset `*pos` and returns its
     /// kind.
     ///
     /// Returns `Ok(None)` when `*pos` is at the end of the input. On success
     /// `*pos` is advanced past the token that was recognized.
     ///
     /// # Errors
     ///
     /// Returns an error for an unterminated block comment, a byte that cannot
     /// start any token, a malformed string literal, or (unless allowed) a
     /// confusing unicode character inside a comment or string.
     fn parse_kind(&self, pos: &mut usize) -> Result<Option<TokenKind>, Error> {
         let start = *pos;
         // This `match` generally parses the grammar specified at
         //
         // https://webassembly.github.io/spec/core/text/lexical.html#text-token
         let remaining = &self.input.as_bytes()[start..];
         let byte = match remaining.first() {
             Some(b) => b,
             None => return Ok(None),
         };

         match byte {
             // Open-parens check the next character to see if this is the start
             // of a block comment, otherwise it's just a bland left-paren
             // token.
             b'(' => match remaining.get(1) {
                 Some(b';') => {
                     // Nesting depth of `(;` ... `;)` pairs; the comment ends
                     // when this drops back to zero.
                     let mut level = 1;
                     // Note that we're doing a byte-level search here for the
                     // close-delimiter of `;)`. The actual source text is utf-8
                     // encoded in `remaining` but due to how utf-8 works we
                     // can safely search for an ASCII byte since it'll never
                     // otherwise appear in the middle of a codepoint and if we
                     // find it then it's guaranteed to be the right byte.
                     //
                     // Mainly we're avoiding the overhead of decoding utf-8
                     // characters into a Rust `char` since it's otherwise
                     // unnecessary work.
                     let mut iter = remaining[2..].iter();
                     while let Some(ch) = iter.next() {
                         match ch {
                             b'(' => {
                                 if let Some(b';') = iter.as_slice().first() {
                                     level += 1;
                                     iter.next();
                                 }
                             }
                             b';' => {
                                 if let Some(b')') = iter.as_slice().first() {
                                     level -= 1;
                                     iter.next();
                                     if level == 0 {
                                         // Everything consumed so far, from the
                                         // opening `(;` through this `;)`.
                                         let len = remaining.len() - iter.as_slice().len();
                                         let comment = &self.input[start..][..len];
                                         *pos += len;
                                         self.check_confusing_comment(*pos, comment)?;
                                         return Ok(Some(TokenKind::BlockComment));
                                     }
                                 }
                             }
                             _ => {}
                         }
                     }
                     // Ran out of input before the nesting level reached zero.
                     Err(self.error(start, LexError::DanglingBlockComment))
                 }
                 _ => {
                     *pos += 1;

                     Ok(Some(TokenKind::LParen))
                 }
             },

             b')' => {
                 *pos += 1;
                 Ok(Some(TokenKind::RParen))
             }

             // https://webassembly.github.io/spec/core/text/lexical.html#white-space
             b' ' | b'\n' | b'\r' | b'\t' => {
                 self.skip_ws(pos);
                 Ok(Some(TokenKind::Whitespace))
             }

             c @ (idchars!() | b'"') => {
                 let (kind, src) = self.parse_reserved(pos)?;
                 match kind {
                     // If the reserved token was simply a single string then
                     // that is converted to a standalone string token
                     ReservedKind::String => return Ok(Some(TokenKind::String)),

                     // If only idchars were consumed then this could be a
                     // specific kind of standalone token we're interested in.
                     ReservedKind::Idchars => {
                         // https://webassembly.github.io/spec/core/text/values.html#integers
                         if let Some(ret) = self.classify_number(src) {
                             return Ok(Some(ret));
                         // https://webassembly.github.io/spec/core/text/values.html#text-id
                         } else if *c == b'$' {
                             return Ok(Some(TokenKind::Id));
                         // part of the WebAssembly/annotations proposal
                         // (no online url yet)
                         } else if *c == b'@' {
                             return Ok(Some(TokenKind::Annotation));
                         // https://webassembly.github.io/spec/core/text/lexical.html#text-keyword
                         } else if b'a' <= *c && *c <= b'z' {
                             return Ok(Some(TokenKind::Keyword));
                         }
                     }

                     ReservedKind::IdString => return Ok(Some(TokenKind::Id)),
                     ReservedKind::AnnotationString => return Ok(Some(TokenKind::Annotation)),

                     // ... otherwise this was a conglomeration of idchars,
                     // strings, or just idchars that don't match a prior rule,
                     // meaning this falls through to the fallback `Reserved`
                     // token.
                     ReservedKind::Reserved => {}
                 }

                 Ok(Some(TokenKind::Reserved))
             }

             // This could be a line comment, otherwise `;` is a reserved token.
             // The second byte is checked to see if it's a `;;` line comment
             //
             // Note that this character being considered as part of a
             // `reserved` token is part of the annotations proposal.
             b';' => match remaining.get(1) {
                 Some(b';') => {
                     let remaining = &self.input[*pos..];
                     // The comment runs up to, but does not include, the next
                     // `\n` or `\r`, or to the end of the input.
                     let byte_pos = memchr::memchr2(b'\n', b'\r', remaining.as_bytes())
                         .unwrap_or(remaining.len());
                     *pos += byte_pos;
                     let comment = &remaining[..byte_pos];
                     self.check_confusing_comment(*pos, comment)?;
                     Ok(Some(TokenKind::LineComment))
                 }
                 _ => {
                     *pos += 1;
                     Ok(Some(TokenKind::Reserved))
                 }
             },

             // Other known reserved tokens other than `;`
             //
             // Note that these characters being considered as part of a
             // `reserved` token is part of the annotations proposal.
             b',' | b'[' | b']' | b'{' | b'}' => {
                 *pos += 1;
                 Ok(Some(TokenKind::Reserved))
             }

             _ => {
                 // Decode the full character (it may be multi-byte) so the error
                 // reports it rather than just its first byte.
                 let ch = self.input[start..].chars().next().unwrap();
                 Err(self.error(*pos, LexError::Unexpected(ch)))
             }
         }
     }

     /// Advances `pos` past the run of whitespace bytes that begins at `pos`.
     ///
     /// `pos` is left unchanged when the byte there is not whitespace or when
     /// `pos` is already at the end of the input.
     fn skip_ws(&self, pos: &mut usize) {
         // Byte-indexed membership table: an entry is `true` exactly for the
         // four whitespace bytes of the `*.wat` format, which are ' ', '\t',
         // '\n', and '\r'.
         //
         // The input is guaranteed to be utf-8, and in utf-8 these ASCII bytes
         // never occur inside a multi-byte codepoint. Scanning raw bytes for
         // the first non-whitespace byte is therefore safe and avoids full
         // utf-8 decoding.
         //
         // A lookup table seems to be the fastest approach for now, but
         // projects like https://github.com/lemire/despacer show other simd
         // algorithms which can possibly accelerate this even more. `*.wat`
         // files often contain a lot of whitespace, so this function is
         // typically quite hot when parsing inputs.
         const IS_WS: [bool; 256] = {
             let mut table = [false; 256];
             table[b' ' as usize] = true;
             table[b'\t' as usize] = true;
             table[b'\n' as usize] = true;
             table[b'\r' as usize] = true;
             table
         };

         let rest = self.input[*pos..].as_bytes();
         let ws_len = rest
             .iter()
             .position(|&b| !IS_WS[usize::from(b)])
             .unwrap_or(rest.len());
         *pos += ws_len;
     }

     /// Consumes one "reserved" token starting at `*pos` and reports what it
     /// was made of via `ReservedKind`, so the caller can decide which concrete
     /// token kind it is.
     ///
     /// The grammar being parsed, as clarified in
     /// <https://github.com/WebAssembly/spec/pull/1499>, is:
     ///
     /// ```text
     /// reserved := (idchar | string)+
     /// ```
     ///
     /// In other words any number of directly adjacent idchar runs and string
     /// literals (e.g. `a"b"c`) are swallowed as a single unit. The returned
     /// string slice is the exact source text that was consumed, and `*pos` is
     /// left just past it.
     ///
     /// # Errors
     ///
     /// Returns an error if a string literal within the token is malformed.
     fn parse_reserved(&self, pos: &mut usize) -> Result<(ReservedKind, &'a str), Error> {
         let begin = *pos;
         let bytes = self.input.as_bytes();
         let mut idchar_count = 0u32;
         let mut string_count = 0u32;

         loop {
             match bytes.get(*pos) {
                 // An ordinary `idchar` simply extends the token by one byte.
                 Some(idchars!()) => {
                     idchar_count += 1;
                     *pos += 1;
                 }

                 // https://webassembly.github.io/spec/core/text/values.html#text-string
                 Some(b'"') => {
                     string_count += 1;
                     *pos += 1;
                     let mut chars = self.input[*pos..].chars();
                     let outcome = Lexer::parse_str(&mut chars, self.allow_confusing_unicode);
                     // Resynchronize with however far the string parser read,
                     // whether or not it succeeded.
                     *pos = self.input.len() - chars.as_str().len();
                     if let Err(e) = outcome {
                         // EOF is reported at the end of the input; anything
                         // else at the last character that was consumed.
                         let at = match &e {
                             LexError::UnexpectedEof => self.input.len(),
                             _ => self.input[..*pos].char_indices().next_back().unwrap().0,
                         };
                         return Err(self.error(at, e));
                     }
                 }

                 // End of input, or a byte that can't be part of a reserved
                 // token.
                 _ => break,
             }
         }

         let text = &self.input[begin..*pos];
         let kind = match (idchar_count, string_count) {
             (0, 0) => unreachable!(),
             (0, 1) => ReservedKind::String,
             (_, 0) => ReservedKind::Idchars,
             // A single idchar followed by a single string is `$"..."` or
             // `@"..."`, i.e. a string-based identifier or annotation.
             (1, 1) if text.starts_with("$") => ReservedKind::IdString,
             (1, 1) if text.starts_with("@") => ReservedKind::AnnotationString,
             _ => ReservedKind::Reserved,
         };
         Ok((kind, text))
     }

     /// Decides whether `src`, a run of `idchar`s, is a number literal.
     ///
     /// Returns `Some(TokenKind::Integer(..))` or `Some(TokenKind::Float(..))`
     /// describing the literal, or `None` when `src` is not a well-formed
     /// number (in which case the caller classifies it as something else).
     fn classify_number(&self, src: &str) -> Option<TokenKind> {
         let (sign, num) = if let Some(stripped) = src.strip_prefix('+') {
             (Some(SignToken::Plus), stripped)
         } else if let Some(stripped) = src.strip_prefix('-') {
             (Some(SignToken::Minus), stripped)
         } else {
             (None, src)
         };

         let negative = sign == Some(SignToken::Minus);

         // Handle `inf` and `nan` which are special numbers here
         if num == "inf" {
             return Some(TokenKind::Float(FloatKind::Inf { negative }));
         } else if num == "nan" {
             return Some(TokenKind::Float(FloatKind::Nan { negative }));
         } else if let Some(stripped) = num.strip_prefix("nan:0x") {
             let mut it = stripped.as_bytes().iter();
             let has_underscores = skip_underscores(&mut it, |x| char::from(x).is_ascii_hexdigit())?;
             // The hex payload must make up the entire remainder of the token.
             if it.next().is_some() {
                 return None;
             }
             return Some(TokenKind::Float(FloatKind::NanVal {
                 negative,
                 has_underscores,
             }));
         }

         // Figure out if we're a hex number or not
         let test_valid: fn(u8) -> bool;
         let (mut it, hex) = if let Some(stripped) = num.strip_prefix("0x") {
             test_valid = |x: u8| char::from(x).is_ascii_hexdigit();
             (stripped.as_bytes().iter(), true)
         } else {
             test_valid = |x: u8| char::from(x).is_ascii_digit();
             (num.as_bytes().iter(), false)
         };

         // Evaluate the first part, moving out all underscores
         let mut has_underscores = skip_underscores(&mut it, test_valid)?;

         match it.clone().next() {
             // If we're followed by something this may be a float so keep going.
             Some(_) => {}

             // Otherwise this is a valid integer literal!
             None => {
                 return Some(TokenKind::Integer(IntegerKind {
                     has_underscores,
                     sign,
                     hex,
                 }))
             }
         }

         // A number can optionally be after the decimal so only actually try to
         // parse one if it's there.
         if it.clone().next() == Some(&b'.') {
             it.next();
             match it.clone().next() {
                 Some(c) if test_valid(*c) => {
                     if skip_underscores(&mut it, test_valid)? {
                         has_underscores = true;
                     }
                 }
                 Some(_) | None => {}
             }
         };

         // Figure out if there's an exponential part here to make a float, and
         // if so parse it but defer its actual calculation until later.
         match (hex, it.next()) {
             (true, Some(b'p')) | (true, Some(b'P')) | (false, Some(b'e')) | (false, Some(b'E')) => {
                 // The exponent may carry its own optional sign.
                 match it.clone().next() {
                     Some(b'-') => {
                         it.next();
                     }
                     Some(b'+') => {
                         it.next();
                     }
                     _ => {}
                 }
                 // Exponent digits are always decimal, even for hex floats.
                 if skip_underscores(&mut it, |x| char::from(x).is_ascii_digit())? {
                     has_underscores = true;
                 }
             }
             (_, None) => {}
             _ => return None,
         }

         // We should have eaten everything by now, if not then this is surely
         // not a float or integer literal.
         if it.next().is_some() {
             return None;
         }

         return Some(TokenKind::Float(FloatKind::Normal {
             has_underscores,
             hex,
         }));

         // Consumes a run of digits (as judged by `good`) from `it`, allowing
         // single `_` separators between digits.
         //
         // Returns `None` if the run does not start with a digit or ends with
         // an underscore; otherwise returns whether any underscore was seen.
         // Two underscores in a row stop the scan at the second one, which then
         // counts as a trailing underscore. Scanning stops, without consuming,
         // at the first byte that is neither a digit nor an allowed underscore.
         fn skip_underscores<'a>(
             it: &mut slice::Iter<'_, u8>,
             good: fn(u8) -> bool,
         ) -> Option<bool> {
             let mut last_underscore = false;
             let mut has_underscores = false;
             let first = *it.next()?;
             if !good(first) {
                 return None;
             }
             while let Some(c) = it.clone().next() {
                 if *c == b'_' && !last_underscore {
                     has_underscores = true;
                     it.next();
                     last_underscore = true;
                     continue;
                 }
                 if !good(*c) {
                     break;
                 }
                 last_underscore = false;
                 it.next();
             }
             if last_underscore {
                 return None;
             }
             Some(has_underscores)
         }
     }

     /// Verifies that `comment`, which is about to be returned, has a "confusing
     /// unicode character" in it and should instead be transformed into an
     /// error.
     fn check_confusing_comment(&self, end: usize, comment: &str) -> Result<(), Error> {
         if self.allow_confusing_unicode {
             return Ok(());
         }

         // In an effort to avoid utf-8 decoding the entire `comment` the search
         // here is a bit more optimized. This checks for the `0xe2` byte because
         // in the utf-8 encoding that's the leading encoding byte for all
         // "confusing characters". Each instance of 0xe2 is checked to see if it
         // starts a confusing character, and if so that's returned.
         //
         // Also note that 0xe2 will never be found in the middle of a codepoint,
         // it's always the start of a codepoint. This means that if our special
         // characters show up they're guaranteed to start with 0xe2 bytes.
         let bytes = comment.as_bytes();
         for pos in memchr::Memchr::new(0xe2, bytes) {
             if let Some(c) = comment[pos..].chars().next() {
                 if is_confusing_unicode(c) {
                     // Note that `self.cur()` accounts for already having
                     // parsed `comment`, so we move backwards to where
                     // `comment` started and then add the index within
                     // `comment`.
                     let pos = end - comment.len() + pos;
                     return Err(self.error(pos, LexError::ConfusingUnicode(c)));
                 }
             }
         }

         Ok(())
     }

     /// Parses the body of a string literal from `it`, which must be
     /// positioned just after the opening `"`.
     ///
     /// Consumes characters through the closing `"` and returns the bytes the
     /// literal denotes. When the literal contains no escapes the bytes are
     /// borrowed directly from the source; otherwise an owned buffer with the
     /// escapes decoded is returned.
     ///
     /// # Errors
     ///
     /// Returns a `LexError` if the input ends before the closing quote, an
     /// escape sequence is invalid, a control character (below 0x20, or 0x7f)
     /// appears unescaped, or a confusing unicode character appears while
     /// `allow_confusing_unicode` is `false`.
     fn parse_str(
         it: &mut str::Chars<'a>,
         allow_confusing_unicode: bool,
     ) -> Result<Cow<'a, [u8]>, LexError> {
         // `Start` means no escape has been seen yet, so the result can still
         // be borrowed from the source. `String` holds the decoded bytes once
         // an escape forces an owned copy.
         enum State {
             Start,
             String(Vec<u8>),
         }
         let orig = it.as_str();
         let mut state = State::Start;
         loop {
             match it.next().ok_or(LexError::UnexpectedEof)? {
                 '"' => break,
                 '\\' => {
                     match state {
                         State::String(_) => {}
                         State::Start => {
                             // First escape: copy everything before the
                             // backslash (the `- 1` excludes the backslash
                             // that was just consumed).
                             let pos = orig.len() - it.as_str().len() - 1;
                             state = State::String(orig[..pos].as_bytes().to_vec());
                         }
                     }
                     let buf = match &mut state {
                         State::String(b) => b,
                         State::Start => unreachable!(),
                     };
                     match it.next().ok_or(LexError::UnexpectedEof)? {
                         '"' => buf.push(b'"'),
                         '\'' => buf.push(b'\''),
                         't' => buf.push(b'\t'),
                         'n' => buf.push(b'\n'),
                         'r' => buf.push(b'\r'),
                         '\\' => buf.push(b'\\'),
                         // `\u{...}`: a unicode scalar value written in hex,
                         // appended as its utf-8 encoding.
                         'u' => {
                             Lexer::must_eat_char(it, '{')?;
                             let n = Lexer::hexnum(it)?;
                             let c = char::from_u32(n).ok_or(LexError::InvalidUnicodeValue(n))?;
                             buf.extend(c.encode_utf8(&mut [0; 4]).as_bytes());
                             Lexer::must_eat_char(it, '}')?;
                         }
                         // `\hh`: two hex digits denoting one raw byte.
                         c1 if c1.is_ascii_hexdigit() => {
                             let c2 = Lexer::hexdigit(it)?;
                             buf.push(to_hex(c1) * 16 + c2);
                         }
                         c => return Err(LexError::InvalidStringEscape(c)),
                     }
                 }
                 c if (c as u32) < 0x20 || c as u32 == 0x7f => {
                     return Err(LexError::InvalidStringElement(c))
                 }
                 c if !allow_confusing_unicode && is_confusing_unicode(c) => {
                     return Err(LexError::ConfusingUnicode(c))
                 }
                 // An ordinary character: nothing to do while still borrowing,
                 // otherwise append its utf-8 bytes to the owned buffer.
                 c => match &mut state {
                     State::Start => {}
                     State::String(v) => {
                         v.extend(c.encode_utf8(&mut [0; 4]).as_bytes());
                     }
                 },
             }
         }
         match state {
             // The `- 1` excludes the closing quote that was just consumed.
             State::Start => Ok(orig[..orig.len() - it.as_str().len() - 1].as_bytes().into()),
             State::String(s) => Ok(s.into()),
         }
     }

     /// Extracts the name carried by an id or annotation token.
     ///
     /// `it` must already be positioned just past the token's sigil, so for a
     /// token lexed as `@a` it points at `a`.
     ///
     /// A plain name is returned as the remaining text of `it`, borrowed. If the
     /// remaining text starts with `"` instead (for example `@"..."`), the quoted
     /// byte string is decoded with `Lexer::parse_str` and then validated as
     /// utf-8.
     ///
     /// # Errors
     ///
     /// Returns any error reported while decoding the quoted string, or
     /// `LexError::InvalidUtf8Id` if the decoded bytes are not valid utf-8.
     fn parse_name(it: &mut str::Chars<'a>) -> Result<Cow<'a, str>, LexError> {
         // Anything that does not open with a quote is a plain name, used verbatim.
         if !it.as_str().starts_with('"') {
             return Ok(Cow::Borrowed(it.as_str()));
         }

         // Step over the opening quote before decoding the string body.
         it.next();
         let name = match Lexer::parse_str(it, true)? {
             Cow::Borrowed(bytes) => {
                 Cow::Borrowed(std::str::from_utf8(bytes).map_err(LexError::InvalidUtf8Id)?)
             }
             Cow::Owned(bytes) => Cow::Owned(
                 String::from_utf8(bytes).map_err(|e| LexError::InvalidUtf8Id(e.utf8_error()))?,
             ),
         };
         Ok(name)
     }

     /// Parses a hexadecimal number, with optional `_` digit separators, from
     /// the front of `it`.
     ///
     /// Parsing stops, without consuming it, at the first character that is
     /// neither a hex digit nor an underscore.
     ///
     /// # Errors
     ///
     /// * Propagates the error from `Lexer::hexdigit` if the first character is
     ///   missing or is not a hex digit.
     /// * `LexError::LoneUnderscore` if an underscore is not directly followed by
     ///   a hex digit, i.e. for a doubled or a trailing underscore.
     /// * `LexError::NumberTooBig` if the value does not fit in a `u32`.
     fn hexnum(it: &mut str::Chars<'_>) -> Result<u32, LexError> {
         let mut n = u32::from(Lexer::hexdigit(it)?);
         let mut last_underscore = false;
         // Peek via a clone so the terminating character is left unconsumed.
         while let Some(c) = it.clone().next() {
             if c == '_' {
                 // The text-format grammar only permits a single `_` between
                 // two digits, so a second consecutive underscore is malformed.
                 if last_underscore {
                     return Err(LexError::LoneUnderscore);
                 }
                 it.next();
                 last_underscore = true;
                 continue;
             }
             if !c.is_ascii_hexdigit() {
                 break;
             }
             last_underscore = false;
             it.next();
             n = n
                 .checked_mul(16)
                 .and_then(|n| n.checked_add(u32::from(to_hex(c))))
                 .ok_or(LexError::NumberTooBig)?;
         }
         // An underscore must separate two digits; it cannot end the number.
         if last_underscore {
             return Err(LexError::LoneUnderscore);
         }
         Ok(n)
     }

     /// Consumes one character from `it` and interprets it as a hexadecimal
     /// digit, returning the digit's numeric value.
     ///
     /// # Errors
     ///
     /// Fails with the error from `Lexer::must_char` when `it` is exhausted, and
     /// with `LexError::InvalidHexDigit` when the character is not an ASCII hex
     /// digit.
     fn hexdigit(it: &mut str::Chars<'_>) -> Result<u8, LexError> {
         match Lexer::must_char(it)? {
             digit if digit.is_ascii_hexdigit() => Ok(to_hex(digit)),
             other => Err(LexError::InvalidHexDigit(other)),
         }
     }

     /// Consumes and returns the next character of `it`.
     ///
     /// # Errors
     ///
     /// Returns `LexError::UnexpectedEof` if `it` has no characters left.
     fn must_char(it: &mut str::Chars<'_>) -> Result<char, LexError> {
         match it.next() {
             Some(ch) => Ok(ch),
             None => Err(LexError::UnexpectedEof),
         }
     }

     /// Expects that a specific character must be read next
     fn must_eat_char(it: &mut str::Chars<'_>, wanted: char) -> Result<(), LexError> {
         let found = Lexer::must_char(it)?;
         if wanted == found {
             Ok(())
         } else {
             Err(LexError::Expected { wanted, found })
         }
     }

     /// Creates an error at `pos` with the specified `kind`
     fn error(&self, pos: usize, kind: LexError) -> Error {
         Error::lex(Span { offset: pos }, self.input, kind)
     }

     /// Returns an iterator that lexes the source string token by token,
     /// beginning at byte offset `pos`.
     ///
     /// Each item is the result of one `parse` call; the iterator yields `None`
     /// once `parse` reports that no token remains.
     pub fn iter(&self, mut pos: usize) -> impl Iterator<Item = Result<Token, Error>> + '_ {
         std::iter::from_fn(move || match self.parse(&mut pos) {
             Ok(Some(token)) => Some(Ok(token)),
             Ok(None) => None,
             Err(err) => Some(Err(err)),
         })
     }

     /// Checks whether an annotation token starts at `pos`.
     ///
     /// Returns `Ok(Some(token))` when the token lexed at `pos` is an
     /// annotation and `Ok(None)` otherwise.
     ///
     /// # Errors
     ///
     /// Returns an error if the input at `pos` starts with `@` but the token
     /// there cannot be lexed.
     pub fn annotation(&self, mut pos: usize) -> Result<Option<Token>, Error> {
         // Cheap pre-check: this runs every time an lparen is parsed, so bail
         // out before lexing anything that cannot possibly be an annotation.
         if self.input.as_bytes().get(pos).copied() != Some(b'@') {
             return Ok(None);
         }
         let token = self.parse(&mut pos)?;
         Ok(token.filter(|t| matches!(t.kind, TokenKind::Annotation)))
     }
 }

 impl Token {
     /// Returns the slice of `s` that this token covers: `len` bytes starting
     /// at `offset`.
     ///
     /// # Panics
     ///
     /// Panics if that range is not a valid slice of `s`.
     pub fn src<'a>(&self, s: &'a str) -> &'a str {
         let tail = &s[self.offset..];
         let len: usize = self.len.try_into().unwrap();
         &tail[..len]
     }

     /// Returns the identifier, without the leading `$` symbol, that this token
     /// represents.
     ///
     /// Note that this method returns the contents of the identifier. With a
     /// string-based identifier this means that escapes have been resolved to
     /// their string-based equivalent.
     ///
     /// Should only be used with `TokenKind::Id`.
     ///
     /// # Errors
     ///
     /// Returns an error if this is a string-based identifier (e.g. `$"..."`)
     /// which is invalid utf-8.
     pub fn id<'a>(&self, s: &'a str) -> Result<Cow<'a, str>, Error> {
         let mut ch = self.src(s).chars();
         let dollar = ch.next();
         debug_assert_eq!(dollar, Some('$'));
         let id = Lexer::parse_name(&mut ch).map_err(|e| self.error(s, e))?;
         if id.is_empty() {
             return Err(self.error(s, LexError::EmptyId));
         }
         Ok(id)
     }

     /// Returns the annotation, without the leading `@` symbol, that this token
     /// represents.
     ///
     /// Note that this method returns the contents of the identifier. With a
     /// string-based identifier this means that escapes have been resolved to
     /// their string-based equivalent.
     ///
     /// Should only be used with `TokenKind::Annotation`.
     ///
     /// # Errors
     ///
     /// Returns an error if this is a string-based identifier (e.g. `$"..."`)
     /// which is invalid utf-8.
     pub fn annotation<'a>(&self, s: &'a str) -> Result<Cow<'a, str>, Error> {
         let mut ch = self.src(s).chars();
         let at = ch.next();
         debug_assert_eq!(at, Some('@'));
         let id = Lexer::parse_name(&mut ch).map_err(|e| self.error(s, e))?;
         if id.is_empty() {
             return Err(self.error(s, LexError::EmptyAnnotation));
         }
         Ok(id)
     }

     /// Returns the keyword this token represents.
     ///
     /// Should only be used with [`TokenKind::Keyword`].
     pub fn keyword<'a>(&self, s: &'a str) -> &'a str {
         self.src(s)
     }

     /// Returns the reserved string this token represents.
     ///
     /// Should only be used with [`TokenKind::Reserved`].
     pub fn reserved<'a>(&self, s: &'a str) -> &'a str {
         self.src(s)
     }

     /// Returns the decoded bytes of the string literal this token represents.
     ///
     /// The result borrows directly from the source when possible and is an
     /// owned buffer when escapes had to be resolved.
     ///
     /// Should only be used with [`TokenKind::String`].
     ///
     /// # Panics
     ///
     /// Panics if the token's source text is empty or does not decode as a
     /// string literal.
     pub fn string<'a>(&self, s: &'a str) -> Cow<'a, [u8]> {
         let literal = self.src(s);
         let mut body = literal.chars();
         // Discard the first character (the opening quote) before decoding.
         body.next().unwrap();
         Lexer::parse_str(&mut body, true).unwrap()
     }

     /// Returns the decomposed float token that this represents.
     ///
     /// This will slice up the float token into its component parts and return a
     /// description of the float token in the source.
     ///
     /// Should only be used with [`TokenKind::Float`].
     pub fn float<'a>(&self, s: &'a str, kind: FloatKind) -> Float<'a> {
         match kind {
             FloatKind::Inf { negative } => Float::Inf { negative },
             FloatKind::Nan { negative } => Float::Nan {
                 val: None,
                 negative,
             },
             FloatKind::NanVal {
                 negative,
                 has_underscores,
             } => {
                 let src = self.src(s);
                 let src = if src.starts_with("n") { src } else { &src[1..] };
                 let mut val = Cow::Borrowed(src.strip_prefix("nan:0x").unwrap());
                 if has_underscores {
                     *val.to_mut() = val.replace("_", "");
                 }
                 Float::Nan {
                     val: Some(val),
                     negative,
                 }
             }
             FloatKind::Normal {
                 has_underscores,
                 hex,
             } => {
                 let src = self.src(s);
                 let (integral, decimal, exponent) = match src.find('.') {
                     Some(i) => {
                         let integral = &src[..i];
                         let rest = &src[i + 1..];
                         let exponent = if hex {
                             rest.find('p').or_else(|| rest.find('P'))
                         } else {
                             rest.find('e').or_else(|| rest.find('E'))
                         };
                         match exponent {
                             Some(i) => (integral, Some(&rest[..i]), Some(&rest[i + 1..])),
                             None => (integral, Some(rest), None),
                         }
                     }
                     None => {
                         let exponent = if hex {
                             src.find('p').or_else(|| src.find('P'))
                         } else {
                             src.find('e').or_else(|| src.find('E'))
                         };
                         match exponent {
                             Some(i) => (&src[..i], None, Some(&src[i + 1..])),
                             None => (src, None, None),
                         }
                     }
                 };
                 let mut integral = Cow::Borrowed(integral.strip_prefix('+').unwrap_or(integral));
                 let mut decimal = decimal.and_then(|s| {
                     if s.is_empty() {
                         None
                     } else {
                         Some(Cow::Borrowed(s))
                     }
                 });
                 let mut exponent =
                     exponent.map(|s| Cow::Borrowed(s.strip_prefix('+').unwrap_or(s)));
                 if has_underscores {
                     *integral.to_mut() = integral.replace("_", "");
                     if let Some(decimal) = &mut decimal {
                         *decimal.to_mut() = decimal.replace("_", "");
                     }
                     if let Some(exponent) = &mut exponent {
                         *exponent.to_mut() = exponent.replace("_", "");
                     }
                 }
                 if hex {
                     *integral.to_mut() = integral.replace("0x", "");
                 }
                 Float::Val {
                     hex,
                     integral,
                     decimal,
                     exponent,
                 }
             }
         }
     }

     /// Splits this integer token into the pieces described by [`Integer`].
     ///
     /// The resulting digit string has a leading `+` removed (a leading `-` is
     /// kept), `_` separators removed when `kind` reports any, and the `0x`
     /// prefix removed for hex integers.
     ///
     /// Should only be used with [`TokenKind::Integer`].
     pub fn integer<'a>(&self, s: &'a str, kind: IntegerKind) -> Integer<'a> {
         let text = self.src(s);
         let digits = match kind.sign {
             Some(SignToken::Plus) => text.strip_prefix('+').unwrap(),
             Some(SignToken::Minus) | None => text,
         };

         let mut val = Cow::Borrowed(digits);
         if kind.has_underscores {
             val = Cow::Owned(val.replace("_", ""));
         }
         if kind.hex {
             val = Cow::Owned(val.replace("0x", ""));
         }

         Integer {
             sign: kind.sign,
             hex: kind.hex,
             val,
         }
     }

     fn error(&self, src: &str, err: LexError) -> Error {
         Error::lex(
             Span {
                 offset: self.offset,
             },
             src,
             err,
         )
     }
 }

 impl<'a> Integer<'a> {
     /// Returns the explicit sign token of this integer, if it has one.
     pub fn sign(&self) -> Option<SignToken> {
         self.sign
     }

     /// Returns the digit string of this integer together with the radix it
     /// should be parsed in: 16 for hex integers and 10 otherwise.
     pub fn val(&self) -> (&str, u32) {
         let radix = if self.hex { 16 } else { 10 };
         let digits: &str = &self.val;
         (digits, radix)
     }
 }

 /// Converts an ASCII hex digit to its numeric value.
 ///
 /// Callers are expected to have checked that `c` is a hex digit; any
 /// character outside `a-f`/`A-F` is treated as a decimal digit.
 fn to_hex(c: char) -> u8 {
     let byte = c as u8;
     let (base, offset) = if ('a'..='f').contains(&c) {
         (b'a', 10)
     } else if ('A'..='F').contains(&c) {
         (b'A', 10)
     } else {
         (b'0', 0)
     };
     byte - base + offset
 }

 impl fmt::Display for LexError {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         use LexError::*;
         match self {
             DanglingBlockComment => f.write_str("unterminated block comment")?,
             Unexpected(c) => write!(f, "unexpected character '{}'", escape_char(*c))?,
             InvalidStringElement(c) => {
                 write!(f, "invalid character in string '{}'", escape_char(*c))?
             }
             InvalidStringEscape(c) => write!(f, "invalid string escape '{}'", escape_char(*c))?,
             InvalidHexDigit(c) => write!(f, "invalid hex digit '{}'", escape_char(*c))?,
             InvalidDigit(c) => write!(f, "invalid decimal digit '{}'", escape_char(*c))?,
             Expected { wanted, found } => write!(
                 f,
                 "expected '{}' but found '{}'",
                 escape_char(*wanted),
                 escape_char(*found)
             )?,
             UnexpectedEof => write!(f, "unexpected end-of-file")?,
             NumberTooBig => f.write_str("number is too big to parse")?,
             InvalidUnicodeValue(c) => write!(f, "invalid unicode scalar value 0x{:x}", c)?,
             LoneUnderscore => write!(f, "bare underscore in numeric literal")?,
             ConfusingUnicode(c) => write!(f, "likely-confusing unicode character found {:?}", c)?,
             InvalidUtf8Id(_) => write!(f, "malformed UTF-8 encoding of string-based id")?,
             EmptyId => write!(f, "empty identifier")?,
             EmptyAnnotation => write!(f, "empty annotation id")?,
         }
         Ok(())
     }
 }

 /// Renders `c` for inclusion in an error message.
 ///
 /// Tab, carriage return, newline, backslash and single quote are written as
 /// backslash escapes, other printable ASCII (including `"`) is written as-is,
 /// and everything else uses the `\u{...}` form.
 fn escape_char(c: char) -> String {
     let fixed = match c {
         '\t' => "\\t",
         '\r' => "\\r",
         '\n' => "\\n",
         '\\' => "\\\\",
         '\'' => "\\'",
         '"' => "\"",
         ' '..='~' => return c.to_string(),
         _ => return c.escape_unicode().to_string(),
     };
     fixed.to_owned()
 }

 /// This is an attempt to protect against the "trojan source" [1] problem where
 /// unicode characters can cause editors to render source code differently
 /// for humans than the compiler itself sees.
 ///
 /// To mitigate this issue, and because it's relatively rare in practice,
 /// this simply rejects characters of that form: the explicit bidirectional
 /// embedding, override and isolate controls together with the two
 /// characters that terminate them.
 ///
 /// Returns `true` if `ch` is one of the rejected characters.
 ///
 /// [1]: https://www.trojansource.codes/
 fn is_confusing_unicode(ch: char) -> bool {
     matches!(
         ch,
         '\u{202a}' // LEFT-TO-RIGHT EMBEDDING
             | '\u{202b}' // RIGHT-TO-LEFT EMBEDDING
             | '\u{202c}' // POP DIRECTIONAL FORMATTING
             | '\u{202d}' // LEFT-TO-RIGHT OVERRIDE
             | '\u{202e}' // RIGHT-TO-LEFT OVERRIDE
             | '\u{2066}' // LEFT-TO-RIGHT ISOLATE
             | '\u{2067}' // RIGHT-TO-LEFT ISOLATE
             | '\u{2068}' // FIRST STRONG ISOLATE
             | '\u{2069}' // POP DIRECTIONAL ISOLATE
             // Not a bidi control, but this function has always rejected it
             // (it stood where U+202C belongs), so keep rejecting it rather
             // than start accepting input that used to be an error.
             | '\u{206c}' // INHIBIT ARABIC FORM SHAPING
     )
 }

 #[cfg(test)]
 mod tests {
     use super::*;

     /// The first token of whitespace-led input is the whole whitespace run.
     #[test]
     fn ws_smoke() {
         fn leading_whitespace(input: &str) -> &str {
             let token = get_token(input);
             if !matches!(token.kind, TokenKind::Whitespace) {
                 panic!("unexpected {:?}", token.kind);
             }
             token.src(input)
         }

         // (input, expected text of the first token)
         let cases = [
             (" ", " "),
             ("  ", "  "),
             ("  \n ", "  \n "),
             ("  x", "  "),
             ("  ;", "  "),
         ];
         for &(input, expected) in &cases {
             assert_eq!(leading_whitespace(input), expected);
         }
     }

     /// A line comment token stops before the line terminator.
     #[test]
     fn line_comment_smoke() {
         fn leading_line_comment(input: &str) -> &str {
             let token = get_token(input);
             if !matches!(token.kind, TokenKind::LineComment) {
                 panic!("unexpected {:?}", token.kind);
             }
             token.src(input)
         }

         // (input, expected text of the first token)
         let cases = [
             (";;", ";;"),
             (";; xyz", ";; xyz"),
             (";; xyz\nabc", ";; xyz"),
             (";;\nabc", ";;"),
             (";;   \nabc", ";;   "),
             (";;   \rabc", ";;   "),
             (";;   \r\nabc", ";;   "),
         ];
         for &(input, expected) in &cases {
             assert_eq!(leading_line_comment(input), expected);
         }
     }

     /// Block comments, including nested ones, lex as a single token.
     #[test]
     fn block_comment_smoke() {
         fn leading_block_comment(input: &str) -> &str {
             let token = get_token(input);
             if !matches!(token.kind, TokenKind::BlockComment) {
                 panic!("unexpected {:?}", token.kind);
             }
             token.src(input)
         }

         for &input in &["(;;)", "(; ;)", "(; (;;) ;)"] {
             // Each input is exactly one comment, so the token spans all of it.
             assert_eq!(leading_block_comment(input), input);
         }
     }

     /// Lexes and returns the first token of `input`, panicking if lexing fails
     /// or if there is no token at all.
     fn get_token(input: &str) -> Token {
         let mut pos = 0;
         let lexed = Lexer::new(input).parse(&mut pos);
         lexed.expect("no first token").expect("no token")
     }

     /// Only the first `(` is lexed as the first token.
     #[test]
     fn lparen() {
         let first = get_token("((");
         assert_eq!(first.kind, TokenKind::LParen);
     }

     /// A leading `)` is lexed as its own token.
     #[test]
     fn rparen() {
         let first = get_token(")(");
         assert_eq!(first.kind, TokenKind::RParen);
     }

     /// String literals decode to the expected bytes, covering plain text,
     /// single-character escapes, `\u{...}` escapes and two-digit hex escapes.
     #[test]
     fn strings() {
         fn get_string(input: &str) -> Vec<u8> {
             let token = get_token(input);
             match token.kind {
                 TokenKind::String => token.string(input).to_vec(),
                 other => panic!("not string {:?}", other),
             }
         }
         assert_eq!(&*get_string("\"\""), b"");
         assert_eq!(&*get_string("\"a\""), b"a");
         assert_eq!(&*get_string("\"a b c d\""), b"a b c d");
         assert_eq!(&*get_string("\"\\\"\""), b"\"");
         assert_eq!(&*get_string("\"\\'\""), b"'");
         assert_eq!(&*get_string("\"\\n\""), b"\n");
         assert_eq!(&*get_string("\"\\t\""), b"\t");
         assert_eq!(&*get_string("\"\\r\""), b"\r");
         assert_eq!(&*get_string("\"\\\\\""), b"\\");
         assert_eq!(&*get_string("\"\\01\""), &[1]);
         assert_eq!(&*get_string("\"\\u{1}\""), &[1]);
         assert_eq!(
             &*get_string("\"\\u{0f3}\""),
             '\u{0f3}'.encode_utf8(&mut [0; 4]).as_bytes()
         );
         // Underscores are accepted as digit separators inside `\u{...}`.
         assert_eq!(
             &*get_string("\"\\u{0_f_3}\""),
             '\u{0f3}'.encode_utf8(&mut [0; 4]).as_bytes()
         );

         // Every two-digit hex escape produces exactly that byte.
         for i in 0..=255i32 {
             let s = format!("\"\\{:02x}\"", i);
             assert_eq!(&*get_string(&s), &[i as u8]);
         }
     }

     /// Identifier tokens yield their name without the `$` sigil, for both the
     /// plain and the quoted form.
     #[test]
     fn id() {
         fn first_id(input: &str) -> String {
             let token = get_token(input);
             if !matches!(token.kind, TokenKind::Id) {
                 panic!("not id {:?}", token.kind);
             }
             token.id(input).unwrap().to_string()
         }

         // (input, expected identifier name)
         let cases = [
             ("$x", "x"),
             ("$xyz", "xyz"),
             ("$x_z", "x_z"),
             ("$0^", "0^"),
             ("$0^;;", "0^"),
             ("$0^ ;;", "0^"),
             ("$\"x\" ;;", "x"),
         ];
         for &(input, expected) in &cases {
             assert_eq!(first_id(input), expected);
         }
     }

     /// Annotation tokens yield their name without the `@` sigil, for both the
     /// plain and the quoted form.
     #[test]
     fn annotation() {
         fn first_annotation(input: &str) -> String {
             let token = get_token(input);
             if !matches!(token.kind, TokenKind::Annotation) {
                 panic!("not annotation {:?}", token.kind);
             }
             token.annotation(input).unwrap().to_string()
         }

         // (input, expected annotation name)
         let cases = [
             ("@foo", "foo"),
             ("@foo ", "foo"),
             ("@f ", "f"),
             ("@\"x\" ", "x"),
             ("@0 ", "0"),
         ];
         for &(input, expected) in &cases {
             assert_eq!(first_annotation(input), expected);
         }
     }

     /// Keyword tokens cover the keyword text and stop at trailing whitespace.
     #[test]
     fn keyword() {
         fn first_keyword(input: &str) -> &str {
             let token = get_token(input);
             if !matches!(token.kind, TokenKind::Keyword) {
                 panic!("not keyword {:?}", token.kind);
             }
             token.keyword(input)
         }

         // (input, expected keyword text)
         let cases = [
             ("x", "x"),
             ("xyz", "xyz"),
             ("x_z", "x_z"),
             ("x_z ", "x_z"),
             ("x_z ", "x_z"),
         ];
         for &(input, expected) in &cases {
             assert_eq!(first_keyword(input), expected);
         }
     }

     /// Text that is not a keyword, id or number lexes as a reserved token.
     #[test]
     fn reserved() {
         fn first_reserved(input: &str) -> &str {
             let token = get_token(input);
             if !matches!(token.kind, TokenKind::Reserved) {
                 panic!("not reserved {:?}", token.kind);
             }
             token.reserved(input)
         }

         let text = first_reserved("^_x ");
         assert_eq!(text, "^_x");
     }

     /// Integer tokens decompose into a digit string with `+`, `_` separators
     /// and the `0x` prefix stripped, while a leading `-` is kept.
     #[test]
     fn integer() {
         fn first_integer(input: &str) -> String {
             let token = get_token(input);
             match token.kind {
                 TokenKind::Integer(kind) => token.integer(input, kind).val.to_string(),
                 other => panic!("not integer {:?}", other),
             }
         }

         // (input, expected digit string)
         let cases = [
             ("1", "1"),
             ("0", "0"),
             ("-1", "-1"),
             ("+1", "1"),
             ("+1_000", "1000"),
             ("+1_0_0_0", "1000"),
             ("+0x10", "10"),
             ("-0x10", "-10"),
             ("0x10", "10"),
         ];
         for &(input, expected) in &cases {
             assert_eq!(first_integer(input), expected);
         }
     }

     /// Float tokens decompose into the expected `Float` description for NaN,
     /// infinity, decimal and hexadecimal forms.
     #[test]
     fn float() {
         fn first_float(input: &str) -> Float<'_> {
             let token = get_token(input);
             match token.kind {
                 TokenKind::Float(kind) => token.float(input, kind),
                 other => panic!("not float {:?}", other),
             }
         }

         // Small constructors so each expectation fits on one line.
         fn nan(payload: Option<&'static str>, negative: bool) -> Float<'static> {
             Float::Nan {
                 val: payload.map(|p| p.into()),
                 negative,
             }
         }
         fn inf(negative: bool) -> Float<'static> {
             Float::Inf { negative }
         }
         fn val(
             integral: &'static str,
             decimal: Option<&'static str>,
             exponent: Option<&'static str>,
             hex: bool,
         ) -> Float<'static> {
             Float::Val {
                 integral: integral.into(),
                 decimal: decimal.map(|d| d.into()),
                 exponent: exponent.map(|e| e.into()),
                 hex,
             }
         }

         assert_eq!(first_float("nan"), nan(None, false));
         assert_eq!(first_float("-nan"), nan(None, true));
         assert_eq!(first_float("+nan"), nan(None, false));
         assert_eq!(first_float("+nan:0x1"), nan(Some("1"), false));
         assert_eq!(first_float("nan:0x7f_ffff"), nan(Some("7fffff"), false));

         assert_eq!(first_float("inf"), inf(false));
         assert_eq!(first_float("-inf"), inf(true));
         assert_eq!(first_float("+inf"), inf(false));

         assert_eq!(first_float("1.2"), val("1", Some("2"), None, false));
         assert_eq!(first_float("1.2e3"), val("1", Some("2"), Some("3"), false));
         assert_eq!(
             first_float("-1_2.1_1E+0_1"),
             val("-12", Some("11"), Some("01"), false)
         );
         assert_eq!(
             first_float("+1_2.1_1E-0_1"),
             val("12", Some("11"), Some("-01"), false)
         );
         assert_eq!(
             first_float("0x1_2.3_4p5_6"),
             val("12", Some("34"), Some("56"), true)
         );
         assert_eq!(
             first_float("+0x1_2.3_4P-5_6"),
             val("12", Some("34"), Some("-56"), true)
         );
         assert_eq!(first_float("1."), val("1", None, None, false));
         assert_eq!(first_float("0x1p-24"), val("1", None, Some("-24"), true));
     }
 }