/// A newtype over a single byte whose `fmt::Debug` impl prints the byte in
/// escaped form.
///
/// Printable ASCII is shown as-is, a space is shown as `' '`, and everything
/// else uses the `core::ascii::escape_default` escapes with the hex digits of
/// `\xNN` escapes upper-cased (e.g. `\xAB`).
pub(crate) struct Byte(pub(crate) u8);

impl core::fmt::Debug for Byte {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        // A bare space is hard to spot in debug output, so it is shown
        // quoted instead of going through the generic escaping below.
        if self.0 == b' ' {
            return f.write_str("' '");
        }
        // Scratch space for the escaped form; 10 bytes comfortably holds
        // anything ascii::escape_default can produce for one byte.
        let mut buf = [0u8; 10];
        let mut used = 0;
        let escaped = core::ascii::escape_default(self.0);
        for (slot, (pos, raw)) in buf.iter_mut().zip(escaped.enumerate()) {
            // Only positions 2 and later can be hex digits of a `\xNN`
            // escape; upper-case those so `\xab` is rendered as `\xAB`.
            *slot = match raw {
                b'a'..=b'f' if pos >= 2 => raw.to_ascii_uppercase(),
                _ => raw,
            };
            used = pos + 1;
        }
        // escape_default only ever emits ASCII, so this is always valid UTF-8.
        let text = core::str::from_utf8(&buf[..used]).unwrap();
        f.write_str(text)
    }
}
| |
| /// A type that provides a human readable debug impl for arbitrary bytes. |
| /// |
| /// This generally works best when the bytes are presumed to be mostly UTF-8, |
| /// but will work for anything. |
| /// |
| /// N.B. This is copied nearly verbatim from regex-automata. Sigh. |
| pub(crate) struct Bytes<'a>(pub(crate) &'a [u8]); |
| |
| impl<'a> core::fmt::Debug for Bytes<'a> { |
| fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { |
| write!(f, "\"")?; |
| // This is a sad re-implementation of a similar impl found in bstr. |
| let mut bytes = self.0; |
| while let Some(result) = utf8_decode(bytes) { |
| let ch = match result { |
| Ok(ch) => ch, |
| Err(byte) => { |
| write!(f, r"\x{:02x}", byte)?; |
| bytes = &bytes[1..]; |
| continue; |
| } |
| }; |
| bytes = &bytes[ch.len_utf8()..]; |
| match ch { |
| '\0' => write!(f, "\\0")?, |
| // ASCII control characters except \0, \n, \r, \t |
| '\x01'..='\x08' |
| | '\x0b' |
| | '\x0c' |
| | '\x0e'..='\x19' |
| | '\x7f' => { |
| write!(f, "\\x{:02x}", u32::from(ch))?; |
| } |
| '\n' | '\r' | '\t' | _ => { |
| write!(f, "{}", ch.escape_debug())?; |
| } |
| } |
| } |
| write!(f, "\"")?; |
| Ok(()) |
| } |
| } |
| |
/// Decodes the UTF-8 encoded codepoint at the start of `bytes`.
///
/// Returns `Some(Ok(ch))` when the slice begins with a valid encoding of
/// `ch`. When it does not (stray continuation byte, invalid lead byte,
/// truncated or otherwise malformed sequence), the first byte is handed back
/// as `Some(Err(byte))` instead.
///
/// `None` is returned if and only if `bytes` is empty.
pub(crate) fn utf8_decode(bytes: &[u8]) -> Option<Result<char, u8>> {
    let first = *bytes.first()?;
    // The lead byte determines how many bytes the sequence claims to span.
    let width = match first {
        // ASCII decodes directly without any further validation.
        0x00..=0x7F => return Some(Ok(char::from(first))),
        0xC0..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF7 => 4,
        // Continuation bytes (0x80..=0xBF) and 0xF8..=0xFF can never begin
        // a sequence.
        _ => return Some(Err(first)),
    };
    // `get` yields None when the slice is shorter than the claimed width;
    // `from_utf8` rejects everything else that is malformed (bad
    // continuation bytes, overlong forms, surrogates, out-of-range values).
    let decoded = bytes
        .get(..width)
        .and_then(|prefix| core::str::from_utf8(prefix).ok())
        .and_then(|s| s.chars().next());
    Some(decoded.ok_or(first))
}