| use crate::binary_reader::WASM_MAGIC_NUMBER; |
| use crate::prelude::*; |
| #[cfg(feature = "features")] |
| use crate::WasmFeatures; |
| #[cfg(feature = "component-model")] |
| use crate::{ |
| limits::MAX_WASM_MODULE_SIZE, ComponentCanonicalSectionReader, ComponentExportSectionReader, |
| ComponentImportSectionReader, ComponentInstanceSectionReader, ComponentStartFunction, |
| ComponentTypeSectionReader, CoreTypeSectionReader, InstanceSectionReader, SectionLimited, |
| }; |
| use crate::{ |
| BinaryReader, BinaryReaderError, CustomSectionReader, DataSectionReader, ElementSectionReader, |
| ExportSectionReader, FromReader, FunctionBody, FunctionSectionReader, GlobalSectionReader, |
| ImportSectionReader, MemorySectionReader, Result, TableSectionReader, TagSectionReader, |
| TypeSectionReader, |
| }; |
| use core::fmt; |
| use core::iter; |
| use core::ops::Range; |
| |
/// Version number carried in the header of a core WebAssembly module.
pub(crate) const WASM_MODULE_VERSION: u16 = 0x1;

// The component version started at `0xa` and has been bumped with each
// breaking change to the binary format. Once the component model is
// stabilized it is expected to become 0x1. History:
//
// * [????-??-??] 0xa - original version
// * [2023-01-05] 0xb - `export` introduces an alias
// * [2023-02-06] 0xc - `export` has an optional type ascribed to it
// * [2023-05-10] 0xd - imports/exports drop URLs, new discriminator byte which
//                      allows for `(import (interface "...") ...)` syntax.
/// Version number carried in the header of a WebAssembly component.
pub(crate) const WASM_COMPONENT_VERSION: u16 = 0xd;

/// Header "kind" value (the upper 16 bits of the 32-bit header word) that
/// marks a binary as a core module.
const KIND_MODULE: u16 = 0x00;
/// Header "kind" value (the upper 16 bits of the 32-bit header word) that
/// marks a binary as a component.
const KIND_COMPONENT: u16 = 0x01;
| |
/// The binary formats that the parser understands.
///
/// Which one applies is decided from the header of the binary being parsed.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Encoding {
    /// The binary is a core WebAssembly module.
    Module,
    /// The binary is a WebAssembly component.
    Component,
}
| |
| /// An incremental parser of a binary WebAssembly module or component. |
| /// |
| /// This type is intended to be used to incrementally parse a WebAssembly module |
| /// or component as bytes become available for the module. This can also be used |
| /// to parse modules or components that are already entirely resident within memory. |
| /// |
| /// This primary function for a parser is the [`Parser::parse`] function which |
| /// will incrementally consume input. You can also use the [`Parser::parse_all`] |
| /// function to parse a module or component that is entirely resident in memory. |
| #[derive(Debug, Clone)] |
| pub struct Parser { |
| state: State, |
| offset: u64, |
| max_size: u64, |
| encoding: Encoding, |
| #[cfg(feature = "features")] |
| features: WasmFeatures, |
| } |
| |
/// Internal position of a [`Parser`] within the structure of a binary.
#[derive(Clone, Debug)]
enum State {
    /// The 8-byte header has not been read yet.
    Header,
    /// The next thing to read is the id and length of a section (or the end
    /// of the input).
    SectionStart,
    /// The parser is in the middle of a code section and hands out one
    /// function body at a time.
    FunctionBody {
        /// Number of entries of the section that are still outstanding.
        remaining: u32,
        /// Number of bytes of the section that are still outstanding.
        len: u32,
    },
}
| |
| /// A successful return payload from [`Parser::parse`]. |
| /// |
| /// On success one of two possible values can be returned, either that more data |
| /// is needed to continue parsing or a chunk of the input was parsed, indicating |
| /// how much of it was parsed. |
| #[derive(Debug)] |
| pub enum Chunk<'a> { |
| /// This can be returned at any time and indicates that more data is needed |
| /// to proceed with parsing. Zero bytes were consumed from the input to |
| /// [`Parser::parse`]. The `u64` value here is a hint as to how many more |
| /// bytes are needed to continue parsing. |
| NeedMoreData(u64), |
| |
| /// A chunk was successfully parsed. |
| Parsed { |
| /// This many bytes of the `data` input to [`Parser::parse`] were |
| /// consumed to produce `payload`. |
| consumed: usize, |
| /// The value that we actually parsed. |
| payload: Payload<'a>, |
| }, |
| } |
| |
| /// Values that can be parsed from a WebAssembly module or component. |
| /// |
| /// This enumeration is all possible chunks of pieces that can be parsed by a |
| /// [`Parser`] from a binary WebAssembly module or component. Note that for many |
| /// sections the entire section is parsed all at once, whereas other functions, |
| /// like the code section, are parsed incrementally. This is a distinction where some |
| /// sections, like the type section, are required to be fully resident in memory |
| /// (fully downloaded) before proceeding. Other sections, like the code section, |
| /// can be processed in a streaming fashion where each function is extracted |
| /// individually so it can possibly be shipped to another thread while you wait |
| /// for more functions to get downloaded. |
| /// |
| /// Note that payloads, when returned, do not indicate that the module or component |
| /// is valid. For example when you receive a `Payload::TypeSection` the type |
| /// section itself has not yet actually been parsed. The reader returned will be |
| /// able to parse it, but you'll have to actually iterate the reader to do the |
| /// full parse. Each payload returned is intended to be a *window* into the |
| /// original `data` passed to [`Parser::parse`] which can be further processed |
| /// if necessary. |
| #[non_exhaustive] |
| pub enum Payload<'a> { |
| /// Indicates the header of a WebAssembly module or component. |
| Version { |
| /// The version number found in the header. |
| num: u16, |
| /// The encoding format being parsed. |
| encoding: Encoding, |
| /// The range of bytes that were parsed to consume the header of the |
| /// module or component. Note that this range is relative to the start |
| /// of the byte stream. |
| range: Range<usize>, |
| }, |
| |
| /// A module type section was received and the provided reader can be |
| /// used to parse the contents of the type section. |
| TypeSection(TypeSectionReader<'a>), |
| /// A module import section was received and the provided reader can be |
| /// used to parse the contents of the import section. |
| ImportSection(ImportSectionReader<'a>), |
| /// A module function section was received and the provided reader can be |
| /// used to parse the contents of the function section. |
| FunctionSection(FunctionSectionReader<'a>), |
| /// A module table section was received and the provided reader can be |
| /// used to parse the contents of the table section. |
| TableSection(TableSectionReader<'a>), |
| /// A module memory section was received and the provided reader can be |
| /// used to parse the contents of the memory section. |
| MemorySection(MemorySectionReader<'a>), |
| /// A module tag section was received, and the provided reader can be |
| /// used to parse the contents of the tag section. |
| TagSection(TagSectionReader<'a>), |
| /// A module global section was received and the provided reader can be |
| /// used to parse the contents of the global section. |
| GlobalSection(GlobalSectionReader<'a>), |
| /// A module export section was received, and the provided reader can be |
| /// used to parse the contents of the export section. |
| ExportSection(ExportSectionReader<'a>), |
| /// A module start section was received. |
| StartSection { |
| /// The start function index |
| func: u32, |
| /// The range of bytes that specify the `func` field, specified in |
| /// offsets relative to the start of the byte stream. |
| range: Range<usize>, |
| }, |
| /// A module element section was received and the provided reader can be |
| /// used to parse the contents of the element section. |
| ElementSection(ElementSectionReader<'a>), |
| /// A module data count section was received. |
| DataCountSection { |
| /// The number of data segments. |
| count: u32, |
| /// The range of bytes that specify the `count` field, specified in |
| /// offsets relative to the start of the byte stream. |
| range: Range<usize>, |
| }, |
| /// A module data section was received and the provided reader can be |
| /// used to parse the contents of the data section. |
| DataSection(DataSectionReader<'a>), |
| /// Indicator of the start of the code section of a WebAssembly module. |
| /// |
| /// This entry is returned whenever the code section starts. The `count` |
| /// field indicates how many entries are in this code section. After |
| /// receiving this start marker you're guaranteed that the next `count` |
| /// items will be either `CodeSectionEntry` or an error will be returned. |
| /// |
| /// This, unlike other sections, is intended to be used for streaming the |
| /// contents of the code section. The code section is not required to be |
| /// fully resident in memory when we parse it. Instead a [`Parser`] is |
| /// capable of parsing piece-by-piece of a code section. |
| CodeSectionStart { |
| /// The number of functions in this section. |
| count: u32, |
| /// The range of bytes that represent this section, specified in |
| /// offsets relative to the start of the byte stream. |
| range: Range<usize>, |
| /// The size, in bytes, of the remaining contents of this section. |
| /// |
| /// This can be used in combination with [`Parser::skip_section`] |
| /// where the caller will know how many bytes to skip before feeding |
| /// bytes into `Parser` again. |
| size: u32, |
| }, |
| /// An entry of the code section, a function, was parsed from a WebAssembly |
| /// module. |
| /// |
| /// This entry indicates that a function was successfully received from the |
| /// code section, and the payload here is the window into the original input |
| /// where the function resides. Note that the function itself has not been |
| /// parsed, it's only been outlined. You'll need to process the |
| /// `FunctionBody` provided to test whether it parses and/or is valid. |
| CodeSectionEntry(FunctionBody<'a>), |
| |
| /// A core module section was received and the provided parser can be |
| /// used to parse the nested module. |
| /// |
| /// This variant is special in that it returns a sub-`Parser`. Upon |
| /// receiving a `ModuleSection` it is expected that the returned |
| /// `Parser` will be used instead of the parent `Parser` until the parse has |
| /// finished. You'll need to feed data into the `Parser` returned until it |
| /// returns `Payload::End`. After that you'll switch back to the parent |
| /// parser to resume parsing the rest of the current component. |
| /// |
| /// Note that binaries will not be parsed correctly if you feed the data for |
| /// a nested module into the parent [`Parser`]. |
| #[cfg(feature = "component-model")] |
| ModuleSection { |
| /// The parser for the nested module. |
| parser: Parser, |
| /// The range of bytes that represent the nested module in the |
| /// original byte stream. |
| /// |
| /// Note that, to better support streaming parsing and validation, the |
| /// validator does *not* check that this range is in bounds. |
| unchecked_range: Range<usize>, |
| }, |
| /// A core instance section was received and the provided parser can be |
| /// used to parse the contents of the core instance section. |
| /// |
| /// Currently this section is only parsed in a component. |
| #[cfg(feature = "component-model")] |
| InstanceSection(InstanceSectionReader<'a>), |
| /// A core type section was received and the provided parser can be |
| /// used to parse the contents of the core type section. |
| /// |
| /// Currently this section is only parsed in a component. |
| #[cfg(feature = "component-model")] |
| CoreTypeSection(CoreTypeSectionReader<'a>), |
| /// A component section from a WebAssembly component was received and the |
| /// provided parser can be used to parse the nested component. |
| /// |
| /// This variant is special in that it returns a sub-`Parser`. Upon |
| /// receiving a `ComponentSection` it is expected that the returned |
| /// `Parser` will be used instead of the parent `Parser` until the parse has |
| /// finished. You'll need to feed data into the `Parser` returned until it |
| /// returns `Payload::End`. After that you'll switch back to the parent |
| /// parser to resume parsing the rest of the current component. |
| /// |
| /// Note that binaries will not be parsed correctly if you feed the data for |
| /// a nested component into the parent [`Parser`]. |
| #[cfg(feature = "component-model")] |
| ComponentSection { |
| /// The parser for the nested component. |
| parser: Parser, |
| /// The range of bytes that represent the nested component in the |
| /// original byte stream. |
| /// |
| /// Note that, to better support streaming parsing and validation, the |
| /// validator does *not* check that this range is in bounds. |
| unchecked_range: Range<usize>, |
| }, |
| /// A component instance section was received and the provided reader can be |
| /// used to parse the contents of the component instance section. |
| #[cfg(feature = "component-model")] |
| ComponentInstanceSection(ComponentInstanceSectionReader<'a>), |
| /// A component alias section was received and the provided reader can be |
| /// used to parse the contents of the component alias section. |
| #[cfg(feature = "component-model")] |
| ComponentAliasSection(SectionLimited<'a, crate::ComponentAlias<'a>>), |
| /// A component type section was received and the provided reader can be |
| /// used to parse the contents of the component type section. |
| #[cfg(feature = "component-model")] |
| ComponentTypeSection(ComponentTypeSectionReader<'a>), |
| /// A component canonical section was received and the provided reader can be |
| /// used to parse the contents of the component canonical section. |
| #[cfg(feature = "component-model")] |
| ComponentCanonicalSection(ComponentCanonicalSectionReader<'a>), |
| /// A component start section was received. |
| #[cfg(feature = "component-model")] |
| ComponentStartSection { |
| /// The start function description. |
| start: ComponentStartFunction, |
| /// The range of bytes that specify the `start` field. |
| range: Range<usize>, |
| }, |
| /// A component import section was received and the provided reader can be |
| /// used to parse the contents of the component import section. |
| #[cfg(feature = "component-model")] |
| ComponentImportSection(ComponentImportSectionReader<'a>), |
| /// A component export section was received, and the provided reader can be |
| /// used to parse the contents of the component export section. |
| #[cfg(feature = "component-model")] |
| ComponentExportSection(ComponentExportSectionReader<'a>), |
| |
| /// A module or component custom section was received. |
| CustomSection(CustomSectionReader<'a>), |
| |
| /// An unknown section was found. |
| /// |
| /// This variant is returned for all unknown sections encountered. This |
| /// likely wants to be interpreted as an error by consumers of the parser, |
| /// but this can also be used to parse sections currently unsupported by |
| /// the parser. |
| UnknownSection { |
| /// The 8-bit identifier for this section. |
| id: u8, |
| /// The contents of this section. |
| contents: &'a [u8], |
| /// The range of bytes, relative to the start of the original data |
| /// stream, that the contents of this section reside in. |
| range: Range<usize>, |
| }, |
| |
| /// The end of the WebAssembly module or component was reached. |
| /// |
| /// The value is the offset in the input byte stream where the end |
| /// was reached. |
| End(usize), |
| } |
| |
// Section ids of core modules. `CUSTOM_SECTION` is shared with components.
// Module and component ids overlap numerically, so an id is only meaningful
// together with the encoding of the binary it was read from.

/// Id of a custom section.
const CUSTOM_SECTION: u8 = 0;
/// Id of the module type section.
const TYPE_SECTION: u8 = 1;
/// Id of the module import section.
const IMPORT_SECTION: u8 = 2;
/// Id of the module function section.
const FUNCTION_SECTION: u8 = 3;
/// Id of the module table section.
const TABLE_SECTION: u8 = 4;
/// Id of the module memory section.
const MEMORY_SECTION: u8 = 5;
/// Id of the module global section.
const GLOBAL_SECTION: u8 = 6;
/// Id of the module export section.
const EXPORT_SECTION: u8 = 7;
/// Id of the module start section.
const START_SECTION: u8 = 8;
/// Id of the module element section.
const ELEMENT_SECTION: u8 = 9;
/// Id of the module code section.
const CODE_SECTION: u8 = 10;
/// Id of the module data section.
const DATA_SECTION: u8 = 11;
/// Id of the module data count section.
const DATA_COUNT_SECTION: u8 = 12;
/// Id of the module tag section.
const TAG_SECTION: u8 = 13;

// Section ids of components; only available with the `component-model`
// feature.

/// Id of a core module section nested in a component.
#[cfg(feature = "component-model")]
const COMPONENT_MODULE_SECTION: u8 = 1;
/// Id of the core instance section of a component.
#[cfg(feature = "component-model")]
const COMPONENT_CORE_INSTANCE_SECTION: u8 = 2;
/// Id of the core type section of a component.
#[cfg(feature = "component-model")]
const COMPONENT_CORE_TYPE_SECTION: u8 = 3;
/// Id of a nested component section.
#[cfg(feature = "component-model")]
const COMPONENT_SECTION: u8 = 4;
/// Id of the component instance section.
#[cfg(feature = "component-model")]
const COMPONENT_INSTANCE_SECTION: u8 = 5;
/// Id of the component alias section.
#[cfg(feature = "component-model")]
const COMPONENT_ALIAS_SECTION: u8 = 6;
/// Id of the component type section.
#[cfg(feature = "component-model")]
const COMPONENT_TYPE_SECTION: u8 = 7;
/// Id of the component canonical section.
#[cfg(feature = "component-model")]
const COMPONENT_CANONICAL_SECTION: u8 = 8;
/// Id of the component start section.
#[cfg(feature = "component-model")]
const COMPONENT_START_SECTION: u8 = 9;
/// Id of the component import section.
#[cfg(feature = "component-model")]
const COMPONENT_IMPORT_SECTION: u8 = 10;
/// Id of the component export section.
#[cfg(feature = "component-model")]
const COMPONENT_EXPORT_SECTION: u8 = 11;
| |
| impl Parser { |
| /// Creates a new parser. |
| /// |
| /// Reports errors and ranges relative to `offset` provided, where `offset` |
| /// is some logical offset within the input stream that we're parsing. |
| pub fn new(offset: u64) -> Parser { |
| Parser { |
| state: State::Header, |
| offset, |
| max_size: u64::MAX, |
| // Assume the encoding is a module until we know otherwise |
| encoding: Encoding::Module, |
| #[cfg(feature = "features")] |
| features: WasmFeatures::all(), |
| } |
| } |
| |
| /// Tests whether `bytes` looks like a core WebAssembly module. |
| /// |
| /// This will inspect the first 8 bytes of `bytes` and return `true` if it |
| /// starts with the standard core WebAssembly header. |
| pub fn is_core_wasm(bytes: &[u8]) -> bool { |
| const HEADER: [u8; 8] = [ |
| WASM_MAGIC_NUMBER[0], |
| WASM_MAGIC_NUMBER[1], |
| WASM_MAGIC_NUMBER[2], |
| WASM_MAGIC_NUMBER[3], |
| WASM_MODULE_VERSION.to_le_bytes()[0], |
| WASM_MODULE_VERSION.to_le_bytes()[1], |
| KIND_MODULE.to_le_bytes()[0], |
| KIND_MODULE.to_le_bytes()[1], |
| ]; |
| bytes.starts_with(&HEADER) |
| } |
| |
| /// Tests whether `bytes` looks like a WebAssembly component. |
| /// |
| /// This will inspect the first 8 bytes of `bytes` and return `true` if it |
| /// starts with the standard WebAssembly component header. |
| pub fn is_component(bytes: &[u8]) -> bool { |
| const HEADER: [u8; 8] = [ |
| WASM_MAGIC_NUMBER[0], |
| WASM_MAGIC_NUMBER[1], |
| WASM_MAGIC_NUMBER[2], |
| WASM_MAGIC_NUMBER[3], |
| WASM_COMPONENT_VERSION.to_le_bytes()[0], |
| WASM_COMPONENT_VERSION.to_le_bytes()[1], |
| KIND_COMPONENT.to_le_bytes()[0], |
| KIND_COMPONENT.to_le_bytes()[1], |
| ]; |
| bytes.starts_with(&HEADER) |
| } |
| |
/// Gets the set of wasm features this parser currently applies while
/// parsing.
///
/// A new parser starts out with [`WasmFeatures::all()`].
///
/// See [`BinaryReader::new`] for more information.
#[cfg(feature = "features")]
pub fn features(&self) -> WasmFeatures {
    self.features
}
| |
/// Replaces the set of wasm features that is active while parsing with
/// `features`.
///
/// A new parser starts out with [`WasmFeatures::all()`].
///
/// See [`BinaryReader::new`] for more information.
#[cfg(feature = "features")]
pub fn set_features(&mut self, features: WasmFeatures) {
    self.features = features;
}
| |
| /// Returns the original offset that this parser is currently at. |
| pub fn offset(&self) -> u64 { |
| self.offset |
| } |
| |
| /// Attempts to parse a chunk of data. |
| /// |
| /// This method will attempt to parse the next incremental portion of a |
| /// WebAssembly binary. Data available for the module or component is |
| /// provided as `data`, and the data can be incomplete if more data has yet |
| /// to arrive. The `eof` flag indicates whether more data will ever be received. |
| /// |
| /// There are two ways parsing can succeed with this method: |
| /// |
| /// * `Chunk::NeedMoreData` - this indicates that there is not enough bytes |
| /// in `data` to parse a payload. The caller needs to wait for more data to |
| /// be available in this situation before calling this method again. It is |
| /// guaranteed that this is only returned if `eof` is `false`. |
| /// |
| /// * `Chunk::Parsed` - this indicates that a chunk of the input was |
| /// successfully parsed. The payload is available in this variant of what |
| /// was parsed, and this also indicates how many bytes of `data` was |
| /// consumed. It's expected that the caller will not provide these bytes |
| /// back to the [`Parser`] again. |
| /// |
| /// Note that all `Chunk` return values are connected, with a lifetime, to |
| /// the input buffer. Each parsed chunk borrows the input buffer and is a |
| /// view into it for successfully parsed chunks. |
| /// |
| /// It is expected that you'll call this method until `Payload::End` is |
| /// reached, at which point you're guaranteed that the parse has completed. |
| /// Note that complete parsing, for the top-level module or component, |
| /// implies that `data` is empty and `eof` is `true`. |
| /// |
| /// # Errors |
| /// |
| /// Parse errors are returned as an `Err`. Errors can happen when the |
| /// structure of the data is unexpected or if sections are too large for |
| /// example. Note that errors are not returned for malformed *contents* of |
| /// sections here. Sections are generally not individually parsed and each |
| /// returned [`Payload`] needs to be iterated over further to detect all |
| /// errors. |
| /// |
| /// # Examples |
| /// |
| /// An example of reading a wasm file from a stream (`std::io::Read`) and |
| /// incrementally parsing it. |
| /// |
| /// ``` |
| /// use std::io::Read; |
| /// use anyhow::Result; |
| /// use wasmparser::{Parser, Chunk, Payload::*}; |
| /// |
| /// fn parse(mut reader: impl Read) -> Result<()> { |
| /// let mut buf = Vec::new(); |
| /// let mut cur = Parser::new(0); |
| /// let mut eof = false; |
| /// let mut stack = Vec::new(); |
| /// |
| /// loop { |
| /// let (payload, consumed) = match cur.parse(&buf, eof)? { |
| /// Chunk::NeedMoreData(hint) => { |
| /// assert!(!eof); // otherwise an error would be returned |
| /// |
| /// // Use the hint to preallocate more space, then read |
| /// // some more data into our buffer. |
| /// // |
| /// // Note that the buffer management here is not ideal, |
| /// // but it's compact enough to fit in an example! |
| /// let len = buf.len(); |
| /// buf.extend((0..hint).map(|_| 0u8)); |
| /// let n = reader.read(&mut buf[len..])?; |
| /// buf.truncate(len + n); |
| /// eof = n == 0; |
| /// continue; |
| /// } |
| /// |
| /// Chunk::Parsed { consumed, payload } => (payload, consumed), |
| /// }; |
| /// |
| /// match payload { |
| /// // Sections for WebAssembly modules |
| /// Version { .. } => { /* ... */ } |
| /// TypeSection(_) => { /* ... */ } |
| /// ImportSection(_) => { /* ... */ } |
| /// FunctionSection(_) => { /* ... */ } |
| /// TableSection(_) => { /* ... */ } |
| /// MemorySection(_) => { /* ... */ } |
| /// TagSection(_) => { /* ... */ } |
| /// GlobalSection(_) => { /* ... */ } |
| /// ExportSection(_) => { /* ... */ } |
| /// StartSection { .. } => { /* ... */ } |
| /// ElementSection(_) => { /* ... */ } |
| /// DataCountSection { .. } => { /* ... */ } |
| /// DataSection(_) => { /* ... */ } |
| /// |
| /// // Here we know how many functions we'll be receiving as |
| /// // `CodeSectionEntry`, so we can prepare for that, and |
| /// // afterwards we can parse and handle each function |
| /// // individually. |
| /// CodeSectionStart { .. } => { /* ... */ } |
| /// CodeSectionEntry(body) => { |
| /// // here we can iterate over `body` to parse the function |
| /// // and its locals |
| /// } |
| /// |
| /// // Sections for WebAssembly components |
| /// InstanceSection(_) => { /* ... */ } |
| /// CoreTypeSection(_) => { /* ... */ } |
| /// ComponentInstanceSection(_) => { /* ... */ } |
| /// ComponentAliasSection(_) => { /* ... */ } |
| /// ComponentTypeSection(_) => { /* ... */ } |
| /// ComponentCanonicalSection(_) => { /* ... */ } |
| /// ComponentStartSection { .. } => { /* ... */ } |
| /// ComponentImportSection(_) => { /* ... */ } |
| /// ComponentExportSection(_) => { /* ... */ } |
| /// |
| /// ModuleSection { parser, .. } |
| /// | ComponentSection { parser, .. } => { |
| /// stack.push(cur.clone()); |
| /// cur = parser.clone(); |
| /// } |
| /// |
| /// CustomSection(_) => { /* ... */ } |
| /// |
| /// // Once we've reached the end of a parser we either resume |
| /// // at the parent parser or we break out of the loop because |
| /// // we're done. |
| /// End(_) => { |
| /// if let Some(parent_parser) = stack.pop() { |
| /// cur = parent_parser; |
| /// } else { |
| /// break; |
| /// } |
| /// } |
| /// |
| /// // most likely you'd return an error here |
| /// _ => { /* ... */ } |
| /// } |
| /// |
| /// // once we're done processing the payload we can forget the |
| /// // original. |
| /// buf.drain(..consumed); |
| /// } |
| /// |
| /// Ok(()) |
| /// } |
| /// |
| /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap(); |
| /// ``` |
| pub fn parse<'a>(&mut self, data: &'a [u8], eof: bool) -> Result<Chunk<'a>> { |
| let (data, eof) = if usize_to_u64(data.len()) > self.max_size { |
| (&data[..(self.max_size as usize)], true) |
| } else { |
| (data, eof) |
| }; |
| // TODO: thread through `offset: u64` to `BinaryReader`, remove |
| // the cast here. |
| let starting_offset = self.offset as usize; |
| let mut reader = BinaryReader::new(data, starting_offset); |
| #[cfg(feature = "features")] |
| { |
| reader.set_features(self.features); |
| } |
| match self.parse_reader(&mut reader, eof) { |
| Ok(payload) => { |
| // Be sure to update our offset with how far we got in the |
| // reader |
| let consumed = reader.original_position() - starting_offset; |
| self.offset += usize_to_u64(consumed); |
| self.max_size -= usize_to_u64(consumed); |
| Ok(Chunk::Parsed { |
| consumed: consumed, |
| payload, |
| }) |
| } |
| Err(e) => { |
| // If we're at EOF then there's no way we can recover from any |
| // error, so continue to propagate it. |
| if eof { |
| return Err(e); |
| } |
| |
| // If our error doesn't look like it can be resolved with more |
| // data being pulled down, then propagate it, otherwise switch |
| // the error to "feed me please" |
| match e.inner.needed_hint { |
| Some(hint) => Ok(Chunk::NeedMoreData(usize_to_u64(hint))), |
| None => Err(e), |
| } |
| } |
| } |
| } |
| |
| fn parse_reader<'a>( |
| &mut self, |
| reader: &mut BinaryReader<'a>, |
| eof: bool, |
| ) -> Result<Payload<'a>> { |
| use Payload::*; |
| |
| match self.state { |
| State::Header => { |
| let start = reader.original_position(); |
| let header_version = reader.read_header_version()?; |
| self.encoding = match (header_version >> 16) as u16 { |
| KIND_MODULE => Encoding::Module, |
| KIND_COMPONENT => Encoding::Component, |
| _ => bail!(start + 4, "unknown binary version: {header_version:#10x}"), |
| }; |
| let num = header_version as u16; |
| self.state = State::SectionStart; |
| Ok(Version { |
| num, |
| encoding: self.encoding, |
| range: start..reader.original_position(), |
| }) |
| } |
| State::SectionStart => { |
| // If we're at eof and there are no bytes in our buffer, then |
| // that means we reached the end of the data since it's |
| // just a bunch of sections concatenated after the header. |
| if eof && reader.bytes_remaining() == 0 { |
| return Ok(Payload::End(reader.original_position())); |
| } |
| |
| let id_pos = reader.original_position(); |
| let id = reader.read_u8()?; |
| if id & 0x80 != 0 { |
| return Err(BinaryReaderError::new("malformed section id", id_pos)); |
| } |
| let len_pos = reader.original_position(); |
| let mut len = reader.read_var_u32()?; |
| |
| // Test to make sure that this section actually fits within |
| // `Parser::max_size`. This doesn't matter for top-level modules |
| // but it is required for nested modules/components to correctly ensure |
| // that all sections live entirely within their section of the |
| // file. |
| let consumed = reader.original_position() - id_pos; |
| let section_overflow = self |
| .max_size |
| .checked_sub(usize_to_u64(consumed)) |
| .and_then(|s| s.checked_sub(len.into())) |
| .is_none(); |
| if section_overflow { |
| return Err(BinaryReaderError::new("section too large", len_pos)); |
| } |
| |
| match (self.encoding, id) { |
| // Sections for both modules and components. |
| (_, 0) => section(reader, len, CustomSectionReader::new, CustomSection), |
| |
| // Module sections |
| (Encoding::Module, TYPE_SECTION) => { |
| section(reader, len, TypeSectionReader::new, TypeSection) |
| } |
| (Encoding::Module, IMPORT_SECTION) => { |
| section(reader, len, ImportSectionReader::new, ImportSection) |
| } |
| (Encoding::Module, FUNCTION_SECTION) => { |
| section(reader, len, FunctionSectionReader::new, FunctionSection) |
| } |
| (Encoding::Module, TABLE_SECTION) => { |
| section(reader, len, TableSectionReader::new, TableSection) |
| } |
| (Encoding::Module, MEMORY_SECTION) => { |
| section(reader, len, MemorySectionReader::new, MemorySection) |
| } |
| (Encoding::Module, GLOBAL_SECTION) => { |
| section(reader, len, GlobalSectionReader::new, GlobalSection) |
| } |
| (Encoding::Module, EXPORT_SECTION) => { |
| section(reader, len, ExportSectionReader::new, ExportSection) |
| } |
| (Encoding::Module, START_SECTION) => { |
| let (func, range) = single_item(reader, len, "start")?; |
| Ok(StartSection { func, range }) |
| } |
| (Encoding::Module, ELEMENT_SECTION) => { |
| section(reader, len, ElementSectionReader::new, ElementSection) |
| } |
| (Encoding::Module, CODE_SECTION) => { |
| let start = reader.original_position(); |
| let count = delimited(reader, &mut len, |r| r.read_var_u32())?; |
| let range = start..reader.original_position() + len as usize; |
| self.state = State::FunctionBody { |
| remaining: count, |
| len, |
| }; |
| Ok(CodeSectionStart { |
| count, |
| range, |
| size: len, |
| }) |
| } |
| (Encoding::Module, DATA_SECTION) => { |
| section(reader, len, DataSectionReader::new, DataSection) |
| } |
| (Encoding::Module, DATA_COUNT_SECTION) => { |
| let (count, range) = single_item(reader, len, "data count")?; |
| Ok(DataCountSection { count, range }) |
| } |
| (Encoding::Module, TAG_SECTION) => { |
| section(reader, len, TagSectionReader::new, TagSection) |
| } |
| |
| // Component sections |
| #[cfg(feature = "component-model")] |
| (Encoding::Component, COMPONENT_MODULE_SECTION) |
| | (Encoding::Component, COMPONENT_SECTION) => { |
| if len as usize > MAX_WASM_MODULE_SIZE { |
| bail!( |
| len_pos, |
| "{} section is too large", |
| if id == 1 { "module" } else { "component " } |
| ); |
| } |
| |
| let range = reader.original_position() |
| ..reader.original_position() + usize::try_from(len).unwrap(); |
| self.max_size -= u64::from(len); |
| self.offset += u64::from(len); |
| let mut parser = Parser::new(usize_to_u64(reader.original_position())); |
| #[cfg(feature = "features")] |
| { |
| parser.features = self.features; |
| } |
| parser.max_size = u64::from(len); |
| |
| Ok(match id { |
| 1 => ModuleSection { |
| parser, |
| unchecked_range: range, |
| }, |
| 4 => ComponentSection { |
| parser, |
| unchecked_range: range, |
| }, |
| _ => unreachable!(), |
| }) |
| } |
| #[cfg(feature = "component-model")] |
| (Encoding::Component, COMPONENT_CORE_INSTANCE_SECTION) => { |
| section(reader, len, InstanceSectionReader::new, InstanceSection) |
| } |
| #[cfg(feature = "component-model")] |
| (Encoding::Component, COMPONENT_CORE_TYPE_SECTION) => { |
| section(reader, len, CoreTypeSectionReader::new, CoreTypeSection) |
| } |
| #[cfg(feature = "component-model")] |
| (Encoding::Component, COMPONENT_INSTANCE_SECTION) => section( |
| reader, |
| len, |
| ComponentInstanceSectionReader::new, |
| ComponentInstanceSection, |
| ), |
| #[cfg(feature = "component-model")] |
| (Encoding::Component, COMPONENT_ALIAS_SECTION) => { |
| section(reader, len, SectionLimited::new, ComponentAliasSection) |
| } |
| #[cfg(feature = "component-model")] |
| (Encoding::Component, COMPONENT_TYPE_SECTION) => section( |
| reader, |
| len, |
| ComponentTypeSectionReader::new, |
| ComponentTypeSection, |
| ), |
| #[cfg(feature = "component-model")] |
| (Encoding::Component, COMPONENT_CANONICAL_SECTION) => section( |
| reader, |
| len, |
| ComponentCanonicalSectionReader::new, |
| ComponentCanonicalSection, |
| ), |
| #[cfg(feature = "component-model")] |
| (Encoding::Component, COMPONENT_START_SECTION) => { |
| let (start, range) = single_item(reader, len, "component start")?; |
| Ok(ComponentStartSection { start, range }) |
| } |
| #[cfg(feature = "component-model")] |
| (Encoding::Component, COMPONENT_IMPORT_SECTION) => section( |
| reader, |
| len, |
| ComponentImportSectionReader::new, |
| ComponentImportSection, |
| ), |
| #[cfg(feature = "component-model")] |
| (Encoding::Component, COMPONENT_EXPORT_SECTION) => section( |
| reader, |
| len, |
| ComponentExportSectionReader::new, |
| ComponentExportSection, |
| ), |
| (_, id) => { |
| let offset = reader.original_position(); |
| let contents = reader.read_bytes(len as usize)?; |
| let range = offset..offset + len as usize; |
| Ok(UnknownSection { |
| id, |
| contents, |
| range, |
| }) |
| } |
| } |
| } |
| |
| // Once we hit 0 remaining incrementally parsed items, with 0 |
| // remaining bytes in each section, we're done and can switch back |
| // to parsing sections. |
| State::FunctionBody { |
| remaining: 0, |
| len: 0, |
| } => { |
| self.state = State::SectionStart; |
| self.parse_reader(reader, eof) |
| } |
| |
| // ... otherwise trailing bytes with no remaining entries in these |
| // sections indicate an error. |
| State::FunctionBody { remaining: 0, len } => { |
| debug_assert!(len > 0); |
| let offset = reader.original_position(); |
| Err(BinaryReaderError::new( |
| "trailing bytes at end of section", |
| offset, |
| )) |
| } |
| |
| // Functions are relatively easy to parse when we know there's at |
| // least one remaining and at least one byte available to read |
| // things. |
| // |
| // We use the remaining length to try to read a u32 size of the |
| // function, and using that size we require the entire function be |
| // resident in memory. This means that we're reading whole chunks of |
| // functions at a time. |
| // |
| // Limiting via `Parser::max_size` (nested parsing) happens above in |
| // `fn parse`, and limiting by our section size happens via |
| // `delimited`. Actual parsing of the function body is delegated to |
| // the caller to iterate over the `FunctionBody` structure. |
| State::FunctionBody { remaining, mut len } => { |
| let body = delimited(reader, &mut len, |r| { |
| Ok(FunctionBody::new(r.read_reader()?)) |
| })?; |
| self.state = State::FunctionBody { |
| remaining: remaining - 1, |
| len, |
| }; |
| Ok(CodeSectionEntry(body)) |
| } |
| } |
| } |
| |
| /// Convenience function that can be used to parse a module or component |
| /// that is entirely resident in memory. |
| /// |
| /// This function will parse the `data` provided as a WebAssembly module |
| /// or component. |
| /// |
| /// Note that when this function yields sections that provide parsers, |
| /// no further action is required for those sections as payloads from |
| /// those parsers will be automatically returned. |
| /// |
| /// # Examples |
| /// |
| /// An example of reading a wasm file from a stream (`std::io::Read`) into |
| /// a buffer and then parsing it. |
| /// |
| /// ``` |
| /// use std::io::Read; |
| /// use anyhow::Result; |
| /// use wasmparser::{Parser, Chunk, Payload::*}; |
| /// |
| /// fn parse(mut reader: impl Read) -> Result<()> { |
| /// let mut buf = Vec::new(); |
| /// reader.read_to_end(&mut buf)?; |
| /// let parser = Parser::new(0); |
| /// |
| /// for payload in parser.parse_all(&buf) { |
| /// match payload? { |
| /// // Sections for WebAssembly modules |
| /// Version { .. } => { /* ... */ } |
| /// TypeSection(_) => { /* ... */ } |
| /// ImportSection(_) => { /* ... */ } |
| /// FunctionSection(_) => { /* ... */ } |
| /// TableSection(_) => { /* ... */ } |
| /// MemorySection(_) => { /* ... */ } |
| /// TagSection(_) => { /* ... */ } |
| /// GlobalSection(_) => { /* ... */ } |
| /// ExportSection(_) => { /* ... */ } |
| /// StartSection { .. } => { /* ... */ } |
| /// ElementSection(_) => { /* ... */ } |
| /// DataCountSection { .. } => { /* ... */ } |
| /// DataSection(_) => { /* ... */ } |
| /// |
| /// // Here we know how many functions we'll be receiving as |
| /// // `CodeSectionEntry`, so we can prepare for that, and |
| /// // afterwards we can parse and handle each function |
| /// // individually. |
| /// CodeSectionStart { .. } => { /* ... */ } |
| /// CodeSectionEntry(body) => { |
| /// // here we can iterate over `body` to parse the function |
| /// // and its locals |
| /// } |
| /// |
| /// // Sections for WebAssembly components |
| /// ModuleSection { .. } => { /* ... */ } |
| /// InstanceSection(_) => { /* ... */ } |
| /// CoreTypeSection(_) => { /* ... */ } |
| /// ComponentSection { .. } => { /* ... */ } |
| /// ComponentInstanceSection(_) => { /* ... */ } |
| /// ComponentAliasSection(_) => { /* ... */ } |
| /// ComponentTypeSection(_) => { /* ... */ } |
| /// ComponentCanonicalSection(_) => { /* ... */ } |
| /// ComponentStartSection { .. } => { /* ... */ } |
| /// ComponentImportSection(_) => { /* ... */ } |
| /// ComponentExportSection(_) => { /* ... */ } |
| /// |
| /// CustomSection(_) => { /* ... */ } |
| /// |
| /// // Once we've reached the end of a parser we either resume |
| /// // at the parent parser or the payload iterator is at its |
| /// // end and we're done. |
| /// End(_) => {} |
| /// |
| /// // most likely you'd return an error here, but if you want |
| /// // you can also inspect the raw contents of unknown sections |
| /// other => { |
| /// match other.as_section() { |
| /// Some((id, range)) => { /* ... */ } |
| /// None => { /* ... */ } |
| /// } |
| /// } |
| /// } |
| /// } |
| /// |
| /// Ok(()) |
| /// } |
| /// |
| /// # parse(&b"\0asm\x01\0\0\0"[..]).unwrap(); |
| /// ``` |
| pub fn parse_all(self, mut data: &[u8]) -> impl Iterator<Item = Result<Payload>> { |
| let mut stack = Vec::new(); |
| let mut cur = self; |
| let mut done = false; |
| iter::from_fn(move || { |
| if done { |
| return None; |
| } |
| let payload = match cur.parse(data, true) { |
| // Propagate all errors |
| Err(e) => { |
| done = true; |
| return Some(Err(e)); |
| } |
| |
| // This isn't possible because `eof` is always true. |
| Ok(Chunk::NeedMoreData(_)) => unreachable!(), |
| |
| Ok(Chunk::Parsed { payload, consumed }) => { |
| data = &data[consumed..]; |
| payload |
| } |
| }; |
| |
| match &payload { |
| #[cfg(feature = "component-model")] |
| Payload::ModuleSection { parser, .. } |
| | Payload::ComponentSection { parser, .. } => { |
| stack.push(cur.clone()); |
| cur = parser.clone(); |
| } |
| Payload::End(_) => match stack.pop() { |
| Some(p) => cur = p, |
| None => done = true, |
| }, |
| |
| _ => {} |
| } |
| |
| Some(Ok(payload)) |
| }) |
| } |
| |
| /// Skip parsing the code section entirely. |
| /// |
| /// This function can be used to indicate, after receiving |
| /// `CodeSectionStart`, that the section will not be parsed. |
| /// |
| /// The caller will be responsible for skipping `size` bytes (found in the |
| /// `CodeSectionStart` payload). Bytes should only be fed into `parse` |
| /// after the `size` bytes have been skipped. |
| /// |
| /// # Panics |
| /// |
| /// This function will panic if the parser is not in a state where it's |
| /// parsing the code section. |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use wasmparser::{Result, Parser, Chunk, Payload::*}; |
| /// use core::ops::Range; |
| /// |
| /// fn objdump_headers(mut wasm: &[u8]) -> Result<()> { |
| /// let mut parser = Parser::new(0); |
| /// loop { |
| /// let payload = match parser.parse(wasm, true)? { |
| /// Chunk::Parsed { consumed, payload } => { |
| /// wasm = &wasm[consumed..]; |
| /// payload |
| /// } |
| /// // this state isn't possible with `eof = true` |
| /// Chunk::NeedMoreData(_) => unreachable!(), |
| /// }; |
| /// match payload { |
| /// TypeSection(s) => print_range("type section", &s.range()), |
| /// ImportSection(s) => print_range("import section", &s.range()), |
| /// // .. other sections |
| /// |
| /// // Print the range of the code section we see, but don't |
| /// // actually iterate over each individual function. |
| /// CodeSectionStart { range, size, .. } => { |
| /// print_range("code section", &range); |
| /// parser.skip_section(); |
| /// wasm = &wasm[size as usize..]; |
| /// } |
| /// End(_) => break, |
| /// _ => {} |
| /// } |
| /// } |
| /// Ok(()) |
| /// } |
| /// |
| /// fn print_range(section: &str, range: &Range<usize>) { |
| /// println!("{:>40}: {:#010x} - {:#010x}", section, range.start, range.end); |
| /// } |
| /// ``` |
| pub fn skip_section(&mut self) { |
| let skip = match self.state { |
| State::FunctionBody { remaining: _, len } => len, |
| _ => panic!("wrong state to call `skip_section`"), |
| }; |
| self.offset += u64::from(skip); |
| self.max_size -= u64::from(skip); |
| self.state = State::SectionStart; |
| } |
| } |
| |
/// Converts a `usize` into a `u64`.
///
/// # Panics
///
/// Panics if `a` does not fit into a `u64`.
fn usize_to_u64(a: usize) -> u64 {
    u64::try_from(a).unwrap()
}
| |
| /// Parses an entire section resident in memory into a `Payload`. |
| /// |
| /// Requires that `len` bytes are resident in `reader` and uses `ctor`/`variant` |
| /// to construct the section to return. |
| fn section<'a, T>( |
| reader: &mut BinaryReader<'a>, |
| len: u32, |
| ctor: fn(BinaryReader<'a>) -> Result<T>, |
| variant: fn(T) -> Payload<'a>, |
| ) -> Result<Payload<'a>> { |
| let reader = reader.skip(|r| { |
| r.read_bytes(len as usize)?; |
| Ok(()) |
| })?; |
| // clear the hint for "need this many more bytes" here because we already |
| // read all the bytes, so it's not possible to read more bytes if this |
| // fails. |
| let reader = ctor(reader).map_err(clear_hint)?; |
| Ok(variant(reader)) |
| } |
| |
| /// Reads a section that is represented by a single uleb-encoded `u32`. |
| fn single_item<'a, T>( |
| reader: &mut BinaryReader<'a>, |
| len: u32, |
| desc: &str, |
| ) -> Result<(T, Range<usize>)> |
| where |
| T: FromReader<'a>, |
| { |
| let range = reader.original_position()..reader.original_position() + len as usize; |
| let mut content = reader.skip(|r| { |
| r.read_bytes(len as usize)?; |
| Ok(()) |
| })?; |
| // We can't recover from "unexpected eof" here because our entire section is |
| // already resident in memory, so clear the hint for how many more bytes are |
| // expected. |
| let ret = content.read().map_err(clear_hint)?; |
| if !content.eof() { |
| bail!( |
| content.original_position(), |
| "unexpected content in the {desc} section", |
| ); |
| } |
| Ok((ret, range)) |
| } |
| |
| /// Attempts to parse using `f`. |
| /// |
| /// This will update `*len` with the number of bytes consumed, and it will cause |
| /// a failure to be returned instead of the number of bytes consumed exceeds |
| /// what `*len` currently is. |
| fn delimited<'a, T>( |
| reader: &mut BinaryReader<'a>, |
| len: &mut u32, |
| f: impl FnOnce(&mut BinaryReader<'a>) -> Result<T>, |
| ) -> Result<T> { |
| let start = reader.original_position(); |
| let ret = f(reader)?; |
| *len = match (reader.original_position() - start) |
| .try_into() |
| .ok() |
| .and_then(|i| len.checked_sub(i)) |
| { |
| Some(i) => i, |
| None => return Err(BinaryReaderError::new("unexpected end-of-file", start)), |
| }; |
| Ok(ret) |
| } |
| |
| impl Default for Parser { |
| fn default() -> Parser { |
| Parser::new(0) |
| } |
| } |
| |
| impl Payload<'_> { |
| /// If this `Payload` represents a section in the original wasm module then |
| /// the section's id and range within the original wasm binary are returned. |
| /// |
| /// Not all payloads refer to entire sections, such as the `Version` and |
| /// `CodeSectionEntry` variants. These variants will return `None` from this |
| /// function. |
| /// |
| /// Otherwise this function will return `Some` where the first element is |
| /// the byte identifier for the section and the second element is the range |
| /// of the contents of the section within the original wasm binary. |
| /// |
| /// The purpose of this method is to enable tools to easily iterate over |
| /// entire sections if necessary and handle sections uniformly, for example |
| /// dropping custom sections while preserving all other sections. |
| pub fn as_section(&self) -> Option<(u8, Range<usize>)> { |
| use Payload::*; |
| |
| match self { |
| Version { .. } => None, |
| TypeSection(s) => Some((TYPE_SECTION, s.range())), |
| ImportSection(s) => Some((IMPORT_SECTION, s.range())), |
| FunctionSection(s) => Some((FUNCTION_SECTION, s.range())), |
| TableSection(s) => Some((TABLE_SECTION, s.range())), |
| MemorySection(s) => Some((MEMORY_SECTION, s.range())), |
| TagSection(s) => Some((TAG_SECTION, s.range())), |
| GlobalSection(s) => Some((GLOBAL_SECTION, s.range())), |
| ExportSection(s) => Some((EXPORT_SECTION, s.range())), |
| ElementSection(s) => Some((ELEMENT_SECTION, s.range())), |
| DataSection(s) => Some((DATA_SECTION, s.range())), |
| StartSection { range, .. } => Some((START_SECTION, range.clone())), |
| DataCountSection { range, .. } => Some((DATA_COUNT_SECTION, range.clone())), |
| CodeSectionStart { range, .. } => Some((CODE_SECTION, range.clone())), |
| CodeSectionEntry(_) => None, |
| |
| #[cfg(feature = "component-model")] |
| ModuleSection { |
| unchecked_range: range, |
| .. |
| } => Some((COMPONENT_MODULE_SECTION, range.clone())), |
| #[cfg(feature = "component-model")] |
| InstanceSection(s) => Some((COMPONENT_CORE_INSTANCE_SECTION, s.range())), |
| #[cfg(feature = "component-model")] |
| CoreTypeSection(s) => Some((COMPONENT_CORE_TYPE_SECTION, s.range())), |
| #[cfg(feature = "component-model")] |
| ComponentSection { |
| unchecked_range: range, |
| .. |
| } => Some((COMPONENT_SECTION, range.clone())), |
| #[cfg(feature = "component-model")] |
| ComponentInstanceSection(s) => Some((COMPONENT_INSTANCE_SECTION, s.range())), |
| #[cfg(feature = "component-model")] |
| ComponentAliasSection(s) => Some((COMPONENT_ALIAS_SECTION, s.range())), |
| #[cfg(feature = "component-model")] |
| ComponentTypeSection(s) => Some((COMPONENT_TYPE_SECTION, s.range())), |
| #[cfg(feature = "component-model")] |
| ComponentCanonicalSection(s) => Some((COMPONENT_CANONICAL_SECTION, s.range())), |
| #[cfg(feature = "component-model")] |
| ComponentStartSection { range, .. } => Some((COMPONENT_START_SECTION, range.clone())), |
| #[cfg(feature = "component-model")] |
| ComponentImportSection(s) => Some((COMPONENT_IMPORT_SECTION, s.range())), |
| #[cfg(feature = "component-model")] |
| ComponentExportSection(s) => Some((COMPONENT_EXPORT_SECTION, s.range())), |
| |
| CustomSection(c) => Some((CUSTOM_SECTION, c.range())), |
| |
| UnknownSection { id, range, .. } => Some((*id, range.clone())), |
| |
| End(_) => None, |
| } |
| } |
| } |
| |
| impl fmt::Debug for Payload<'_> { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| use Payload::*; |
| match self { |
| Version { |
| num, |
| encoding, |
| range, |
| } => f |
| .debug_struct("Version") |
| .field("num", num) |
| .field("encoding", encoding) |
| .field("range", range) |
| .finish(), |
| |
| // Module sections |
| TypeSection(_) => f.debug_tuple("TypeSection").field(&"...").finish(), |
| ImportSection(_) => f.debug_tuple("ImportSection").field(&"...").finish(), |
| FunctionSection(_) => f.debug_tuple("FunctionSection").field(&"...").finish(), |
| TableSection(_) => f.debug_tuple("TableSection").field(&"...").finish(), |
| MemorySection(_) => f.debug_tuple("MemorySection").field(&"...").finish(), |
| TagSection(_) => f.debug_tuple("TagSection").field(&"...").finish(), |
| GlobalSection(_) => f.debug_tuple("GlobalSection").field(&"...").finish(), |
| ExportSection(_) => f.debug_tuple("ExportSection").field(&"...").finish(), |
| ElementSection(_) => f.debug_tuple("ElementSection").field(&"...").finish(), |
| DataSection(_) => f.debug_tuple("DataSection").field(&"...").finish(), |
| StartSection { func, range } => f |
| .debug_struct("StartSection") |
| .field("func", func) |
| .field("range", range) |
| .finish(), |
| DataCountSection { count, range } => f |
| .debug_struct("DataCountSection") |
| .field("count", count) |
| .field("range", range) |
| .finish(), |
| CodeSectionStart { count, range, size } => f |
| .debug_struct("CodeSectionStart") |
| .field("count", count) |
| .field("range", range) |
| .field("size", size) |
| .finish(), |
| CodeSectionEntry(_) => f.debug_tuple("CodeSectionEntry").field(&"...").finish(), |
| |
| // Component sections |
| #[cfg(feature = "component-model")] |
| ModuleSection { |
| parser: _, |
| unchecked_range: range, |
| } => f |
| .debug_struct("ModuleSection") |
| .field("range", range) |
| .finish(), |
| #[cfg(feature = "component-model")] |
| InstanceSection(_) => f.debug_tuple("InstanceSection").field(&"...").finish(), |
| #[cfg(feature = "component-model")] |
| CoreTypeSection(_) => f.debug_tuple("CoreTypeSection").field(&"...").finish(), |
| #[cfg(feature = "component-model")] |
| ComponentSection { |
| parser: _, |
| unchecked_range: range, |
| } => f |
| .debug_struct("ComponentSection") |
| .field("range", range) |
| .finish(), |
| #[cfg(feature = "component-model")] |
| ComponentInstanceSection(_) => f |
| .debug_tuple("ComponentInstanceSection") |
| .field(&"...") |
| .finish(), |
| #[cfg(feature = "component-model")] |
| ComponentAliasSection(_) => f |
| .debug_tuple("ComponentAliasSection") |
| .field(&"...") |
| .finish(), |
| #[cfg(feature = "component-model")] |
| ComponentTypeSection(_) => f.debug_tuple("ComponentTypeSection").field(&"...").finish(), |
| #[cfg(feature = "component-model")] |
| ComponentCanonicalSection(_) => f |
| .debug_tuple("ComponentCanonicalSection") |
| .field(&"...") |
| .finish(), |
| #[cfg(feature = "component-model")] |
| ComponentStartSection { .. } => f |
| .debug_tuple("ComponentStartSection") |
| .field(&"...") |
| .finish(), |
| #[cfg(feature = "component-model")] |
| ComponentImportSection(_) => f |
| .debug_tuple("ComponentImportSection") |
| .field(&"...") |
| .finish(), |
| #[cfg(feature = "component-model")] |
| ComponentExportSection(_) => f |
| .debug_tuple("ComponentExportSection") |
| .field(&"...") |
| .finish(), |
| |
| CustomSection(c) => f.debug_tuple("CustomSection").field(c).finish(), |
| |
| UnknownSection { id, range, .. } => f |
| .debug_struct("UnknownSection") |
| .field("id", id) |
| .field("range", range) |
| .finish(), |
| |
| End(offset) => f.debug_tuple("End").field(offset).finish(), |
| } |
| } |
| } |
| |
| fn clear_hint(mut err: BinaryReaderError) -> BinaryReaderError { |
| err.inner.needed_hint = None; |
| err |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use super::*; |
| |
/// Asserts that the value of an expression matches a pattern.
///
/// Panics with the `Debug` rendering of the value and the text of the
/// pattern when it does not match. A trailing comma is accepted.
macro_rules! assert_matches {
    ($value:expr, $pattern:pat $(,)?) => {
        match $value {
            $pattern => {}
            unmatched => panic!(
                "`{:?}` doesn't match `{}`",
                unmatched,
                stringify!($pattern)
            ),
        }
    };
}
| |
#[test]
fn header() {
    // An empty input is an error once `eof` is set.
    let at_eof = Parser::default().parse(&[], true);
    assert!(at_eof.is_err());

    // Without `eof`, incomplete headers ask for more data.
    let empty = Parser::default().parse(&[], false);
    assert_matches!(empty, Ok(Chunk::NeedMoreData(4)));

    let one_byte = Parser::default().parse(b"\0", false);
    assert_matches!(one_byte, Ok(Chunk::NeedMoreData(3)));

    let magic_only = Parser::default().parse(b"\0asm", false);
    assert_matches!(magic_only, Ok(Chunk::NeedMoreData(4)));

    // A complete 8-byte header yields the version payload.
    let complete = Parser::default().parse(b"\0asm\x01\0\0\0", false);
    assert_matches!(
        complete,
        Ok(Chunk::Parsed {
            consumed: 8,
            payload: Payload::Version { num: 1, .. },
        }),
    );
}
| |
#[test]
fn header_iter() {
    // Draining the iterator must terminate without panicking, whatever
    // state the header is in.
    let inputs: [&[u8]; 4] = [&[], b"\0", b"\0asm", b"\0asm\x01\x01\x01\x01"];
    for &input in &inputs {
        Parser::default().parse_all(input).for_each(drop);
    }
}
| |
| fn parser_after_header() -> Parser { |
| let mut p = Parser::default(); |
| assert_matches!( |
| p.parse(b"\0asm\x01\0\0\0", false), |
| Ok(Chunk::Parsed { |
| consumed: 8, |
| payload: Payload::Version { |
| num: WASM_MODULE_VERSION, |
| encoding: Encoding::Module, |
| .. |
| }, |
| }), |
| ); |
| p |
| } |
| |
/// Returns a parser that has already consumed a component header.
///
/// Only the component tests use this helper, and those depend on payloads
/// that exist solely with the `component-model` feature, so the helper is
/// gated the same way to avoid dead code when the feature is disabled.
#[cfg(feature = "component-model")]
fn parser_after_component_header() -> Parser {
    let mut p = Parser::default();
    assert_matches!(
        p.parse(b"\0asm\x0d\0\x01\0", false),
        Ok(Chunk::Parsed {
            consumed: 8,
            payload: Payload::Version {
                num: WASM_COMPONENT_VERSION,
                encoding: Encoding::Component,
                ..
            },
        }),
    );
    p
}
| |
#[test]
fn start_section() {
    // No bytes at all: the section header is still missing.
    let nothing = parser_after_header().parse(&[], false);
    assert_matches!(nothing, Ok(Chunk::NeedMoreData(1)));

    // Truncated start sections (id 8) are errors once `eof` is set ...
    let truncated: [&[u8]; 3] = [&[8], &[8, 1], &[8, 2]];
    for &bytes in &truncated {
        assert!(parser_after_header().parse(bytes, true).is_err());
    }

    // ... and requests for more data otherwise.
    let id_only = parser_after_header().parse(&[8], false);
    assert_matches!(id_only, Ok(Chunk::NeedMoreData(1)));
    let missing_one = parser_after_header().parse(&[8, 1], false);
    assert_matches!(missing_one, Ok(Chunk::NeedMoreData(1)));
    let missing_two = parser_after_header().parse(&[8, 2], false);
    assert_matches!(missing_two, Ok(Chunk::NeedMoreData(2)));

    // A complete one-byte start section.
    let complete = parser_after_header().parse(&[8, 1, 1], false);
    assert_matches!(
        complete,
        Ok(Chunk::Parsed {
            consumed: 3,
            payload: Payload::StartSection { func: 1, .. },
        }),
    );

    // Section contents that are too long or empty are rejected.
    let malformed: [&[u8]; 2] = [&[8, 2, 1, 1], &[8, 0]];
    for &bytes in &malformed {
        assert!(parser_after_header().parse(bytes, false).is_err());
    }
}
| |
#[test]
fn end_works() {
    // With no more input and `eof` set, the parser reports the end of the
    // module right after the 8-byte header.
    let mut parser = parser_after_header();
    let end = parser.parse(&[], true);
    assert_matches!(
        end,
        Ok(Chunk::Parsed {
            consumed: 0,
            payload: Payload::End(8),
        }),
    );
}
| |
#[test]
fn type_section() {
    // Inputs that must be rejected.
    let id_only_at_eof = parser_after_header().parse(&[1], true);
    assert!(id_only_at_eof.is_err());
    let empty_section = parser_after_header().parse(&[1, 0], false);
    assert!(empty_section.is_err());
    let truncated_at_eof = parser_after_header().parse(&[8, 2], true);
    assert!(truncated_at_eof.is_err());

    // Incomplete input without `eof` asks for more data.
    let id_only = parser_after_header().parse(&[1], false);
    assert_matches!(id_only, Ok(Chunk::NeedMoreData(1)));
    let missing_contents = parser_after_header().parse(&[1, 1], false);
    assert_matches!(missing_contents, Ok(Chunk::NeedMoreData(1)));

    // A complete section is consumed on its own ...
    let exact = parser_after_header().parse(&[1, 1, 1], false);
    assert_matches!(
        exact,
        Ok(Chunk::Parsed {
            consumed: 3,
            payload: Payload::TypeSection(_),
        }),
    );

    // ... and bytes following it are left untouched.
    let with_trailing = parser_after_header().parse(&[1, 1, 1, 2, 3, 4], false);
    assert_matches!(
        with_trailing,
        Ok(Chunk::Parsed {
            consumed: 3,
            payload: Payload::TypeSection(_),
        }),
    );
}
| |
#[test]
fn custom_section() {
    // Inputs that must be rejected.
    let id_only_at_eof = parser_after_header().parse(&[0], true);
    assert!(id_only_at_eof.is_err());
    let empty_section = parser_after_header().parse(&[0, 0], false);
    assert!(empty_section.is_err());
    let name_too_long = parser_after_header().parse(&[0, 1, 1], false);
    assert!(name_too_long.is_err());

    // The section declares two bytes but only one is available.
    let incomplete = parser_after_header().parse(&[0, 2, 1], false);
    assert_matches!(incomplete, Ok(Chunk::NeedMoreData(1)));

    // Empty name, no data.
    let unnamed_empty = parser_after_header().parse(&[0, 1, 0], false).unwrap();
    assert_custom(unnamed_empty, 3, "", 11, b"", Range { start: 10, end: 11 });

    // One-byte name, no data.
    let named_empty = parser_after_header()
        .parse(&[0, 2, 1, b'a'], false)
        .unwrap();
    assert_custom(named_empty, 4, "a", 12, b"", Range { start: 10, end: 12 });

    // Empty name, one byte of data.
    let unnamed_data = parser_after_header()
        .parse(&[0, 2, 0, b'a'], false)
        .unwrap();
    assert_custom(unnamed_data, 4, "", 11, b"a", Range { start: 10, end: 12 });
}
| |
| fn assert_custom( |
| chunk: Chunk<'_>, |
| expected_consumed: usize, |
| expected_name: &str, |
| expected_data_offset: usize, |
| expected_data: &[u8], |
| expected_range: Range<usize>, |
| ) { |
| let (consumed, s) = match chunk { |
| Chunk::Parsed { |
| consumed, |
| payload: Payload::CustomSection(s), |
| } => (consumed, s), |
| _ => panic!("not a custom section payload"), |
| }; |
| assert_eq!(consumed, expected_consumed); |
| assert_eq!(s.name(), expected_name); |
| assert_eq!(s.data_offset(), expected_data_offset); |
| assert_eq!(s.data(), expected_data); |
| assert_eq!(s.range(), expected_range); |
| } |
| |
#[test]
fn function_section() {
    // Truncated code sections (id 10) are errors once `eof` is set ...
    let truncated: [&[u8]; 3] = [&[10], &[10, 0], &[10, 1]];
    for &bytes in &truncated {
        assert!(parser_after_header().parse(bytes, true).is_err());
    }

    // ... and requests for more data otherwise.
    let id_only = parser_after_header().parse(&[10], false);
    assert_matches!(id_only, Ok(Chunk::NeedMoreData(1)));
    let missing_count = parser_after_header().parse(&[10, 1], false);
    assert_matches!(missing_count, Ok(Chunk::NeedMoreData(1)));

    // A code section without any function bodies.
    {
        let mut parser = parser_after_header();
        let start = parser.parse(&[10, 1, 0], false);
        assert_matches!(
            start,
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 0, .. },
            }),
        );
        let end = parser.parse(&[], true);
        assert_matches!(
            end,
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(11),
            }),
        );
    }

    // A code section with a single, empty function body.
    {
        let mut parser = parser_after_header();
        let start = parser.parse(&[10, 2, 1, 0], false);
        assert_matches!(
            start,
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 1, .. },
            }),
        );
        let entry = parser.parse(&[0], false);
        assert_matches!(
            entry,
            Ok(Chunk::Parsed {
                consumed: 1,
                payload: Payload::CodeSectionEntry(_),
            }),
        );
        let end = parser.parse(&[], true);
        assert_matches!(
            end,
            Ok(Chunk::Parsed {
                consumed: 0,
                payload: Payload::End(12),
            }),
        );
    }

    // 1 byte section with 1 function can't read the function body because
    // the section is too small
    {
        let mut parser = parser_after_header();
        let start = parser.parse(&[10, 1, 1], false);
        assert_matches!(
            start,
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 1, .. },
            }),
        );
        let err = parser.parse(&[0], false).unwrap_err();
        assert_eq!(err.message(), "unexpected end-of-file");
    }

    // section with 2 functions but section is cut off
    {
        let mut parser = parser_after_header();
        let start = parser.parse(&[10, 2, 2], false);
        assert_matches!(
            start,
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 2, .. },
            }),
        );
        let entry = parser.parse(&[0], false);
        assert_matches!(
            entry,
            Ok(Chunk::Parsed {
                consumed: 1,
                payload: Payload::CodeSectionEntry(_),
            }),
        );
        let starved = parser.parse(&[], false);
        assert_matches!(starved, Ok(Chunk::NeedMoreData(1)));
        let err = parser.parse(&[0], false).unwrap_err();
        assert_eq!(err.message(), "unexpected end-of-file");
    }

    // trailing data is bad
    {
        let mut parser = parser_after_header();
        let start = parser.parse(&[10, 3, 1], false);
        assert_matches!(
            start,
            Ok(Chunk::Parsed {
                consumed: 3,
                payload: Payload::CodeSectionStart { count: 1, .. },
            }),
        );
        let entry = parser.parse(&[0], false);
        assert_matches!(
            entry,
            Ok(Chunk::Parsed {
                consumed: 1,
                payload: Payload::CodeSectionEntry(_),
            }),
        );
        let err = parser.parse(&[0], false).unwrap_err();
        assert_eq!(err.message(), "trailing bytes at end of section");
    }
}
| |
// `Payload::ModuleSection` only exists with the `component-model` feature, so
// this test must be compiled out when the feature is disabled.
#[test]
#[cfg(feature = "component-model")]
fn single_module() {
    let mut p = parser_after_component_header();
    assert_matches!(p.parse(&[4], false), Ok(Chunk::NeedMoreData(1)));

    // A module that's 8 bytes in length
    let mut sub = match p.parse(&[1, 8], false) {
        Ok(Chunk::Parsed {
            consumed: 2,
            payload: Payload::ModuleSection { parser, .. },
        }) => parser,
        other => panic!("bad parse {:?}", other),
    };

    // Parse the header of the submodule with the sub-parser.
    assert_matches!(sub.parse(&[], false), Ok(Chunk::NeedMoreData(4)));
    assert_matches!(sub.parse(b"\0asm", false), Ok(Chunk::NeedMoreData(4)));
    assert_matches!(
        sub.parse(b"\0asm\x01\0\0\0", false),
        Ok(Chunk::Parsed {
            consumed: 8,
            payload: Payload::Version {
                num: 1,
                encoding: Encoding::Module,
                ..
            },
        }),
    );

    // The sub-parser should be byte-limited so the next byte shouldn't get
    // consumed, it's intended for the parent parser.
    assert_matches!(
        sub.parse(&[10], false),
        Ok(Chunk::Parsed {
            consumed: 0,
            payload: Payload::End(18),
        }),
    );

    // The parent parser should now be back to resuming, and we simulate it
    // being done with bytes to ensure that it's safely at the end,
    // completing the module code section.
    assert_matches!(p.parse(&[], false), Ok(Chunk::NeedMoreData(1)));
    assert_matches!(
        p.parse(&[], true),
        Ok(Chunk::Parsed {
            consumed: 0,
            payload: Payload::End(18),
        }),
    );
}
| |
// `Payload::ModuleSection` only exists with the `component-model` feature, so
// this test must be compiled out when the feature is disabled.
#[test]
#[cfg(feature = "component-model")]
fn nested_section_too_big() {
    let mut p = parser_after_component_header();

    // A module that's 10 bytes in length
    let mut sub = match p.parse(&[1, 10], false) {
        Ok(Chunk::Parsed {
            consumed: 2,
            payload: Payload::ModuleSection { parser, .. },
        }) => parser,
        other => panic!("bad parse {:?}", other),
    };

    // use 8 bytes to parse the header, leaving 2 remaining bytes in our
    // module.
    assert_matches!(
        sub.parse(b"\0asm\x01\0\0\0", false),
        Ok(Chunk::Parsed {
            consumed: 8,
            payload: Payload::Version { num: 1, .. },
        }),
    );

    // We can't parse a section which declares it's bigger than the outer
    // module. This is a custom section, one byte big, with one content byte.
    // The content byte, however, lives outside of the parent's module code
    // section.
    assert_eq!(
        sub.parse(&[0, 1, 0], false).unwrap_err().message(),
        "section too large",
    );
}
| } |