blob: b7a23498439c64cc8feb66178caaf456fe7a6a98 [file] [log] [blame] [edit]
use crate::io::{Error, Read};
use core::fmt;
#[cfg(feature = "std")]
use std::error::Error as StdError;
/// This magic number is included at the start of a single Zstandard frame.
///
/// It is compared against the first four bytes of a frame, interpreted as a little-endian `u32`.
pub const MAGIC_NUM: u32 = 0xFD2F_B528;
/// The minimum window size is defined as 1 KB (1024 bytes).
pub const MIN_WINDOW_SIZE: u64 = 1024;
/// The maximum window size is 3.75TB (`(1 << 41) + 7 * (1 << 38)` = 4_123_168_604_160 bytes).
pub const MAX_WINDOW_SIZE: u64 = (1 << 41) + 7 * (1 << 38);
/// Zstandard compressed data is made of one or more [Frame]s. Each frame is independent and can be
/// decompressed independently of other frames.
///
/// There are two frame formats defined by Zstandard: Zstandard frames and Skippable frames.
/// Zstandard frames contain compressed data, while skippable frames contain custom user metadata.
///
/// This structure contains the header of the frame.
///
/// <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frames>
pub struct Frame {
/// The parsed header of this frame.
pub header: FrameHeader,
}
/// A frame header has a variable size, with a minimum of 2 bytes, and a maximum of 14 bytes.
pub struct FrameHeader {
/// The frame header descriptor byte, describing which of the optional header fields are present.
pub descriptor: FrameDescriptor,
/// The `Window_Descriptor` field contains the minimum size of a memory buffer needed to
/// decompress the entire frame.
///
/// This byte is not included in the frame header when the `Single_Segment_flag` is set;
/// in that case this field is left at 0.
///
/// Bits 7-3 refer to the `Exponent`, where bits 2-0 refer to the `Mantissa`.
///
/// To determine the size of a window, the following formula can be used:
/// ```text
/// windowLog = 10 + Exponent;
/// windowBase = 1 << windowLog;
/// windowAdd = (windowBase / 8) * Mantissa;
/// Window_Size = windowBase + windowAdd;
/// ```
/// <https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor>
window_descriptor: u8,
/// The `Dictionary_ID` field contains the ID of the dictionary to be used to decode the frame.
/// When this value is not present, it's up to the decoder to know which dictionary to use.
///
/// `None` when the header carried no dictionary id, or when the id that was read is zero.
dict_id: Option<u32>,
/// The size of the original/uncompressed content.
///
/// Left at 0 when the header did not carry a `Frame_Content_Size` field.
frame_content_size: u64,
}
/// The first byte is called the `Frame Header Descriptor`, and it describes what other fields
/// are present.
///
/// Bit layout, as decoded by the accessor methods on this type:
///
/// - bits 7-6: `Frame_Content_Size_flag`
/// - bit 5: `Single_Segment_flag`
/// - bit 3: reserved
/// - bit 2: `Content_Checksum_flag`
/// - bits 1-0: `Dictionary_ID_flag`
pub struct FrameDescriptor(u8);
/// Errors produced while interpreting the flags of a [`FrameDescriptor`].
#[derive(Debug)]
#[non_exhaustive]
pub enum FrameDescriptorError {
/// A two-bit size flag held a value other than 0, 1, 2 or 3; `got` is the offending value.
///
/// This variant is used for both the `Frame_Content_Size_flag` and the `Dictionary_ID_flag`.
InvalidFrameContentSizeFlag { got: u8 },
}
/// Human-readable rendering of a [`FrameDescriptorError`].
impl fmt::Display for FrameDescriptorError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match *self {
            // Report the offending flag value next to the accepted values.
            Self::InvalidFrameContentSizeFlag { got: flag } => f.write_fmt(format_args!(
                "Invalid Frame_Content_Size_Flag; Is: {}, Should be one of: 0, 1, 2, 3",
                flag
            )),
        }
    }
}
// Only available with the `std` feature. The impl body is empty, so the trait's
// default methods are used and no underlying `source()` is overridden.
#[cfg(feature = "std")]
impl StdError for FrameDescriptorError {}
impl FrameDescriptor {
    /// Extract the `Frame_Content_Size_flag` (the two most significant bits of the descriptor).
    ///
    /// The flag tells whether a `Frame_Content_Size` field is present in the header and how
    /// many bytes it occupies.
    ///
    /// When the flag is 0, the field size depends on the `Single_Segment_flag`: if that flag
    /// is set the field is 1 byte long, otherwise the field is absent.
    ///
    /// | Flag Value (decimal) | Size of the `Frame_Content_Size` field in bytes |
    /// | -- | -- |
    /// | 0 | 0 or 1 (see above) |
    /// | 1 | 2 |
    /// | 2 | 4 |
    /// | 3 | 8 |
    pub fn frame_content_size_flag(&self) -> u8 {
        (self.0 & 0b1100_0000) >> 6
    }

    /// Bit 3 is reserved for a future feature; a compliant decoder **must ensure** that it
    /// is zero. Returns `true` when the bit is set.
    pub fn reserved_flag(&self) -> bool {
        (self.0 & (1 << 3)) != 0
    }

    /// Bit 5: when set, data must be regenerated within a single continuous memory segment.
    ///
    /// In this case, the `Window_Descriptor` byte is skipped, but `Frame_Content_Size` is
    /// present. The decoder must allocate a memory segment equal to or larger than
    /// `Frame_Content_Size`.
    pub fn single_segment_flag(&self) -> bool {
        (self.0 & (1 << 5)) != 0
    }

    /// Bit 2: when set, a 32 bit `Content_Checksum` will be present at the end of the frame.
    pub fn content_checksum_flag(&self) -> bool {
        (self.0 & (1 << 2)) != 0
    }

    /// Extract the two-bit `Dictionary_ID_flag` (the two least significant bits), which tells
    /// whether a dictionary ID is provided within the header and how large that field is.
    ///
    /// | Value (Decimal) | `DID_Field_Size` (bytes) |
    /// | -- | -- |
    /// | 0 | 0 |
    /// | 1 | 1 |
    /// | 2 | 2 |
    /// | 3 | 4 |
    pub fn dict_id_flag(&self) -> u8 {
        self.0 & 0b0000_0011
    }

    /// Size in bytes of the `Frame_Content_Size` field, derived from the descriptor flags.
    ///
    /// A result of zero means the `Frame_Content_Size` field is not present within the header.
    ///
    /// # Errors
    ///
    /// Returns [`FrameDescriptorError::InvalidFrameContentSizeFlag`] if the flag is not one
    /// of 0, 1, 2 or 3.
    pub fn frame_content_size_bytes(&self) -> Result<u8, FrameDescriptorError> {
        let flag = self.frame_content_size_flag();
        let size = match flag {
            // Flag 0: one byte in single-segment mode, otherwise the field is absent.
            0 => u8::from(self.single_segment_flag()),
            1 => 2,
            2 => 4,
            3 => 8,
            _ => return Err(FrameDescriptorError::InvalidFrameContentSizeFlag { got: flag }),
        };
        Ok(size)
    }

    /// Size in bytes of the `Dictionary_ID` field, derived from the descriptor flags.
    ///
    /// A result of zero means the dictionary id is not present within the header,
    /// and "It's up to the decoder to know which dictionary to use."
    ///
    /// # Errors
    ///
    /// Returns [`FrameDescriptorError::InvalidFrameContentSizeFlag`] if the flag is not one
    /// of 0, 1, 2 or 3.
    pub fn dictionary_id_bytes(&self) -> Result<u8, FrameDescriptorError> {
        let flag = self.dict_id_flag();
        match flag {
            // For 0, 1 and 2 the field size equals the flag value.
            0..=2 => Ok(flag),
            3 => Ok(4),
            _ => Err(FrameDescriptorError::InvalidFrameContentSizeFlag { got: flag }),
        }
    }
}
/// Errors that can occur while interpreting the fields of a [`FrameHeader`].
#[derive(Debug)]
#[non_exhaustive]
pub enum FrameHeaderError {
/// The window size derived from the `Window_Descriptor` was rejected as too large
/// (see [`MAX_WINDOW_SIZE`]); `got` is the computed size.
WindowTooBig { got: u64 },
/// The window size derived from the `Window_Descriptor` is below [`MIN_WINDOW_SIZE`];
/// `got` is the computed size.
WindowTooSmall { got: u64 },
/// The frame header descriptor could not be interpreted.
FrameDescriptorError(FrameDescriptorError),
/// The dictionary id had `got` bytes where `expected` bytes were required.
DictIdTooSmall { got: usize, expected: usize },
/// The frame content size had `got` bytes where `expected` bytes were required.
MismatchedFrameSize { got: usize, expected: u8 },
/// The frame content size was zero.
FrameSizeIsZero,
/// The frame content size length `got` is not one of 1, 2, 4 or 8 bytes.
InvalidFrameSize { got: u8 },
}
/// Human-readable rendering of a [`FrameHeaderError`].
impl fmt::Display for FrameHeaderError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::WindowTooBig { got } => write!(
                f,
                "window_size bigger than allowed maximum. Is: {}, Should be lower than: {}",
                got, MAX_WINDOW_SIZE
            ),
            Self::WindowTooSmall { got } => write!(
                f,
                "window_size smaller than allowed minimum. Is: {}, Should be greater than: {}",
                got, MIN_WINDOW_SIZE
            ),
            // Delegate to the wrapped error's `Display` impl so the user-facing message is
            // its readable text rather than a `Debug` dump of the enum variant.
            Self::FrameDescriptorError(e) => write!(f, "{}", e),
            Self::DictIdTooSmall { got, expected } => write!(
                f,
                "Not enough bytes in dict_id. Is: {}, Should be: {}",
                got, expected
            ),
            Self::MismatchedFrameSize { got, expected } => write!(
                f,
                "frame_content_size does not have the right length. Is: {}, Should be: {}",
                got, expected
            ),
            Self::FrameSizeIsZero => write!(f, "frame_content_size was zero"),
            Self::InvalidFrameSize { got } => write!(
                f,
                "Invalid frame_content_size. Is: {}, Should be one of 1, 2, 4, 8 bytes",
                got
            ),
        }
    }
}
/// Exposes the wrapped descriptor error as the underlying cause (only with the `std` feature).
#[cfg(feature = "std")]
impl StdError for FrameHeaderError {
    fn source(&self) -> Option<&(dyn StdError + 'static)> {
        // Only the descriptor variant wraps another error; every other variant has no cause.
        if let Self::FrameDescriptorError(inner) = self {
            Some(inner)
        } else {
            None
        }
    }
}
impl From<FrameDescriptorError> for FrameHeaderError {
fn from(error: FrameDescriptorError) -> Self {
Self::FrameDescriptorError(error)
}
}
impl FrameHeader {
    /// Read the size of the window from the header, returning the size in bytes.
    ///
    /// In single-segment mode there is no `Window_Descriptor`; the window is the whole
    /// frame content, so the frame content size is returned unchecked. Otherwise the size
    /// is computed from the descriptor's exponent (bits 7-3) and mantissa (bits 2-0):
    ///
    /// ```text
    /// windowBase  = 1 << (10 + Exponent)
    /// Window_Size = windowBase + (windowBase / 8) * Mantissa
    /// ```
    ///
    /// # Errors
    ///
    /// Returns [`FrameHeaderError::WindowTooSmall`] if the computed size is below
    /// [`MIN_WINDOW_SIZE`], and [`FrameHeaderError::WindowTooBig`] if it exceeds
    /// [`MAX_WINDOW_SIZE`]. Both bounds are inclusive: a window of exactly
    /// `MAX_WINDOW_SIZE` bytes (descriptor byte `0xFF`) is valid.
    pub fn window_size(&self) -> Result<u64, FrameHeaderError> {
        if self.descriptor.single_segment_flag() {
            return Ok(self.frame_content_size());
        }

        let exponent = u32::from(self.window_descriptor >> 3);
        let mantissa = u64::from(self.window_descriptor & 0x7);
        // The exponent is at most 31, so the shift is at most 41 and cannot overflow a u64.
        let window_base = 1u64 << (10 + exponent);
        let window_add = (window_base / 8) * mantissa;
        let window_size = window_base + window_add;

        if window_size < MIN_WINDOW_SIZE {
            Err(FrameHeaderError::WindowTooSmall { got: window_size })
        } else if window_size > MAX_WINDOW_SIZE {
            // The maximum itself is a legal window size, so only strictly larger values
            // are rejected.
            Err(FrameHeaderError::WindowTooBig { got: window_size })
        } else {
            Ok(window_size)
        }
    }

    /// The ID (if provided) of the dictionary required to decode this frame.
    pub fn dictionary_id(&self) -> Option<u32> {
        self.dict_id
    }

    /// Obtain the uncompressed size (in bytes) of the frame contents.
    pub fn frame_content_size(&self) -> u64 {
        self.frame_content_size
    }
}
/// Errors that can occur while reading a frame header from a reader.
#[derive(Debug)]
#[non_exhaustive]
pub enum ReadFrameHeaderError {
/// Reading the four magic-number bytes failed.
MagicNumberReadError(Error),
/// The magic number was neither the Zstandard frame magic nor a skippable-frame magic;
/// the payload is the value that was read.
BadMagicNumber(u32),
/// Reading the frame header descriptor byte failed. Also reported when reading the
/// length of a skippable frame fails.
FrameDescriptorReadError(Error),
/// The frame header descriptor could not be interpreted.
InvalidFrameDescriptor(FrameDescriptorError),
/// Reading the window descriptor byte failed.
WindowDescriptorReadError(Error),
/// Reading the dictionary id bytes failed.
DictionaryIdReadError(Error),
/// Reading the frame content size bytes failed.
FrameContentSizeReadError(Error),
/// A skippable frame was encountered instead of a Zstandard frame; `length` is the
/// little-endian `u32` that followed the magic number.
SkipFrame { magic_number: u32, length: u32 },
}
/// Human-readable rendering of a [`ReadFrameHeaderError`].
impl fmt::Display for ReadFrameHeaderError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::MagicNumberReadError(e) => write!(f, "Error while reading magic number: {}", e),
            Self::BadMagicNumber(e) => write!(f, "Read wrong magic number: 0x{:X}", e),
            Self::FrameDescriptorReadError(e) => {
                write!(f, "Error while reading frame descriptor: {}", e)
            }
            // Delegate to the wrapped error's `Display` impl so the user-facing message is
            // its readable text rather than a `Debug` dump of the enum variant.
            Self::InvalidFrameDescriptor(e) => write!(f, "{}", e),
            Self::WindowDescriptorReadError(e) => {
                write!(f, "Error while reading window descriptor: {}", e)
            }
            Self::DictionaryIdReadError(e) => write!(f, "Error while reading dictionary id: {}", e),
            Self::FrameContentSizeReadError(e) => {
                write!(f, "Error while reading frame content size: {}", e)
            }
            Self::SkipFrame {
                magic_number,
                length,
            } => write!(
                f,
                "SkippableFrame encountered with MagicNumber 0x{:X} and length {} bytes",
                magic_number, length
            ),
        }
    }
}
/// Exposes the wrapped error, if any, as the underlying cause (only with the `std` feature).
#[cfg(feature = "std")]
impl StdError for ReadFrameHeaderError {
    fn source(&self) -> Option<&(dyn StdError + 'static)> {
        match self {
            // Every read failure wraps the reader's error.
            Self::MagicNumberReadError(io_err)
            | Self::FrameDescriptorReadError(io_err)
            | Self::WindowDescriptorReadError(io_err)
            | Self::DictionaryIdReadError(io_err)
            | Self::FrameContentSizeReadError(io_err) => Some(io_err),
            Self::InvalidFrameDescriptor(descriptor_err) => Some(descriptor_err),
            // These variants carry plain data and have no underlying cause.
            Self::BadMagicNumber(_) | Self::SkipFrame { .. } => None,
        }
    }
}
impl From<FrameDescriptorError> for ReadFrameHeaderError {
fn from(error: FrameDescriptorError) -> Self {
Self::InvalidFrameDescriptor(error)
}
}
/// Parse one frame header from `r`, returning the resulting [`Frame`] together with the
/// number of bytes consumed from the reader (magic number included).
///
/// Fields are read in this order: 4-byte little-endian magic number, the frame header
/// descriptor byte, the window descriptor byte (unless the single-segment flag is set),
/// the dictionary id, and the frame content size. Multi-byte fields are little-endian.
///
/// # Errors
///
/// * [`ReadFrameHeaderError::SkipFrame`] when the magic number lies in
///   `0x184D2A50..=0x184D2A5F`; the following four bytes are read as the frame length.
/// * [`ReadFrameHeaderError::BadMagicNumber`] for any other unexpected magic number.
/// * One of the `*ReadError` variants when the reader fails while reading the
///   corresponding field.
/// * [`ReadFrameHeaderError::InvalidFrameDescriptor`] when a descriptor flag cannot be
///   interpreted.
pub fn read_frame_header(mut r: impl Read) -> Result<(Frame, u8), ReadFrameHeaderError> {
    let mut word = [0u8; 4];
    r.read_exact(&mut word)
        .map_err(ReadFrameHeaderError::MagicNumberReadError)?;
    let magic_num = u32::from_le_bytes(word);
    let mut consumed: usize = word.len();

    // Skippable frames have a magic number in this interval.
    if matches!(magic_num, 0x184D_2A50..=0x184D_2A5F) {
        r.read_exact(&mut word)
            .map_err(ReadFrameHeaderError::FrameDescriptorReadError)?;
        return Err(ReadFrameHeaderError::SkipFrame {
            magic_number: magic_num,
            length: u32::from_le_bytes(word),
        });
    }
    if magic_num != MAGIC_NUM {
        return Err(ReadFrameHeaderError::BadMagicNumber(magic_num));
    }

    let mut byte = [0u8; 1];
    r.read_exact(&mut byte)
        .map_err(ReadFrameHeaderError::FrameDescriptorReadError)?;
    let desc = FrameDescriptor(byte[0]);
    consumed += 1;

    // The window descriptor byte is only present when the frame is not single-segment.
    let window_descriptor = if desc.single_segment_flag() {
        0
    } else {
        r.read_exact(&mut byte)
            .map_err(ReadFrameHeaderError::WindowDescriptorReadError)?;
        consumed += 1;
        byte[0]
    };

    let dict_id_len = usize::from(desc.dictionary_id_bytes()?);
    let dict_id = if dict_id_len == 0 {
        None
    } else {
        // Read into the low bytes of a zeroed buffer so shorter ids are zero-extended.
        let mut raw = [0u8; 4];
        r.read_exact(&mut raw[..dict_id_len])
            .map_err(ReadFrameHeaderError::DictionaryIdReadError)?;
        consumed += dict_id_len;
        // An id of zero is treated the same as "no dictionary id".
        match u32::from_le_bytes(raw) {
            0 => None,
            id => Some(id),
        }
    };

    let fcs_len = usize::from(desc.frame_content_size_bytes()?);
    let frame_content_size = if fcs_len == 0 {
        0
    } else {
        let mut raw = [0u8; 8];
        r.read_exact(&mut raw[..fcs_len])
            .map_err(ReadFrameHeaderError::FrameContentSizeReadError)?;
        consumed += fcs_len;
        let value = u64::from_le_bytes(raw);
        // The two-byte encoding stores the size with an offset of 256.
        if fcs_len == 2 {
            value + 256
        } else {
            value
        }
    };

    let frame = Frame {
        header: FrameHeader {
            descriptor: desc,
            window_descriptor,
            dict_id,
            frame_content_size,
        },
    };
    // At most 4 + 1 + 1 + 4 + 8 = 18 bytes are consumed, so the cast cannot truncate.
    Ok((frame, consumed as u8))
}