//! Train a dictionary from various sources.
//!
//! A dictionary can help improve the compression of small files.
//! The dictionary must be present during decompression,
//! but can be shared across multiple "similar" files.
//!
//! Dictionaries created with the `zstd` C library, the `zstd` command-line
//! interface, this library, or the provided `train` binary should all give
//! the same result, and are therefore completely compatible.
//!
//! To use, see [`Encoder::with_dictionary`] or [`Decoder::with_dictionary`].
//!
//! [`Encoder::with_dictionary`]: ../struct.Encoder.html#method.with_dictionary
//! [`Decoder::with_dictionary`]: ../struct.Decoder.html#method.with_dictionary
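//!
//! A minimal sketch of the full round trip, modeled on this module's own test
//! (the sample data below is only illustrative; real training needs many more,
//! and larger, samples):
//!
//! ```no_run
//! use std::io;
//!
//! // Hypothetical samples; in practice, use many small "similar" files.
//! let samples = vec![b"sample one".to_vec(), b"sample two".to_vec()];
//!
//! // Train a dictionary of at most 16 KiB (requires the `zdict_builder` feature).
//! let dict = zstd::dict::from_samples(&samples, 16 * 1024)?;
//!
//! // Compress with the dictionary...
//! let mut compressed = Vec::new();
//! io::copy(
//!     &mut &samples[0][..],
//!     &mut zstd::stream::Encoder::with_dictionary(&mut compressed, 3, &dict)?
//!         .auto_finish(),
//! )?;
//!
//! // ...then decompress with the same dictionary.
//! let mut decompressed = Vec::new();
//! io::copy(
//!     &mut zstd::stream::Decoder::with_dictionary(&compressed[..], &dict)?,
//!     &mut decompressed,
//! )?;
//! assert_eq!(decompressed, samples[0]);
//! # Ok::<(), io::Error>(())
//! ```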
#[cfg(feature = "zdict_builder")]
use std::io::{self, Read};
pub use zstd_safe::{CDict, DDict};
/// Prepared dictionary for compression
///
/// A dictionary can include its own copy of the data (if it is `'static`), or it can merely point
/// to a separate buffer (if it has another lifetime).
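///
/// A rough sketch of preparing a dictionary once and reusing it for several
/// encoders (the file names are only illustrative, and this assumes
/// `Encoder::with_prepared_dictionary` is available in your version of the crate):
///
/// ```no_run
/// use std::io;
/// use zstd::dict::EncoderDictionary;
///
/// // Hypothetical dictionary file, e.g. produced by `dict::from_files`.
/// let dict_bytes = std::fs::read("my.dict")?;
///
/// // Prepare the dictionary once, at compression level 3; this copies the bytes internally.
/// let prepared = EncoderDictionary::copy(&dict_bytes, 3);
///
/// for name in &["a.bin", "b.bin"] {
///     let input = std::fs::read(name)?;
///     let mut output = Vec::new();
///     // Reuse the prepared dictionary instead of re-processing the raw bytes each time.
///     let mut encoder =
///         zstd::stream::Encoder::with_prepared_dictionary(&mut output, &prepared)?;
///     io::copy(&mut &input[..], &mut encoder)?;
///     encoder.finish()?;
/// }
/// # Ok::<(), io::Error>(())
/// ```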
pub struct EncoderDictionary<'a> {
cdict: CDict<'a>,
}
impl EncoderDictionary<'static> {
/// Creates a prepared dictionary for compression.
///
/// This will copy the dictionary internally.
pub fn copy(dictionary: &[u8], level: i32) -> Self {
Self {
cdict: zstd_safe::create_cdict(dictionary, level),
}
}
}
impl<'a> EncoderDictionary<'a> {
#[cfg(feature = "experimental")]
#[cfg_attr(feature = "doc-cfg", doc(cfg(feature = "experimental")))]
/// Creates a prepared dictionary for compression, referencing the data without copying it.
///
/// A level of `0` uses zstd's default (currently `3`).
///
/// Only available with the `experimental` feature. Use `EncoderDictionary::copy` otherwise.
pub fn new(dictionary: &'a [u8], level: i32) -> Self {
Self {
cdict: zstd_safe::create_cdict_by_reference(dictionary, level),
}
}
/// Returns a reference to the inner `CDict`.
pub fn as_cdict(&self) -> &CDict<'a> {
&self.cdict
}
}
/// Prepared dictionary for decompression
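///
/// A rough sketch of preparing a dictionary once and reusing it for several
/// decoders (the file names are only illustrative, and this assumes
/// `Decoder::with_prepared_dictionary` is available in your version of the crate):
///
/// ```no_run
/// use std::io;
/// use zstd::dict::DecoderDictionary;
///
/// // Hypothetical dictionary file; it must be the same one used for compression.
/// let dict_bytes = std::fs::read("my.dict")?;
/// let prepared = DecoderDictionary::copy(&dict_bytes);
///
/// for name in &["a.bin.zst", "b.bin.zst"] {
///     let compressed = std::fs::read(name)?;
///     let mut decoder =
///         zstd::stream::Decoder::with_prepared_dictionary(&compressed[..], &prepared)?;
///     let mut decompressed = Vec::new();
///     io::copy(&mut decoder, &mut decompressed)?;
/// }
/// # Ok::<(), io::Error>(())
/// ```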
pub struct DecoderDictionary<'a> {
ddict: DDict<'a>,
}
impl DecoderDictionary<'static> {
/// Creates a prepared dictionary for decompression.
///
/// This will copy the dictionary internally.
pub fn copy(dictionary: &[u8]) -> Self {
Self {
ddict: zstd_safe::DDict::create(dictionary),
}
}
}
impl<'a> DecoderDictionary<'a> {
#[cfg(feature = "experimental")]
#[cfg_attr(feature = "doc-cfg", doc(cfg(feature = "experimental")))]
/// Creates a prepared dictionary for decompression, referencing the data without copying it.
///
/// Only available with the `experimental` feature. Use `DecoderDictionary::copy` otherwise.
pub fn new(dict: &'a [u8]) -> Self {
Self {
ddict: zstd_safe::create_ddict_by_reference(dict),
}
}
/// Returns a reference to the inner `DDict`.
pub fn as_ddict(&self) -> &DDict<'a> {
&self.ddict
}
}
/// Train a dictionary from a big continuous chunk of data.
///
/// This is the most efficient way to train a dictionary,
/// since the data is passed directly to `zstd` without copying.
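///
/// A small sketch of building the continuous buffer by hand (the sample data is
/// hypothetical, and real training needs many more, and larger, samples):
///
/// ```no_run
/// // Concatenate the samples into one buffer and record each sample's length.
/// let samples: &[&[u8]] = &[b"first sample", b"second sample", b"third sample"];
/// let mut continuous = Vec::new();
/// let mut sizes = Vec::new();
/// for sample in samples {
///     continuous.extend_from_slice(sample);
///     sizes.push(sample.len());
/// }
/// // The sizes must add up to `continuous.len()`, or training is rejected.
/// let _dict = zstd::dict::from_continuous(&continuous, &sizes, 16 * 1024)?;
/// # Ok::<(), std::io::Error>(())
/// ```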
#[cfg(feature = "zdict_builder")]
#[cfg_attr(feature = "doc-cfg", doc(cfg(feature = "zdict_builder")))]
pub fn from_continuous(
sample_data: &[u8],
sample_sizes: &[usize],
max_size: usize,
) -> io::Result<Vec<u8>> {
use crate::map_error_code;
// Complain if the lengths don't add up to the entire data.
if sample_sizes.iter().sum::<usize>() != sample_data.len() {
return Err(io::Error::new(
io::ErrorKind::Other,
"sample sizes don't add up".to_string(),
));
}
let mut result = Vec::with_capacity(max_size);
zstd_safe::train_from_buffer(&mut result, sample_data, sample_sizes)
.map_err(map_error_code)?;
Ok(result)
}
/// Train a dictionary from multiple samples.
///
/// The samples will internally be copied to a single continuous buffer,
/// so make sure you have enough memory available.
///
/// If memory is tight, [`from_continuous`] uses the given
/// slice directly, without an extra copy.
///
/// [`from_continuous`]: ./fn.from_continuous.html
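///
/// A small illustrative call (the sample data here is hypothetical, and real
/// training needs many more samples):
///
/// ```no_run
/// let samples = vec![b"first sample".to_vec(), b"second sample".to_vec()];
/// let _dict = zstd::dict::from_samples(&samples, 16 * 1024)?;
/// # Ok::<(), std::io::Error>(())
/// ```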
#[cfg(feature = "zdict_builder")]
#[cfg_attr(feature = "doc-cfg", doc(cfg(feature = "zdict_builder")))]
pub fn from_samples<S: AsRef<[u8]>>(
samples: &[S],
max_size: usize,
) -> io::Result<Vec<u8>> {
// Copy every sample to a big chunk of memory
let data: Vec<_> =
samples.iter().flat_map(|s| s.as_ref()).cloned().collect();
let sizes: Vec<_> = samples.iter().map(|s| s.as_ref().len()).collect();
from_continuous(&data, &sizes, max_size)
}
/// Train a dictionary from a list of files.
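///
/// A small sketch with hypothetical file names:
///
/// ```no_run
/// let files = ["logs/a.log", "logs/b.log", "logs/c.log"];
/// let dict = zstd::dict::from_files(&files, 16 * 1024)?;
/// std::fs::write("logs.dict", &dict)?;
/// # Ok::<(), std::io::Error>(())
/// ```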
#[cfg(feature = "zdict_builder")]
#[cfg_attr(feature = "doc-cfg", doc(cfg(feature = "zdict_builder")))]
pub fn from_files<I, P>(filenames: I, max_size: usize) -> io::Result<Vec<u8>>
where
P: AsRef<std::path::Path>,
I: IntoIterator<Item = P>,
{
use std::fs;
let mut buffer = Vec::new();
let mut sizes = Vec::new();
for filename in filenames {
let mut file = fs::File::open(filename)?;
let len = file.read_to_end(&mut buffer)?;
sizes.push(len);
}
from_continuous(&buffer, &sizes, max_size)
}
#[cfg(test)]
#[cfg(feature = "zdict_builder")]
mod tests {
use std::fs;
use std::io;
use std::io::Read;
use walkdir;
#[test]
fn test_dict_training() {
// Train a dictionary
let paths: Vec<_> = walkdir::WalkDir::new("src")
.into_iter()
.map(|entry| entry.unwrap())
.map(|entry| entry.into_path())
.filter(|path| path.to_str().unwrap().ends_with(".rs"))
.collect();
let dict = super::from_files(&paths, 4000).unwrap();
for path in paths {
let mut buffer = Vec::new();
let mut file = fs::File::open(path).unwrap();
let mut content = Vec::new();
file.read_to_end(&mut content).unwrap();
io::copy(
&mut &content[..],
&mut crate::stream::Encoder::with_dictionary(
&mut buffer,
1,
&dict,
)
.unwrap()
.auto_finish(),
)
.unwrap();
let mut result = Vec::new();
io::copy(
&mut crate::stream::Decoder::with_dictionary(
&buffer[..],
&dict[..],
)
.unwrap(),
&mut result,
)
.unwrap();
assert_eq!(&content, &result);
}
}
}