| use std::cmp; |
| use std::io; |
| use std::io::prelude::*; |
| |
| use super::{corrupt, GzBuilder, GzHeader, GzHeaderParser}; |
| use crate::crc::{Crc, CrcWriter}; |
| use crate::zio; |
| use crate::{Compress, Compression, Decompress, Status}; |
| |
| /// A gzip streaming encoder |
| /// |
| /// This structure exposes a [`Write`] interface that will emit compressed data |
| /// to the underlying writer `W`. |
| /// |
| /// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use std::io::prelude::*; |
| /// use flate2::Compression; |
| /// use flate2::write::GzEncoder; |
| /// |
| /// // Vec<u8> implements Write to print the compressed bytes of sample string |
| /// # fn main() { |
| /// |
| /// let mut e = GzEncoder::new(Vec::new(), Compression::default()); |
| /// e.write_all(b"Hello World").unwrap(); |
| /// println!("{:?}", e.finish().unwrap()); |
| /// # } |
| /// ``` |
| #[derive(Debug)] |
| pub struct GzEncoder<W: Write> { |
| inner: zio::Writer<W, Compress>, |
| crc: Crc, |
| crc_bytes_written: usize, |
| header: Vec<u8>, |
| } |
| |
| pub fn gz_encoder<W: Write>(header: Vec<u8>, w: W, lvl: Compression) -> GzEncoder<W> { |
| GzEncoder { |
| inner: zio::Writer::new(w, Compress::new(lvl, false)), |
| crc: Crc::new(), |
| header, |
| crc_bytes_written: 0, |
| } |
| } |
| |
| impl<W: Write> GzEncoder<W> { |
| /// Creates a new encoder which will use the given compression level. |
| /// |
| /// The encoder is not configured specially for the emitted header. For |
| /// header configuration, see the `GzBuilder` type. |
| /// |
| /// The data written to the returned encoder will be compressed and then |
| /// written to the stream `w`. |
| pub fn new(w: W, level: Compression) -> GzEncoder<W> { |
| GzBuilder::new().write(w, level) |
| } |
| |
| /// Acquires a reference to the underlying writer. |
| pub fn get_ref(&self) -> &W { |
| self.inner.get_ref() |
| } |
| |
| /// Acquires a mutable reference to the underlying writer. |
| /// |
| /// Note that mutation of the writer may result in surprising results if |
| /// this encoder is continued to be used. |
| pub fn get_mut(&mut self) -> &mut W { |
| self.inner.get_mut() |
| } |
| |
| /// Attempt to finish this output stream, writing out final chunks of data. |
| /// |
| /// Note that this function can only be used once data has finished being |
| /// written to the output stream. After this function is called then further |
| /// calls to `write` may result in a panic. |
| /// |
| /// # Panics |
| /// |
| /// Attempts to write data to this stream may result in a panic after this |
| /// function is called. |
| /// |
| /// # Errors |
| /// |
| /// This function will perform I/O to complete this stream, and any I/O |
| /// errors which occur will be returned from this function. |
| pub fn try_finish(&mut self) -> io::Result<()> { |
| self.write_header()?; |
| self.inner.finish()?; |
| |
| while self.crc_bytes_written < 8 { |
| let (sum, amt) = (self.crc.sum(), self.crc.amount()); |
| let buf = [ |
| (sum >> 0) as u8, |
| (sum >> 8) as u8, |
| (sum >> 16) as u8, |
| (sum >> 24) as u8, |
| (amt >> 0) as u8, |
| (amt >> 8) as u8, |
| (amt >> 16) as u8, |
| (amt >> 24) as u8, |
| ]; |
| let inner = self.inner.get_mut(); |
| let n = inner.write(&buf[self.crc_bytes_written..])?; |
| self.crc_bytes_written += n; |
| } |
| Ok(()) |
| } |
| |
| /// Finish encoding this stream, returning the underlying writer once the |
| /// encoding is done. |
| /// |
| /// Note that this function may not be suitable to call in a situation where |
| /// the underlying stream is an asynchronous I/O stream. To finish a stream |
| /// the `try_finish` (or `shutdown`) method should be used instead. To |
| /// re-acquire ownership of a stream it is safe to call this method after |
| /// `try_finish` or `shutdown` has returned `Ok`. |
| /// |
| /// # Errors |
| /// |
| /// This function will perform I/O to complete this stream, and any I/O |
| /// errors which occur will be returned from this function. |
| pub fn finish(mut self) -> io::Result<W> { |
| self.try_finish()?; |
| Ok(self.inner.take_inner()) |
| } |
| |
| fn write_header(&mut self) -> io::Result<()> { |
| while !self.header.is_empty() { |
| let n = self.inner.get_mut().write(&self.header)?; |
| self.header.drain(..n); |
| } |
| Ok(()) |
| } |
| } |
| |
| impl<W: Write> Write for GzEncoder<W> { |
| fn write(&mut self, buf: &[u8]) -> io::Result<usize> { |
| assert_eq!(self.crc_bytes_written, 0); |
| self.write_header()?; |
| let n = self.inner.write(buf)?; |
| self.crc.update(&buf[..n]); |
| Ok(n) |
| } |
| |
| fn flush(&mut self) -> io::Result<()> { |
| assert_eq!(self.crc_bytes_written, 0); |
| self.write_header()?; |
| self.inner.flush() |
| } |
| } |
| |
| impl<R: Read + Write> Read for GzEncoder<R> { |
| fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { |
| self.get_mut().read(buf) |
| } |
| } |
| |
| impl<W: Write> Drop for GzEncoder<W> { |
| fn drop(&mut self) { |
| if self.inner.is_present() { |
| let _ = self.try_finish(); |
| } |
| } |
| } |
| |
| /// A decoder for a single member of a [gzip file]. |
| /// |
| /// This structure exposes a [`Write`] interface, receiving compressed data and |
| /// writing uncompressed data to the underlying writer. |
| /// |
| /// After decoding a single member of the gzip data this writer will return the number of bytes up to |
| /// to the end of the gzip member and subsequent writes will return Ok(0) allowing the caller to |
| /// handle any data following the gzip member. |
| /// |
| /// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] |
| /// or read more |
| /// [in the introduction](../index.html#about-multi-member-gzip-files). |
| /// |
| /// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 |
| /// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html |
| /// |
| /// # Examples |
| /// |
| /// ``` |
| /// use std::io::prelude::*; |
| /// use std::io; |
| /// use flate2::Compression; |
| /// use flate2::write::{GzEncoder, GzDecoder}; |
| /// |
| /// # fn main() { |
| /// # let mut e = GzEncoder::new(Vec::new(), Compression::default()); |
| /// # e.write(b"Hello World").unwrap(); |
| /// # let bytes = e.finish().unwrap(); |
| /// # assert_eq!("Hello World", decode_writer(bytes).unwrap()); |
| /// # } |
| /// // Uncompresses a gzip encoded vector of bytes and returns a string or error |
| /// // Here Vec<u8> implements Write |
| /// fn decode_writer(bytes: Vec<u8>) -> io::Result<String> { |
| /// let mut writer = Vec::new(); |
| /// let mut decoder = GzDecoder::new(writer); |
| /// decoder.write_all(&bytes[..])?; |
| /// writer = decoder.finish()?; |
| /// let return_string = String::from_utf8(writer).expect("String parsing error"); |
| /// Ok(return_string) |
| /// } |
| /// ``` |
| #[derive(Debug)] |
| pub struct GzDecoder<W: Write> { |
| inner: zio::Writer<CrcWriter<W>, Decompress>, |
| crc_bytes: Vec<u8>, |
| header_parser: GzHeaderParser, |
| } |
| |
| const CRC_BYTES_LEN: usize = 8; |
| |
| impl<W: Write> GzDecoder<W> { |
| /// Creates a new decoder which will write uncompressed data to the stream. |
| /// |
| /// When this encoder is dropped or unwrapped the final pieces of data will |
| /// be flushed. |
| pub fn new(w: W) -> GzDecoder<W> { |
| GzDecoder { |
| inner: zio::Writer::new(CrcWriter::new(w), Decompress::new(false)), |
| crc_bytes: Vec::with_capacity(CRC_BYTES_LEN), |
| header_parser: GzHeaderParser::new(), |
| } |
| } |
| |
| /// Returns the header associated with this stream. |
| pub fn header(&self) -> Option<&GzHeader> { |
| self.header_parser.header() |
| } |
| |
| /// Acquires a reference to the underlying writer. |
| pub fn get_ref(&self) -> &W { |
| self.inner.get_ref().get_ref() |
| } |
| |
| /// Acquires a mutable reference to the underlying writer. |
| /// |
| /// Note that mutating the output/input state of the stream may corrupt this |
| /// object, so care must be taken when using this method. |
| pub fn get_mut(&mut self) -> &mut W { |
| self.inner.get_mut().get_mut() |
| } |
| |
| /// Attempt to finish this output stream, writing out final chunks of data. |
| /// |
| /// Note that this function can only be used once data has finished being |
| /// written to the output stream. After this function is called then further |
| /// calls to `write` may result in a panic. |
| /// |
| /// # Panics |
| /// |
| /// Attempts to write data to this stream may result in a panic after this |
| /// function is called. |
| /// |
| /// # Errors |
| /// |
| /// This function will perform I/O to finish the stream, returning any |
| /// errors which happen. |
| pub fn try_finish(&mut self) -> io::Result<()> { |
| self.finish_and_check_crc()?; |
| Ok(()) |
| } |
| |
| /// Consumes this decoder, flushing the output stream. |
| /// |
| /// This will flush the underlying data stream and then return the contained |
| /// writer if the flush succeeded. |
| /// |
| /// Note that this function may not be suitable to call in a situation where |
| /// the underlying stream is an asynchronous I/O stream. To finish a stream |
| /// the `try_finish` (or `shutdown`) method should be used instead. To |
| /// re-acquire ownership of a stream it is safe to call this method after |
| /// `try_finish` or `shutdown` has returned `Ok`. |
| /// |
| /// # Errors |
| /// |
| /// This function will perform I/O to complete this stream, and any I/O |
| /// errors which occur will be returned from this function. |
| pub fn finish(mut self) -> io::Result<W> { |
| self.finish_and_check_crc()?; |
| Ok(self.inner.take_inner().into_inner()) |
| } |
| |
| fn finish_and_check_crc(&mut self) -> io::Result<()> { |
| self.inner.finish()?; |
| |
| if self.crc_bytes.len() != 8 { |
| return Err(corrupt()); |
| } |
| |
| let crc = ((self.crc_bytes[0] as u32) << 0) |
| | ((self.crc_bytes[1] as u32) << 8) |
| | ((self.crc_bytes[2] as u32) << 16) |
| | ((self.crc_bytes[3] as u32) << 24); |
| let amt = ((self.crc_bytes[4] as u32) << 0) |
| | ((self.crc_bytes[5] as u32) << 8) |
| | ((self.crc_bytes[6] as u32) << 16) |
| | ((self.crc_bytes[7] as u32) << 24); |
| if crc != self.inner.get_ref().crc().sum() { |
| return Err(corrupt()); |
| } |
| if amt != self.inner.get_ref().crc().amount() { |
| return Err(corrupt()); |
| } |
| Ok(()) |
| } |
| } |
| |
| impl<W: Write> Write for GzDecoder<W> { |
| fn write(&mut self, mut buf: &[u8]) -> io::Result<usize> { |
| let buflen = buf.len(); |
| if self.header().is_none() { |
| match self.header_parser.parse(&mut buf) { |
| Err(err) => { |
| if err.kind() == io::ErrorKind::UnexpectedEof { |
| // all data read but header still not complete |
| Ok(buflen) |
| } else { |
| Err(err) |
| } |
| } |
| Ok(_) => { |
| debug_assert!(self.header().is_some()); |
| // buf now contains the unread part of the original buf |
| let n = buflen - buf.len(); |
| Ok(n) |
| } |
| } |
| } else { |
| let (n, status) = self.inner.write_with_status(buf)?; |
| |
| if status == Status::StreamEnd && n < buf.len() && self.crc_bytes.len() < 8 { |
| let remaining = buf.len() - n; |
| let crc_bytes = cmp::min(remaining, CRC_BYTES_LEN - self.crc_bytes.len()); |
| self.crc_bytes.extend(&buf[n..n + crc_bytes]); |
| return Ok(n + crc_bytes); |
| } |
| Ok(n) |
| } |
| } |
| |
| fn flush(&mut self) -> io::Result<()> { |
| self.inner.flush() |
| } |
| } |
| |
| impl<W: Read + Write> Read for GzDecoder<W> { |
| fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { |
| self.inner.get_mut().get_mut().read(buf) |
| } |
| } |
| |
| /// A gzip streaming decoder that decodes a [gzip file] with multiple members. |
| /// |
| /// This structure exposes a [`Write`] interface that will consume compressed data and |
| /// write uncompressed data to the underlying writer. |
| /// |
| /// A gzip file consists of a series of *members* concatenated one after another. |
| /// `MultiGzDecoder` decodes all members of a file and writes them to the |
| /// underlying writer one after another. |
| /// |
| /// To handle members separately, see [GzDecoder] or read more |
| /// [in the introduction](../index.html#about-multi-member-gzip-files). |
| /// |
| /// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 |
| #[derive(Debug)] |
| pub struct MultiGzDecoder<W: Write> { |
| inner: GzDecoder<W>, |
| } |
| |
| impl<W: Write> MultiGzDecoder<W> { |
| /// Creates a new decoder which will write uncompressed data to the stream. |
| /// If the gzip stream contains multiple members all will be decoded. |
| pub fn new(w: W) -> MultiGzDecoder<W> { |
| MultiGzDecoder { |
| inner: GzDecoder::new(w), |
| } |
| } |
| |
| /// Returns the header associated with the current member. |
| pub fn header(&self) -> Option<&GzHeader> { |
| self.inner.header() |
| } |
| |
| /// Acquires a reference to the underlying writer. |
| pub fn get_ref(&self) -> &W { |
| self.inner.get_ref() |
| } |
| |
| /// Acquires a mutable reference to the underlying writer. |
| /// |
| /// Note that mutating the output/input state of the stream may corrupt this |
| /// object, so care must be taken when using this method. |
| pub fn get_mut(&mut self) -> &mut W { |
| self.inner.get_mut() |
| } |
| |
| /// Attempt to finish this output stream, writing out final chunks of data. |
| /// |
| /// Note that this function can only be used once data has finished being |
| /// written to the output stream. After this function is called then further |
| /// calls to `write` may result in a panic. |
| /// |
| /// # Panics |
| /// |
| /// Attempts to write data to this stream may result in a panic after this |
| /// function is called. |
| /// |
| /// # Errors |
| /// |
| /// This function will perform I/O to finish the stream, returning any |
| /// errors which happen. |
| pub fn try_finish(&mut self) -> io::Result<()> { |
| self.inner.try_finish() |
| } |
| |
| /// Consumes this decoder, flushing the output stream. |
| /// |
| /// This will flush the underlying data stream and then return the contained |
| /// writer if the flush succeeded. |
| /// |
| /// Note that this function may not be suitable to call in a situation where |
| /// the underlying stream is an asynchronous I/O stream. To finish a stream |
| /// the `try_finish` (or `shutdown`) method should be used instead. To |
| /// re-acquire ownership of a stream it is safe to call this method after |
| /// `try_finish` or `shutdown` has returned `Ok`. |
| /// |
| /// # Errors |
| /// |
| /// This function will perform I/O to complete this stream, and any I/O |
| /// errors which occur will be returned from this function. |
| pub fn finish(self) -> io::Result<W> { |
| self.inner.finish() |
| } |
| } |
| |
| impl<W: Write> Write for MultiGzDecoder<W> { |
| fn write(&mut self, buf: &[u8]) -> io::Result<usize> { |
| if buf.is_empty() { |
| Ok(0) |
| } else { |
| match self.inner.write(buf) { |
| Ok(0) => { |
| // When the GzDecoder indicates that it has finished |
| // create a new GzDecoder to handle additional data. |
| self.inner.try_finish()?; |
| let w = self.inner.inner.take_inner().into_inner(); |
| self.inner = GzDecoder::new(w); |
| self.inner.write(buf) |
| } |
| res => res, |
| } |
| } |
| } |
| |
| fn flush(&mut self) -> io::Result<()> { |
| self.inner.flush() |
| } |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use super::*; |
| |
| const STR: &str = "Hello World Hello World Hello World Hello World Hello World \ |
| Hello World Hello World Hello World Hello World Hello World \ |
| Hello World Hello World Hello World Hello World Hello World \ |
| Hello World Hello World Hello World Hello World Hello World \ |
| Hello World Hello World Hello World Hello World Hello World"; |
| |
| #[test] |
| fn decode_writer_one_chunk() { |
| let mut e = GzEncoder::new(Vec::new(), Compression::default()); |
| e.write(STR.as_ref()).unwrap(); |
| let bytes = e.finish().unwrap(); |
| |
| let mut writer = Vec::new(); |
| let mut decoder = GzDecoder::new(writer); |
| let n = decoder.write(&bytes[..]).unwrap(); |
| decoder.write(&bytes[n..]).unwrap(); |
| decoder.try_finish().unwrap(); |
| writer = decoder.finish().unwrap(); |
| let return_string = String::from_utf8(writer).expect("String parsing error"); |
| assert_eq!(return_string, STR); |
| } |
| |
| #[test] |
| fn decode_writer_partial_header() { |
| let mut e = GzEncoder::new(Vec::new(), Compression::default()); |
| e.write(STR.as_ref()).unwrap(); |
| let bytes = e.finish().unwrap(); |
| |
| let mut writer = Vec::new(); |
| let mut decoder = GzDecoder::new(writer); |
| assert_eq!(decoder.write(&bytes[..5]).unwrap(), 5); |
| let n = decoder.write(&bytes[5..]).unwrap(); |
| if n < bytes.len() - 5 { |
| decoder.write(&bytes[n + 5..]).unwrap(); |
| } |
| writer = decoder.finish().unwrap(); |
| let return_string = String::from_utf8(writer).expect("String parsing error"); |
| assert_eq!(return_string, STR); |
| } |
| |
| #[test] |
| fn decode_writer_partial_header_filename() { |
| let filename = "test.txt"; |
| let mut e = GzBuilder::new() |
| .filename(filename) |
| .read(STR.as_bytes(), Compression::default()); |
| let mut bytes = Vec::new(); |
| e.read_to_end(&mut bytes).unwrap(); |
| |
| let mut writer = Vec::new(); |
| let mut decoder = GzDecoder::new(writer); |
| assert_eq!(decoder.write(&bytes[..12]).unwrap(), 12); |
| let n = decoder.write(&bytes[12..]).unwrap(); |
| if n < bytes.len() - 12 { |
| decoder.write(&bytes[n + 12..]).unwrap(); |
| } |
| assert_eq!( |
| decoder.header().unwrap().filename().unwrap(), |
| filename.as_bytes() |
| ); |
| writer = decoder.finish().unwrap(); |
| let return_string = String::from_utf8(writer).expect("String parsing error"); |
| assert_eq!(return_string, STR); |
| } |
| |
| #[test] |
| fn decode_writer_partial_header_comment() { |
| let comment = "test comment"; |
| let mut e = GzBuilder::new() |
| .comment(comment) |
| .read(STR.as_bytes(), Compression::default()); |
| let mut bytes = Vec::new(); |
| e.read_to_end(&mut bytes).unwrap(); |
| |
| let mut writer = Vec::new(); |
| let mut decoder = GzDecoder::new(writer); |
| assert_eq!(decoder.write(&bytes[..12]).unwrap(), 12); |
| let n = decoder.write(&bytes[12..]).unwrap(); |
| if n < bytes.len() - 12 { |
| decoder.write(&bytes[n + 12..]).unwrap(); |
| } |
| assert_eq!( |
| decoder.header().unwrap().comment().unwrap(), |
| comment.as_bytes() |
| ); |
| writer = decoder.finish().unwrap(); |
| let return_string = String::from_utf8(writer).expect("String parsing error"); |
| assert_eq!(return_string, STR); |
| } |
| |
| #[test] |
| fn decode_writer_exact_header() { |
| let mut e = GzEncoder::new(Vec::new(), Compression::default()); |
| e.write(STR.as_ref()).unwrap(); |
| let bytes = e.finish().unwrap(); |
| |
| let mut writer = Vec::new(); |
| let mut decoder = GzDecoder::new(writer); |
| assert_eq!(decoder.write(&bytes[..10]).unwrap(), 10); |
| decoder.write(&bytes[10..]).unwrap(); |
| writer = decoder.finish().unwrap(); |
| let return_string = String::from_utf8(writer).expect("String parsing error"); |
| assert_eq!(return_string, STR); |
| } |
| |
| #[test] |
| fn decode_writer_partial_crc() { |
| let mut e = GzEncoder::new(Vec::new(), Compression::default()); |
| e.write(STR.as_ref()).unwrap(); |
| let bytes = e.finish().unwrap(); |
| |
| let mut writer = Vec::new(); |
| let mut decoder = GzDecoder::new(writer); |
| let l = bytes.len() - 5; |
| let n = decoder.write(&bytes[..l]).unwrap(); |
| decoder.write(&bytes[n..]).unwrap(); |
| writer = decoder.finish().unwrap(); |
| let return_string = String::from_utf8(writer).expect("String parsing error"); |
| assert_eq!(return_string, STR); |
| } |
| |
| // Two or more gzip files concatenated form a multi-member gzip file. MultiGzDecoder will |
| // concatenate the decoded contents of all members. |
| #[test] |
| fn decode_multi_writer() { |
| let mut e = GzEncoder::new(Vec::new(), Compression::default()); |
| e.write(STR.as_ref()).unwrap(); |
| let bytes = e.finish().unwrap().repeat(2); |
| |
| let mut writer = Vec::new(); |
| let mut decoder = MultiGzDecoder::new(writer); |
| let mut count = 0; |
| while count < bytes.len() { |
| let n = decoder.write(&bytes[count..]).unwrap(); |
| assert!(n != 0); |
| count += n; |
| } |
| writer = decoder.finish().unwrap(); |
| let return_string = String::from_utf8(writer).expect("String parsing error"); |
| let expected = STR.repeat(2); |
| assert_eq!(return_string, expected); |
| } |
| |
| // GzDecoder consumes one gzip member and then returns 0 for subsequent writes, allowing any |
| // additional data to be consumed by the caller. |
| #[test] |
| fn decode_extra_data() { |
| let compressed = { |
| let mut e = GzEncoder::new(Vec::new(), Compression::default()); |
| e.write(STR.as_ref()).unwrap(); |
| let mut b = e.finish().unwrap(); |
| b.push(b'x'); |
| b |
| }; |
| |
| let mut writer = Vec::new(); |
| let mut decoder = GzDecoder::new(writer); |
| let mut consumed_bytes = 0; |
| loop { |
| let n = decoder.write(&compressed[consumed_bytes..]).unwrap(); |
| if n == 0 { |
| break; |
| } |
| consumed_bytes += n; |
| } |
| writer = decoder.finish().unwrap(); |
| let actual = String::from_utf8(writer).expect("String parsing error"); |
| assert_eq!(actual, STR); |
| assert_eq!(&compressed[consumed_bytes..], b"x"); |
| } |
| } |