From c3a1493ae247edcf8ceb0be6390c142a50596a87 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Thu, 17 Jul 2014 13:29:47 -0700 Subject: [PATCH] Add gzip compression/decompresion --- src/ffi.rs | 3 + src/gz.rs | 357 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 15 ++- 3 files changed, 373 insertions(+), 2 deletions(-) create mode 100644 src/gz.rs diff --git a/src/ffi.rs b/src/ffi.rs index 40a130041..67235a1a3 100644 --- a/src/ffi.rs +++ b/src/ffi.rs @@ -56,4 +56,7 @@ extern { pub fn mz_inflateInit(stream: *mut mz_stream) -> libc::c_int; pub fn mz_inflate(stream: *mut mz_stream, flush: libc::c_int) -> libc::c_int; pub fn mz_inflateEnd(stream: *mut mz_stream) -> libc::c_int; + + pub fn mz_crc32(crc: libc::c_ulong, ptr: *const u8, + len: libc::size_t) -> libc::c_ulong; } diff --git a/src/gz.rs b/src/gz.rs new file mode 100644 index 000000000..79816f9da --- /dev/null +++ b/src/gz.rs @@ -0,0 +1,357 @@ +//! gzip compression/decompression +//! +//! [1]: http://www.gzip.org/zlib/rfc-gzip.html + +use libc; +use std::c_str::CString; +use std::cmp; +use std::io::{IoResult, IoError}; +use std::io; +use std::os; +use std::slice::bytes; +use std::u16; + +use {BestCompression, CompressionLevel, BestSpeed}; +use ffi; + +static FHCRC: u8 = 1 << 1; +static FEXTRA: u8 = 1 << 2; +static FNAME: u8 = 1 << 3; +static FCOMMENT: u8 = 1 << 4; + +/// A gzip streaming encoder +/// +/// This structure exposes a `Writer` interface that will emit compressed data +/// to the underlying writer `W`. +pub struct Encoder { + inner: ::Encoder, + crc: libc::c_ulong, + amt: u32, + extra: Option>, + filename: Option, + comment: Option, + wrote_header: bool, + mtime: u32, + xfl: u8, +} + +/// A gzip streaming decoder +/// +/// This structure exposes a `Reader` interface that will consume compressed +/// data from the underlying reader and emit uncompressed data. +pub struct Decoder { + inner: ::Decoder, + crc: libc::c_ulong, + amt: u32, + extra: Option>, + filename: Option>, + comment: Option>, + mtime: u32, +} + +impl Encoder { + /// Creates a new encoder which will use the given compression level. + /// + /// No data is written at this time, and the gzip header can be configured + /// before the first call to `write()` by invoking the other instance + /// methods of this encoder. + pub fn new(w: W, level: CompressionLevel) -> Encoder { + Encoder { + inner: ::Encoder::new(w, level), + crc: 0, + amt: 0, + wrote_header: false, + extra: None, + filename: None, + comment: None, + mtime: 0, + xfl: match level { + BestCompression => 2, + BestSpeed => 4, + _ => 0, + } + } + } + + /// Configure the `mtime` field in the gzip header. + /// + /// This function will return an error if the header has already been + /// written. + pub fn mtime(&mut self, mtime: u32) -> IoResult<()> { + if self.wrote_header { + Err(io::standard_error(io::OtherIoError)) + } else { + self.mtime = mtime; + Ok(()) + } + } + + /// Configure the `extra` field in the gzip header. + /// + /// This function will return an error if the header has already been + /// written. + pub fn extra(&mut self, extra: Vec) -> IoResult<()> { + if self.wrote_header || extra.len() >= u16::MAX as uint { + Err(io::standard_error(io::OtherIoError)) + } else { + self.extra = Some(extra); + Ok(()) + } + } + + /// Configure the `filename` field in the gzip header. + /// + /// This function will return an error if the header has already been + /// written. + pub fn filename(&mut self, filename: T) -> IoResult<()> { + if self.wrote_header { + Err(io::standard_error(io::OtherIoError)) + } else { + self.filename = Some(filename.to_c_str()); + Ok(()) + } + } + + /// Configure the `comment` field in the gzip header. + /// + /// This function will return an error if the header has already been + /// written. + pub fn comment(&mut self, comment: T) -> IoResult<()> { + if self.wrote_header { + Err(io::standard_error(io::OtherIoError)) + } else { + self.comment = Some(comment.to_c_str()); + Ok(()) + } + } + + fn write_header(&mut self) -> IoResult<()> { + let w = self.inner.inner.get_mut_ref(); + try!(w.write_u8(0x1f)); + try!(w.write_u8(0x8b)); + try!(w.write_u8(8)); + let flg = if self.extra.is_some() {FEXTRA} else {0} | + if self.filename.is_some() {FNAME} else {0} | + if self.comment.is_some() {FCOMMENT} else {0}; + try!(w.write_u8(flg)); + try!(w.write_le_u32(self.mtime)); + try!(w.write_u8(self.xfl)); + try!(w.write_u8(match os::consts::SYSNAME { + "linux" => 3, + "macos" => 7, + "win32" => 0, + _ => 255, + })); + + match self.extra { + Some(ref vec) => { + try!(w.write_le_u16(vec.len() as u16)); + try!(w.write(vec.as_slice())); + } + None => {} + } + match self.filename { + Some(ref cstr) => try!(w.write(cstr.as_bytes())), + None => {} + } + match self.comment { + Some(ref cstr) => try!(w.write(cstr.as_bytes())), + None => {} + } + Ok(()) + } + + /// Finish encoding this stream, returning the underlying writer once the + /// encoding is done. + pub fn finish(mut self) -> IoResult { + self.do_finish() + } + + fn do_finish(&mut self) -> IoResult { + try!(self.inner.do_finish()); + let mut inner = self.inner.inner.take().unwrap(); + try!(inner.write_le_u32(self.crc as u32)); + try!(inner.write_le_u32(self.amt)); + Ok(inner) + } +} + +impl Writer for Encoder { + fn write(&mut self, buf: &[u8]) -> IoResult<()> { + if !self.wrote_header { + self.wrote_header = true; + try!(self.write_header()); + } + try!(self.inner.write(buf)); + self.crc = unsafe { + ffi::mz_crc32(self.crc, buf.as_ptr(), buf.len() as libc::size_t) + }; + self.amt += buf.len() as u32; + Ok(()) + } + + fn flush(&mut self) -> IoResult<()> { self.inner.flush() } +} + +#[unsafe_destructor] +impl Drop for Encoder { + fn drop(&mut self) { + if self.inner.inner.is_some() { + let _ = self.do_finish(); + } + } +} + +impl Decoder { + /// Creates a new decoder from the given reader, immediately parsing the + /// gzip header. + /// + /// If an error is encountered when parsing the gzip header, an error is + /// returned. + pub fn new(mut r: R) -> IoResult> { + let id1 = try!(r.read_u8()); + let id2 = try!(r.read_u8()); + if id1 != 0x1f || id2 != 0x8b { return Err(bad_header()) } + let cm = try!(r.read_u8()); + if cm != 8 { return Err(bad_header()) } + + let flg = try!(r.read_u8()); + let mtime = try!(r.read_le_u32()); + let _xfl = try!(r.read_u8()); + let _os = try!(r.read_u8()); + + let extra = if flg & FEXTRA != 0 { + let xlen = try!(r.read_le_u16()); + Some(try!(r.read_exact(xlen as uint))) + } else { + None + }; + let filename = if flg & FNAME != 0 { + // wow this is slow + let mut b = Vec::new(); + for byte in r.bytes() { + let byte = try!(byte); + if byte == 0 { break } + b.push(byte); + } + Some(b) + } else { + None + }; + let comment = if flg & FCOMMENT != 0 { + // wow this is slow + let mut b = Vec::new(); + for byte in r.bytes() { + let byte = try!(byte); + if byte == 0 { break } + b.push(byte); + } + Some(b) + } else { + None + }; + + if flg & FHCRC != 0 { + try!(r.read_le_u16()); + } + + return Ok(Decoder { + inner: ::Decoder::new(r), + crc: 0, + amt: 0, + extra: extra, + filename: filename, + comment: comment, + mtime: mtime, + }); + + fn bad_header() -> IoError { + IoError { + kind: io::InvalidInput, + desc: "invalid gzip header", + detail: None, + } + } + } + + /// Returns the `filename` field of this gzip stream's header, if present. + pub fn filename<'a>(&'a self) -> Option<&'a [u8]> { + self.filename.as_ref().map(|s| s.as_slice()) + } + /// Returns the `extra` field of this gzip stream's header, if present. + pub fn extra<'a>(&'a self) -> Option<&'a [u8]> { + self.extra.as_ref().map(|s| s.as_slice()) + } + /// Returns the `comment` field of this gzip stream's header, if present. + pub fn comment<'a>(&'a self) -> Option<&'a [u8]> { + self.comment.as_ref().map(|s| s.as_slice()) + } + /// Returns the `mtime` field of this gzip stream's header, if present. + pub fn mtime(&self) -> u32 { self.mtime } + + fn finish(&mut self) -> IoResult<()> { + let mut buf = [0u8, ..8]; + let remaining = self.inner.buf.slice_from(self.inner.buf.len()); + let len = cmp::min(remaining.len(), buf.len()); + bytes::copy_memory(buf, remaining.slice_to(len)); + + if len < buf.len() { + try!(self.inner.inner.read_at_least(buf.len() - len, buf)); + } + + let crc = (buf[0] as u32 << 0) | + (buf[1] as u32 << 8) | + (buf[2] as u32 << 16) | + (buf[3] as u32 << 24); + let amt = (buf[4] as u32 << 0) | + (buf[5] as u32 << 8) | + (buf[6] as u32 << 16) | + (buf[7] as u32 << 24); + if crc != self.crc as u32 { return Err(corrupt()) } + if amt != self.amt { return Err(corrupt()) } + Ok(()) + } +} + +impl Reader for Decoder { + fn read(&mut self, into: &mut [u8]) -> IoResult { + let amt = match self.inner.read(into) { + Ok(amt) => amt, + Err(e) => { + if e.kind == io::EndOfFile { + try!(self.finish()); + } + return Err(e) + } + }; + self.amt += amt as u32; + self.crc = unsafe { + ffi::mz_crc32(self.crc, into.as_ptr(), amt as libc::size_t) + }; + Ok(amt) + } +} + +fn corrupt() -> IoError { + IoError { + kind: io::OtherIoError, + desc: "corrupt gzip stream does not have a matching checksum", + detail: None, + } +} + +#[cfg(test)] +mod tests { + use super::{Encoder, Decoder}; + use {Default}; + use std::io::{MemWriter, MemReader}; + + #[test] + fn roundtrip() { + let mut e = Encoder::new(MemWriter::new(), Default); + e.write(b"foo bar baz").unwrap(); + let inner = e.finish().unwrap(); + let mut d = Decoder::new(MemReader::new(inner.unwrap())); + assert_eq!(d.read_to_string().unwrap().as_slice(), "foo bar baz"); + } +} diff --git a/src/lib.rs b/src/lib.rs index cb03f2e3c..b6751df0e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -18,6 +18,7 @@ use std::io::IoResult; use std::mem; mod ffi; +pub mod gz; /// A DEFLATE encoder, or compressor. /// @@ -131,7 +132,9 @@ impl Writer for Encoder { } fn flush(&mut self) -> IoResult<()> { - self.deflate([], ffi::MZ_SYNC_FLUSH) + self.deflate([], ffi::MZ_SYNC_FLUSH).and_then(|_| { + self.inner.get_mut_ref().flush() + }) } } @@ -149,6 +152,14 @@ impl Decoder { /// Creates a new decoder which will decompress data read from the given /// stream. pub fn new(r: R) -> Decoder { + Decoder::new_with_buf(r, Vec::with_capacity(128 * 1024)) + } + + /// Same as `new`, but the intermediate buffer for data is specified. + /// + /// Note that the capacity of the intermediate buffer is never increased, + /// and it is recommended for it to be large. + pub fn new_with_buf(r: R, buf: Vec) -> Decoder { let mut state: ffi::mz_stream = unsafe { mem::zeroed() }; let ret = unsafe { ffi::mz_inflateInit(&mut state) }; assert_eq!(ret, 0); @@ -157,7 +168,7 @@ impl Decoder { inner: r, stream: Stream(state, Inflate), pos: 0, - buf: Vec::with_capacity(128 * 1024), + buf: buf, } } }