diff --git a/.gitignore b/.gitignore index 449fd6b2..7fc0e2b8 100644 --- a/.gitignore +++ b/.gitignore @@ -136,4 +136,3 @@ dmypy.json #Added by cargo /target -Cargo.lock diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 00000000..e1d48ff6 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,494 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "autocfg" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "brotli-sys" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4445dea95f4c2b41cde57cc9fee236ae4dbae88d8fcbdb4750fc1bb5d86aaecd" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "brotli2" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cb036c3eade309815c15ddbacec5b22c4d1f3983a774ab2eac2e3e9ea85568e" +dependencies = [ + "brotli-sys", + "libc", +] + +[[package]] +name = "cc" +version = "1.0.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d26a6ce4b6a484fa3edb70f7efa6fc430fd2b87285fe8b84304fd0936faa0dc0" +dependencies = [ + "jobserver", +] + +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "cramjam" +version = "2.4.0-rc1" +dependencies = [ + "brotli2", + "flate2", + "lz4", + "mimalloc", + "numpy", + "pyo3", + "snap", + "zstd", +] + +[[package]] +name = "crc32fast" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "flate2" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80edafed416a46fb378521624fab1cfa2eb514784fd8921adbe8a8d8321da811" +dependencies = [ + "cfg-if 1.0.0", + "crc32fast", + "libc", + "miniz_oxide", +] + +[[package]] +name = "indoc" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47741a8bc60fb26eb8d6e0238bbb26d8575ff623fdc97b1a2c00c050b9684ed8" +dependencies = [ + "indoc-impl", + "proc-macro-hack", +] + +[[package]] +name = "indoc-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce046d161f000fffde5f432a0d034d0341dc152643b2598ed5bfce44c4f3a8f0" +dependencies = [ + "proc-macro-hack", + "proc-macro2", + "quote", + "syn", + "unindent", +] + +[[package]] +name = "instant" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bee0328b1209d157ef001c94dd85b4f8f64139adb0eac2659f4b08382b2f474d" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "jobserver" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa" +dependencies = [ + "libc", +] + +[[package]] +name = "libc" +version = "0.2.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cb00336871be5ed2c8ed44b60ae9959dc5b9f08539422ed43f09e34ecaeba21" + +[[package]] +name = "libmimalloc-sys" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d1b8479c593dba88c2741fc50b92e13dbabbbe0bd504d979f244ccc1a5b1c01" +dependencies = [ + "cc", +] + +[[package]] +name = "lock_api" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712a4d093c9976e24e7dbca41db895dabcbac38eb5f4045393d17a95bdfb1109" +dependencies = [ + "scopeguard", +] + +[[package]] +name = "lz4" +version = "1.23.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aac20ed6991e01bf6a2e68cc73df2b389707403662a8ba89f68511fb340f724c" +dependencies = [ + "libc", + "lz4-sys", +] + +[[package]] +name = "lz4-sys" +version = "1.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dca79aa95d8b3226213ad454d328369853be3a1382d89532a854f4d69640acae" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "matrixmultiply" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a8a15b776d9dfaecd44b03c5828c2199cddff5247215858aac14624f8d6b741" +dependencies = [ + "rawpointer", +] + +[[package]] +name = "mimalloc" +version = "0.1.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb74897ce508e6c49156fd1476fc5922cbc6e75183c65e399c765a09122e5130" +dependencies = [ + "libmimalloc-sys", +] + +[[package]] +name = "miniz_oxide" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b" +dependencies = [ + "adler", + "autocfg", +] + +[[package]] +name = "ndarray" +version = "0.15.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08e854964160a323e65baa19a0b1a027f76d590faba01f05c0cbc3187221a8c9" +dependencies = [ + "matrixmultiply", + "num-complex", + "num-integer", + "num-traits", + "rawpointer", +] + +[[package]] +name = "num-complex" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26873667bbbb7c5182d4a37c1add32cdf09f841af72da53318fdb81543c15085" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +dependencies = [ + "autocfg", +] + +[[package]] +name = "numpy" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09c15af63aa0c74e0f7230d4e95d9a3d71a23449905f30f50b055df9a6a6a3e6" +dependencies = [ + "cfg-if 0.1.10", + "libc", + "ndarray", + "num-complex", + "num-traits", + "pyo3", +] + +[[package]] +name = "once_cell" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56" + +[[package]] +name = "parking_lot" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99" +dependencies = [ + "instant", + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216" +dependencies = [ + "cfg-if 1.0.0", + "instant", + "libc", + "redox_syscall", + "smallvec", + "winapi", +] + +[[package]] +name = "paste" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45ca20c77d80be666aef2b45486da86238fabe33e38306bd3118fe4af33fa880" +dependencies = [ + "paste-impl", + "proc-macro-hack", +] + +[[package]] +name = "paste-impl" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d95a7db200b97ef370c8e6de0088252f7e0dfff7d047a28528e47456c0fc98b6" +dependencies = [ + "proc-macro-hack", +] + +[[package]] +name = "proc-macro-hack" +version = "0.5.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" + +[[package]] +name = "proc-macro2" +version = "1.0.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9f5105d4fdaab20335ca9565e106a5d9b82b6219b5ba735731124ac6711d23d" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "pyo3" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35100f9347670a566a67aa623369293703322bb9db77d99d7df7313b575ae0c8" +dependencies = [ + "cfg-if 1.0.0", + "indoc", + "libc", + "parking_lot", + "paste", + "pyo3-build-config", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d12961738cacbd7f91b7c43bc25cfeeaa2698ad07a04b3be0aa88b950865738f" +dependencies = [ + "once_cell", +] + +[[package]] +name = "pyo3-macros" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0bc5215d704824dfddddc03f93cb572e1155c68b6761c37005e1c288808ea8" +dependencies = [ + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71623fc593224afaab918aa3afcaf86ed2f43d34f6afde7f3922608f253240df" +dependencies = [ + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + +[[package]] +name = "quote" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + +[[package]] +name = "redox_syscall" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff" +dependencies = [ + "bitflags", +] + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "smallvec" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" + +[[package]] +name = "snap" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45456094d1983e2ee2a18fdfebce3189fa451699d0502cb8e3b49dba5ba41451" + +[[package]] +name = "syn" +version = "1.0.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7f58f7e8eaa0009c5fec437aabf511bd9933e4b2d7407bd05273c01a8906ea7" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" + +[[package]] +name = "unindent" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f14ee04d9415b52b3aeab06258a3f07093182b88ba0f9b8d203f211a7a7d41c7" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "zstd" +version = "0.9.0+zstd.1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07749a5dc2cb6b36661290245e350f15ec3bbb304e493db54a1d354480522ccd" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "4.1.1+zstd.1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c91c90f2c593b003603e5e0493c837088df4469da25aafff8bce42ba48caf079" +dependencies = [ + "libc", + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "1.6.1+zstd.1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "615120c7a2431d16cf1cf979e7fc31ba7a5b5e5707b29c8a99e5dbf8a8392a33" +dependencies = [ + "cc", + "libc", +] diff --git a/Cargo.toml b/Cargo.toml index af8c02c9..d68ade80 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "cramjam" -version = "2.3.2" +version = "2.4.0-rc1" authors = ["Miles Granger "] edition = "2018" license-file = "LICENSE" diff --git a/src/brotli.rs b/src/brotli.rs index e6e06ee0..b81c0f68 100644 --- a/src/brotli.rs +++ b/src/brotli.rs @@ -7,11 +7,14 @@ use pyo3::wrap_pyfunction; use pyo3::PyResult; use std::io::Cursor; +const DEFAULT_COMPRESSION_LEVEL: u32 = 11; + pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(compress, m)?)?; m.add_function(wrap_pyfunction!(decompress, m)?)?; m.add_function(wrap_pyfunction!(compress_into, m)?)?; m.add_function(wrap_pyfunction!(decompress_into, m)?)?; + m.add_class::()?; Ok(()) } @@ -53,8 +56,37 @@ pub fn decompress_into(input: BytesType, mut output: BytesType) -> PyResult>>>, +} + +#[pymethods] +impl Compressor { + /// Initialize a new `Compressor` instance. + #[new] + pub fn __init__(level: Option) -> PyResult { + let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL); + let inner = brotli2::write::BrotliEncoder::new(Cursor::new(vec![]), level); + Ok(Self { inner: Some(inner) }) + } + + /// Compress input into the current compressor's stream. + pub fn compress(&mut self, input: &[u8]) -> PyResult { + crate::io::stream_compress(&mut self.inner, input) + } + + /// Consume the current compressor state and return the compressed stream + /// **NB** The compressor will not be usable after this method is called. + pub fn finish(&mut self) -> PyResult { + crate::io::stream_finish(&mut self.inner, |inner| inner.finish().map(|c| c.into_inner())) + } +} + pub(crate) mod internal { + use crate::brotli::DEFAULT_COMPRESSION_LEVEL; use brotli2::read::{BrotliDecoder, BrotliEncoder}; use std::io::prelude::*; use std::io::Error; @@ -68,7 +100,7 @@ pub(crate) mod internal { /// Compress via Brotli pub fn compress(input: R, output: &mut W, level: Option) -> Result { - let level = level.unwrap_or_else(|| 11); + let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL); let mut encoder = BrotliEncoder::new(input, level); let n_bytes = std::io::copy(&mut encoder, output)?; Ok(n_bytes as usize) diff --git a/src/deflate.rs b/src/deflate.rs index c8e7d4e0..10fc37a6 100644 --- a/src/deflate.rs +++ b/src/deflate.rs @@ -7,11 +7,14 @@ use pyo3::wrap_pyfunction; use pyo3::PyResult; use std::io::Cursor; +const DEFAULT_COMPRESSION_LEVEL: u32 = 6; + pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(compress, m)?)?; m.add_function(wrap_pyfunction!(decompress, m)?)?; m.add_function(wrap_pyfunction!(compress_into, m)?)?; m.add_function(wrap_pyfunction!(decompress_into, m)?)?; + m.add_class::()?; Ok(()) } @@ -53,8 +56,38 @@ pub fn decompress_into(input: BytesType, mut output: BytesType) -> PyResult>>>, +} + +#[pymethods] +impl Compressor { + /// Initialize a new `Compressor` instance. + #[new] + pub fn __init__(level: Option) -> PyResult { + let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL); + let compression = flate2::Compression::new(level); + let inner = flate2::write::DeflateEncoder::new(Cursor::new(vec![]), compression); + Ok(Self { inner: Some(inner) }) + } + + /// Compress input into the current compressor's stream. + pub fn compress(&mut self, input: &[u8]) -> PyResult { + crate::io::stream_compress(&mut self.inner, input) + } + + /// Consume the current compressor state and return the compressed stream + /// **NB** The compressor will not be usable after this method is called. + pub fn finish(&mut self) -> PyResult { + crate::io::stream_finish(&mut self.inner, |inner| inner.finish().map(|c| c.into_inner())) + } +} + pub(crate) mod internal { + use crate::deflate::DEFAULT_COMPRESSION_LEVEL; use flate2::read::{DeflateDecoder, DeflateEncoder}; use flate2::Compression; use std::io::prelude::*; @@ -69,7 +102,7 @@ pub(crate) mod internal { /// Compress gzip data pub fn compress(input: R, output: &mut W, level: Option) -> Result { - let level = level.unwrap_or_else(|| 6); + let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL); let mut encoder = DeflateEncoder::new(input, Compression::new(level)); let n_bytes = std::io::copy(&mut encoder, output)?; diff --git a/src/gzip.rs b/src/gzip.rs index ebf416fb..12a9fc47 100644 --- a/src/gzip.rs +++ b/src/gzip.rs @@ -7,11 +7,14 @@ use pyo3::wrap_pyfunction; use pyo3::PyResult; use std::io::Cursor; +const DEFAULT_COMPRESSION_LEVEL: u32 = 6; + pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(compress, m)?)?; m.add_function(wrap_pyfunction!(decompress, m)?)?; m.add_function(wrap_pyfunction!(compress_into, m)?)?; m.add_function(wrap_pyfunction!(decompress_into, m)?)?; + m.add_class::()?; Ok(()) } @@ -53,24 +56,72 @@ pub fn decompress_into(input: BytesType, mut output: BytesType) -> PyResult>>>, +} + +#[pymethods] +impl Compressor { + /// Initialize a new `Compressor` instance. + #[new] + pub fn __init__(level: Option) -> PyResult { + let level = level.unwrap_or(DEFAULT_COMPRESSION_LEVEL); + let inner = flate2::write::GzEncoder::new(Cursor::new(vec![]), flate2::Compression::new(level)); + Ok(Self { inner: Some(inner) }) + } + + /// Compress input into the current compressor's stream. + pub fn compress(&mut self, input: &[u8]) -> PyResult { + crate::io::stream_compress(&mut self.inner, input) + } + + /// Consume the current compressor state and return the compressed stream + /// **NB** The compressor will not be usable after this method is called. + pub fn finish(&mut self) -> PyResult { + crate::io::stream_finish(&mut self.inner, |inner| inner.finish().map(|c| c.into_inner())) + } +} + pub(crate) mod internal { - use flate2::read::{GzDecoder, GzEncoder}; + use crate::gzip::DEFAULT_COMPRESSION_LEVEL; + use flate2::read::{GzEncoder, MultiGzDecoder}; use flate2::Compression; use std::io::prelude::*; - use std::io::Error; + use std::io::{Cursor, Error}; /// Decompress gzip data pub fn decompress(input: R, output: &mut W) -> Result { - let mut decoder = GzDecoder::new(input); - let n_bytes = std::io::copy(&mut decoder, output)?; + let mut decoder = MultiGzDecoder::new(input); + let mut out = vec![]; + let n_bytes = decoder.read_to_end(&mut out)?; + std::io::copy(&mut Cursor::new(out.as_slice()), output)?; Ok(n_bytes as usize) } /// Compress gzip data pub fn compress(input: R, output: &mut W, level: Option) -> Result { - let level = level.unwrap_or_else(|| 6); + let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL); let mut encoder = GzEncoder::new(input, Compression::new(level)); let n_bytes = std::io::copy(&mut encoder, output)?; Ok(n_bytes as usize) } + + #[cfg(test)] + mod tests { + + #[test] + fn test_gzip_multiple_streams() { + let mut out1 = vec![]; + let mut out2 = vec![]; + super::compress(b"foo".to_vec().as_slice(), &mut out1, None).unwrap(); + super::compress(b"bar".to_vec().as_slice(), &mut out2, None).unwrap(); + + let mut out3 = vec![]; + out1.extend_from_slice(&out2); + super::decompress(out1.as_slice(), &mut out3).unwrap(); + assert_eq!(out3, b"foobar".to_vec()); + } + } } diff --git a/src/io.rs b/src/io.rs index b7b79396..9dab764b 100644 --- a/src/io.rs +++ b/src/io.rs @@ -5,6 +5,7 @@ use std::fs::{File, OpenOptions}; use std::io::{copy, Cursor, Read, Seek, SeekFrom, Write}; +use crate::exceptions::CompressionError; use crate::BytesType; use numpy::PyArray1; use pyo3::class::buffer::PyBufferProtocol; @@ -595,3 +596,42 @@ impl Read for RustyFile { self.inner.read(buf) } } + +// general stream compression interface. Can't use associated types due to pyo3::pyclass +// not supporting generic structs. +#[inline(always)] +pub(crate) fn stream_compress(encoder: &mut Option, input: &[u8]) -> PyResult { + match encoder { + Some(encoder) => { + let result = std::io::copy(&mut Cursor::new(input), encoder).map(|v| v as usize); + crate::to_py_err!(CompressionError -> result) + } + None => Err(CompressionError::new_err( + "Compressor looks to have been consumed via `finish()`. \ + please create a new compressor instance.", + )), + } +} + +// general stream finish interface. Can't use associated types due to pyo3::pyclass +// not supporting generic structs. +#[inline(always)] +pub(crate) fn stream_finish(encoder: &mut Option, into_vec: F) -> PyResult +where + W: Write, + E: ToString, + F: Fn(W) -> Result, E>, +{ + // &mut encoder is part of a Compressor, often the .finish portion consumes + // the struct; which cannot be done with pyclass. So we'll swap it out for None + let mut detached_encoder = None; + std::mem::swap(&mut detached_encoder, encoder); + + match detached_encoder { + Some(encoder) => { + let result = crate::to_py_err!(CompressionError -> into_vec(encoder))?; + Ok(RustyBuffer::from(result)) + } + None => Ok(RustyBuffer::from(vec![])), + } +} diff --git a/src/lib.rs b/src/lib.rs index 6e74c69f..44851785 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -89,7 +89,7 @@ pub enum BytesType<'a> { RustyBuffer(&'a PyCell), /// `numpy.array` with `dtype=np.uint8` #[pyo3(transparent, annotation = "numpy")] - NumpyArray(RustyNumpyArray<'a>) + NumpyArray(RustyNumpyArray<'a>), } impl<'a> AsBytes for BytesType<'a> { diff --git a/src/lz4.rs b/src/lz4.rs index 2bd6d5d4..07881ae9 100644 --- a/src/lz4.rs +++ b/src/lz4.rs @@ -7,6 +7,8 @@ use pyo3::wrap_pyfunction; use pyo3::PyResult; use std::io::Cursor; +const DEFAULT_COMPRESSION_LEVEL: u32 = 4; + pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(compress, m)?)?; m.add_function(wrap_pyfunction!(decompress, m)?)?; @@ -14,6 +16,7 @@ pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(decompress_block, m)?)?; m.add_function(wrap_pyfunction!(compress_into, m)?)?; m.add_function(wrap_pyfunction!(decompress_into, m)?)?; + m.add_class::()?; Ok(()) } @@ -117,7 +120,41 @@ pub fn compress_block( Ok(RustyBuffer::from(out)) } +/// Snappy Compressor object for streaming compression +#[pyclass] +pub struct Compressor { + inner: Option>>>, +} + +#[pymethods] +impl Compressor { + /// Initialize a new `Compressor` instance. + #[new] + pub fn __init__(level: Option) -> PyResult { + let inner = lz4::EncoderBuilder::new() + .auto_flush(true) + .level(level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL)) + .build(Cursor::new(vec![]))?; + Ok(Self { inner: Some(inner) }) + } + + /// Compress input into the current compressor's stream. + pub fn compress(&mut self, input: &[u8]) -> PyResult { + crate::io::stream_compress(&mut self.inner, input) + } + + /// Consume the current compressor state and return the compressed stream + /// **NB** The compressor will not be usable after this method is called. + pub fn finish(&mut self) -> PyResult { + crate::io::stream_finish(&mut self.inner, |inner| { + let (cursor, result) = inner.finish(); + result.map(|_| cursor.into_inner()) + }) + } +} + pub(crate) mod internal { + use crate::lz4::DEFAULT_COMPRESSION_LEVEL; use lz4::{Decoder, EncoderBuilder}; use std::io::{Error, Read, Seek, SeekFrom, Write}; @@ -138,7 +175,7 @@ pub(crate) mod internal { let start_pos = output.seek(SeekFrom::Current(0))?; let mut encoder = EncoderBuilder::new() .auto_flush(true) - .level(level.unwrap_or_else(|| 4)) + .level(level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL)) .build(output)?; // this returns, bytes read from uncompressed, input; we want bytes written diff --git a/src/snappy.rs b/src/snappy.rs index e83181ba..b5ac7474 100644 --- a/src/snappy.rs +++ b/src/snappy.rs @@ -18,6 +18,7 @@ pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(decompress_raw_into, m)?)?; m.add_function(wrap_pyfunction!(compress_raw_max_len, m)?)?; m.add_function(wrap_pyfunction!(decompress_raw_len, m)?)?; + m.add_class::()?; Ok(()) } @@ -123,6 +124,33 @@ pub fn decompress_raw_len(data: BytesType) -> PyResult { to_py_err!(DecompressionError -> snap::raw::decompress_len(data.as_bytes())) } +/// Snappy Compressor object for streaming compression +#[pyclass] +pub struct Compressor { + inner: Option>>>, +} + +#[pymethods] +impl Compressor { + /// Initialize a new `Compressor` instance. + #[new] + pub fn __init__() -> PyResult { + let inner = snap::write::FrameEncoder::new(Cursor::new(vec![])); + Ok(Self { inner: Some(inner) }) + } + + /// Compress input into the current compressor's stream. + pub fn compress(&mut self, input: &[u8]) -> PyResult { + crate::io::stream_compress(&mut self.inner, input) + } + + /// Consume the current compressor state and return the compressed stream + /// **NB** The compressor will not be usable after this method is called. + pub fn finish(&mut self) -> PyResult { + crate::io::stream_finish(&mut self.inner, |inner| inner.into_inner().map(|c| c.into_inner())) + } +} + pub(crate) mod internal { use snap::read::{FrameDecoder, FrameEncoder}; use std::io::{Error, Read, Write}; diff --git a/src/zstd.rs b/src/zstd.rs index bceee059..9c0eb3ec 100644 --- a/src/zstd.rs +++ b/src/zstd.rs @@ -7,11 +7,14 @@ use pyo3::wrap_pyfunction; use pyo3::PyResult; use std::io::Cursor; +const DEFAULT_COMPRESSION_LEVEL: i32 = 0; + pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> { m.add_function(wrap_pyfunction!(compress, m)?)?; m.add_function(wrap_pyfunction!(decompress, m)?)?; m.add_function(wrap_pyfunction!(compress_into, m)?)?; m.add_function(wrap_pyfunction!(decompress_into, m)?)?; + m.add_class::()?; Ok(()) } @@ -53,8 +56,36 @@ pub fn decompress_into<'a>(_py: Python<'a>, input: BytesType<'a>, mut output: By Ok(r) } +/// ZSTD Compressor object for streaming compression +#[pyclass] +pub struct Compressor { + inner: Option>>>, +} + +#[pymethods] +impl Compressor { + /// Initialize a new `Compressor` instance. + #[new] + pub fn __init__(level: Option) -> PyResult { + let inner = zstd::stream::write::Encoder::new(Cursor::new(vec![]), level.unwrap_or(DEFAULT_COMPRESSION_LEVEL))?; + Ok(Self { inner: Some(inner) }) + } + + /// Compress input into the current compressor's stream. + pub fn compress(&mut self, input: &[u8]) -> PyResult { + crate::io::stream_compress(&mut self.inner, input) + } + + /// Consume the current compressor state and return the compressed stream + /// **NB** The compressor will not be usable after this method is called. + pub fn finish(&mut self) -> PyResult { + crate::io::stream_finish(&mut self.inner, |inner| inner.finish().map(|v| v.into_inner())) + } +} + pub(crate) mod internal { + use crate::zstd::DEFAULT_COMPRESSION_LEVEL; use std::io::{Error, Read, Write}; /// Decompress gzip data @@ -66,7 +97,7 @@ pub(crate) mod internal { /// Compress gzip data pub fn compress(input: R, output: &mut W, level: Option) -> Result { - let level = level.unwrap_or_else(|| 0); // 0 will use zstd's default, currently 3 + let level = level.unwrap_or_else(|| DEFAULT_COMPRESSION_LEVEL); // 0 will use zstd's default, currently 3 let mut encoder = zstd::stream::read::Encoder::new(input, level)?; let n_bytes = std::io::copy(&mut encoder, output)?; Ok(n_bytes as usize) diff --git a/tests/test_variants.py b/tests/test_variants.py index 85940fa4..ab0d2fd0 100644 --- a/tests/test_variants.py +++ b/tests/test_variants.py @@ -1,3 +1,4 @@ +import gzip import pytest import numpy as np import cramjam @@ -221,3 +222,48 @@ def test_lz4_block(compress_kwargs): output_len=len(data) if not compress_kwargs["store_size"] else None, ) assert bytes(out) == data + + +def test_gzip_multiple_streams(): + + out1 = gzip.compress(b"foo") + out2 = gzip.compress(b"bar") + assert gzip.decompress(out1 + out2) == b"foobar" + + # works with data compressed by std gzip lib + out = bytes(cramjam.gzip.decompress(out1 + out2)) + assert out == b"foobar" + + # works with data compressed by cramjam + o1 = bytes(cramjam.gzip.compress(b"foo")) + o2 = bytes(cramjam.gzip.compress(b"bar")) + out = bytes(cramjam.gzip.decompress(o1 + o2)) + assert out == b"foobar" + + +@pytest.mark.parametrize( + "mod", + ( + cramjam.brotli, + cramjam.deflate, + cramjam.gzip, + cramjam.lz4, + cramjam.snappy, + cramjam.zstd, + ), +) +def test_streams_compressor(mod): + compressor = mod.Compressor() + compressor.compress(b"foo") + compressor.compress(b"bar") + out = compressor.finish() + decompressed = mod.decompress(out) + assert bytes(decompressed) == b"foobar" + + # just empty bytes after the first .finish() + # same behavior as brotli.Compressor() + assert bytes(compressor.finish()) == b"" + + # compress will raise an error as the stream is completed + with pytest.raises(cramjam.CompressionError): + compressor.compress(b'data')