Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor implementation and optional mimalloc #36

Merged
merged 12 commits into from
Feb 22, 2021
4 changes: 3 additions & 1 deletion .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ name: CI

on:
push:
branches:
- master
pull_request:
release:
types:
Expand Down Expand Up @@ -180,7 +182,7 @@ jobs:
run: pip install maturin
- name: Build Wheels
run: |
maturin build -i python --release --out dist --no-sdist --target ${{ matrix.platform.target }} --manylinux ${{ matrix.platform.manylinux }}
maturin build -i python --release --out dist --no-sdist --target ${{ matrix.platform.target }} --manylinux ${{ matrix.platform.manylinux }} --cargo-extra-args="--no-default-features" --cargo-extra-args="--features=abi3"
- uses: uraimo/run-on-arch-action@v2.0.5
name: Install built wheel
with:
Expand Down
13 changes: 12 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,14 @@ description = "Thin Python bindings to de/compression algorithms in Rust"
crate-type = ["cdylib"]

[features]
default = ["abi3"]
default = ["abi3", "mimallocator"]
abi3 = ["pyo3/abi3-py36"]
mimallocator = ["mimalloc"]

[profile.release]
lto = "fat"
codegen-units = 1
opt-level = 3

[dependencies]
pyo3 = { version = "0.13.2", features = ["extension-module"] }
Expand All @@ -22,3 +28,8 @@ lz-fear = "0.1.1"
flate2 = "^1"
zstd = "0.6.0+zstd.1.4.8"
numpy = "0.13.0"

[dependencies.mimalloc]
version = "0.1.24"
default-features = false
optional = true
8 changes: 5 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@ test:
bench:
python -m pytest -v --benchmark-only --benchmark-sort name benchmarks/

bench-snappy:
$(BASE_BENCH_CMD) test_snappy
bench-snappy-framed:
$(BASE_BENCH_CMD) test_snappy_framed

bench-snappy-raw:
$(BASE_BENCH_CMD) test_snappy_raw

bench-snappy-compress-into:
$(BASE_BENCH_CMD) snappy_de_compress_into
Expand All @@ -28,7 +31,6 @@ dev-install:
rm -rf ./dist
maturin build --release --out dist --no-sdist --interpreter $(shell which python)
pip uninstall cramjam -y
rm dist/*.tar.gz
pip install cramjam --no-index --find-links dist/

pypy-build:
Expand Down
352 changes: 193 additions & 159 deletions benchmarks/README.md

Large diffs are not rendered by default.

38 changes: 33 additions & 5 deletions benchmarks/test_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,18 @@ def round_trip(compress, decompress, data, **kwargs):
"use_cramjam", (True, False), ids=lambda val: "cramjam" if val else "snappy"
)
@pytest.mark.parametrize("file", FILES, ids=lambda val: val.name)
def test_snappy(benchmark, file, use_cramjam: bool):
def test_snappy_raw(benchmark, file, use_cramjam: bool):
"""
Uses snappy compression
Uses snappy compression raw
"""
import snappy

data = file.read_bytes()
data = bytearray(file.read_bytes())
if use_cramjam:
benchmark(
round_trip,
compress=cramjam.snappy.compress,
decompress=cramjam.snappy.decompress,
compress=cramjam.snappy.compress_raw,
decompress=cramjam.snappy.decompress_raw,
data=data,
)
else:
Expand All @@ -43,6 +43,34 @@ def test_snappy(benchmark, file, use_cramjam: bool):
)


@pytest.mark.parametrize(
    "use_cramjam", (True, False), ids=lambda val: "cramjam" if val else "snappy"
)
@pytest.mark.parametrize("file", FILES, ids=lambda val: val.name)
def test_snappy_framed(benchmark, file, use_cramjam: bool):
    """
    Benchmark a compress/decompress round trip through the snappy *framed*
    format, using either cramjam or python-snappy's stream classes depending
    on ``use_cramjam``.
    """
    import snappy

    payload = bytearray(file.read_bytes())

    # Select the pair of callables first so the benchmark call is written once.
    if use_cramjam:
        compress_fn = cramjam.snappy.compress
        decompress_fn = cramjam.snappy.decompress
    else:
        # The stream objects are created only on the python-snappy path; the
        # bound methods keep them alive for the duration of the benchmark.
        compress_fn = snappy.StreamCompressor().compress
        decompress_fn = snappy.StreamDecompressor().decompress

    benchmark(
        round_trip,
        compress=compress_fn,
        decompress=decompress_fn,
        data=payload,
    )

@pytest.mark.parametrize("op", ("decompress_into", "compress_into"))
@pytest.mark.parametrize("file", FILES, ids=lambda val: val.name)
def test_cramjam_snappy_de_compress_into(benchmark, op, file):
Expand Down
127 changes: 16 additions & 111 deletions src/brotli.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use crate::exceptions::{CompressionError, DecompressionError};
use crate::{to_py_err, BytesType, Output};
use crate::{to_py_err, BytesType, WriteablePyByteArray};
use numpy::PyArray1;
use pyo3::prelude::*;
use pyo3::types::{PyByteArray, PyBytes};
use pyo3::types::PyBytes;
use pyo3::wrap_pyfunction;
use pyo3::{PyResult, Python};
use std::io::Cursor;

pub fn init_py_module(m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(compress, m)?)?;
Expand All @@ -23,42 +24,7 @@ pub fn init_py_module(m: &PyModule) -> PyResult<()> {
/// ```
#[pyfunction]
pub fn decompress<'a>(py: Python<'a>, data: BytesType<'a>, output_len: Option<usize>) -> PyResult<BytesType<'a>> {
match data {
BytesType::Bytes(input) => match output_len {
Some(len) => {
let pybytes = PyBytes::new_with(py, len, |buffer| {
let output = Output::Slice(buffer);
to_py_err!(DecompressionError -> self::internal::decompress(input.as_bytes(), output))?;
Ok(())
})?;
Ok(BytesType::Bytes(pybytes))
}
None => {
let mut buffer = Vec::with_capacity(data.len() / 10);
let output = Output::Vector(&mut buffer);
to_py_err!(DecompressionError -> self::internal::decompress(input.as_bytes(), output))?;
Ok(BytesType::Bytes(PyBytes::new(py, &buffer)))
}
},
BytesType::ByteArray(input) => match output_len {
Some(len) => {
let mut size = 0;
let pybytes = PyByteArray::new_with(py, len, |buffer| {
let output = Output::Slice(buffer);
size = to_py_err!(DecompressionError -> self::internal::decompress(unsafe { input.as_bytes() }, output))?;
Ok(())
})?;
pybytes.resize(size)?;
Ok(BytesType::ByteArray(pybytes))
}
None => {
let mut buffer = Vec::with_capacity(data.len() / 10);
let output = Output::Vector(&mut buffer);
to_py_err!(DecompressionError -> self::internal::decompress(unsafe { input.as_bytes() }, output))?;
Ok(BytesType::ByteArray(PyByteArray::new(py, &buffer)))
}
},
}
crate::generic!(decompress(data), py = py, output_len = output_len)
}

/// Brotli compression.
Expand All @@ -75,42 +41,7 @@ pub fn compress<'a>(
level: Option<u32>,
output_len: Option<usize>,
) -> PyResult<BytesType<'a>> {
match data {
BytesType::Bytes(input) => match output_len {
Some(len) => {
let pybytes = PyBytes::new_with(py, len, |buffer| {
let output = Output::Slice(buffer);
to_py_err!(CompressionError -> self::internal::compress(input.as_bytes(), output, level))?;
Ok(())
})?;
Ok(BytesType::Bytes(pybytes))
}
None => {
let mut buffer = Vec::with_capacity(data.len() / 10);
let output = Output::Vector(&mut buffer);
to_py_err!(CompressionError -> self::internal::compress(input.as_bytes(), output, level))?;
Ok(BytesType::Bytes(PyBytes::new(py, &buffer)))
}
},
BytesType::ByteArray(input) => match output_len {
Some(len) => {
let mut size = 0;
let pybytes = PyByteArray::new_with(py, len, |buffer| {
let output = Output::Slice(buffer);
size = to_py_err!(CompressionError -> self::internal::compress(unsafe { input.as_bytes() }, output, level))?;
Ok(())
})?;
pybytes.resize(size)?;
Ok(BytesType::ByteArray(pybytes))
}
None => {
let mut buffer = Vec::with_capacity(data.len() / 10);
let output = Output::Vector(&mut buffer);
to_py_err!(CompressionError -> self::internal::compress(unsafe { input.as_bytes() }, output, level))?;
Ok(BytesType::ByteArray(PyByteArray::new(py, &buffer)))
}
},
}
crate::generic!(compress(data), py = py, output_len = output_len, level = level)
}

/// Compress directly into an output buffer
Expand All @@ -121,59 +52,33 @@ pub fn compress_into<'a>(
array: &PyArray1<u8>,
level: Option<u32>,
) -> PyResult<usize> {
crate::de_compress_into(data.as_bytes(), array, |bytes, out| {
self::internal::compress(bytes, out, level)
})
crate::generic_into!(compress(data -> array), level)
}

/// Decompress directly into an output buffer
#[pyfunction]
pub fn decompress_into<'a>(_py: Python<'a>, data: BytesType<'a>, array: &'a PyArray1<u8>) -> PyResult<usize> {
crate::de_compress_into(data.as_bytes(), array, self::internal::decompress)
crate::generic_into!(decompress(data -> array))
}

pub(crate) mod internal {

use crate::Output;
use brotli2::read::{BrotliDecoder, BrotliEncoder};
use std::io::prelude::*;
use std::io::{Cursor, Error};
use std::io::Error;

/// Decompress via Brotli
pub fn decompress<'a>(data: &[u8], output: Output<'a>) -> Result<usize, Error> {
let mut decoder = BrotliDecoder::new(data);
match output {
Output::Slice(slice) => {
let mut n_bytes = 0;
loop {
let count = decoder.read(&mut slice[n_bytes..])?;
if count == 0 {
break;
}
n_bytes += count;
}
Ok(n_bytes)
}
Output::Vector(v) => decoder.read_to_end(v),
}
pub fn decompress<W: Write + ?Sized>(input: &[u8], output: &mut W) -> Result<usize, Error> {
let mut decoder = BrotliDecoder::new(input);
let n_bytes = std::io::copy(&mut decoder, output)?;
Ok(n_bytes as usize)
}

/// Compress via Brotli
pub fn compress<'a>(data: &'a [u8], output: Output<'a>, level: Option<u32>) -> Result<usize, Error> {
pub fn compress<W: Write + ?Sized>(input: &[u8], output: &mut W, level: Option<u32>) -> Result<usize, Error> {
let level = level.unwrap_or_else(|| 11);

match output {
Output::Slice(slice) => {
let buffer = Cursor::new(slice);
let mut encoder = brotli2::write::BrotliEncoder::new(buffer, level);
encoder.write_all(data)?;
let buffer = encoder.finish()?;
Ok(buffer.position() as usize)
}
Output::Vector(v) => {
let mut encoder = BrotliEncoder::new(data, level);
encoder.read_to_end(v)
}
}
let mut encoder = BrotliEncoder::new(input, level);
let n_bytes = std::io::copy(&mut encoder, output)?;
Ok(n_bytes as usize)
}
}
Loading