Skip to content

Commit

Permalink
Rust file-like objects and accept bytes/bytearray/numpy (#45)
Browse files Browse the repository at this point in the history
Adds support for all de/compression variants to accept 
bytes/bytearray/numpy/cramjam.File/cramjam.Buffer objects.

Additionally, the de/compress_into entry points can be mixed and matched
to allow any combination of input and output types.
  • Loading branch information
milesgranger authored Mar 18, 2021
1 parent 37306e4 commit 16b78f5
Show file tree
Hide file tree
Showing 15 changed files with 1,098 additions and 356 deletions.
50 changes: 16 additions & 34 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ jobs:
profile: minimal
default: true
- name: Build
run: cargo build
run: cargo build --release
- name: Tests
run: cargo test
run: cargo test --no-default-features --release
- name: Install maturin
run: pip install maturin
- name: Build wheels - x86_64
Expand Down Expand Up @@ -78,10 +78,10 @@ jobs:
default: true
- name: Build
if: matrix.platform.python-architecture == 'x64'
run: cargo build
run: cargo build --release
- name: Tests
if: matrix.platform.python-architecture == 'x64'
run: cargo test
run: cargo test --no-default-features --release
- name: Install maturin
run: pip install maturin
- name: Build wheels
Expand Down Expand Up @@ -115,9 +115,9 @@ jobs:
profile: minimal
default: true
- name: Build
run: cargo build
run: cargo build --release
- name: Tests
run: cargo test
run: cargo test --no-default-features
- uses: actions/setup-python@v2
with:
python-version: 3.6
Expand All @@ -129,7 +129,6 @@ jobs:
pip install maturin
maturin build -i python --release --out dist --no-sdist --target ${{ matrix.platform.target }} --manylinux ${{ matrix.platform.manylinux }}
' > build-wheel.sh
chmod +x build-wheel.sh
docker run --rm -v "$PWD":/io -w /io quay.io/pypa/manylinux${{ matrix.platform.manylinux }}_${{ matrix.platform.arch }} bash build-wheel.sh
- name: Python UnitTest
Expand All @@ -145,7 +144,7 @@ jobs:
path: dist

linux-cross:
runs-on: ubuntu-16.04
runs-on: ubuntu-latest
strategy:
matrix:
platform: [
Expand All @@ -157,32 +156,15 @@ jobs:
- uses: actions/setup-python@v2
with:
python-version: 3.6
- name: Install Rust toolchain
uses: actions-rs/toolchain@v1
with:
toolchain: stable
target: ${{ matrix.platform.target }}
profile: minimal
default: true
- name: Install aarch64 cross compiler
if: matrix.platform.target == 'aarch64-unknown-linux-gnu'
run: |
sudo apt-get install -y gcc-aarch64-linux-gnu libc6-arm64-cross libc6-dev-arm64-cross
echo "TARGET_CC=aarch64-linux-gnu-gcc" >> "$GITHUB_ENV"
echo "TARGET_CXX=aarch64-linux-gnu-cpp" >> "$GITHUB_ENV"
echo "CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc" >> "$GITHUB_ENV"
- name: Install armv7 cross compiler
if: matrix.platform.target == 'armv7-unknown-linux-gnueabihf'
run: |
sudo apt-get install -y gcc-arm-linux-gnueabihf libc6-armhf-cross libc6-dev-armhf-cross
echo "TARGET_CC=arm-linux-gnueabihf-gcc" >> "$GITHUB_ENV"
echo "TARGET_CXX=arm-linux-gnueabihf-cpp" >> "$GITHUB_ENV"
echo "CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc" >> "$GITHUB_ENV"
- name: Install maturin
run: pip install maturin
- name: Build Wheels
run: |
maturin build -i python --release --out dist --no-sdist --target ${{ matrix.platform.target }} --manylinux ${{ matrix.platform.manylinux }} --cargo-extra-args="--no-default-features" --cargo-extra-args="--features=abi3"
echo 'curl -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
source ~/.cargo/env
rustup target add ${{ matrix.platform.target }}
maturin build -i python --release --out dist --no-sdist --target ${{ matrix.platform.target }} --manylinux ${{ matrix.platform.manylinux }} --cargo-extra-args="--no-default-features" --cargo-extra-args="--features=abi3,extension-module" # disable mimallocator
' > build-wheel.sh
docker run --rm -v "$PWD":/io -w /io messense/manylinux2014-cross:${{ matrix.platform.arch }} bash build-wheel.sh
- uses: uraimo/[email protected]
name: Install built wheel
with:
Expand Down Expand Up @@ -220,9 +202,9 @@ jobs:
profile: minimal
default: true
- name: Build
run: cargo build
run: cargo build --release
- name: Tests
run: cargo test
run: cargo test --no-default-features
- uses: actions/setup-python@v2
with:
python-version: pypy-3.6
Expand Down
9 changes: 5 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "cramjam"
version = "2.0.2"
version = "2.1.0"
authors = ["Miles Granger <[email protected]>"]
edition = "2018"
license-file = "LICENSE"
Expand All @@ -12,20 +12,21 @@ readme = "README.md"
crate-type = ["cdylib"]

[features]
default = ["abi3", "mimallocator"]
default = ["abi3", "mimallocator", "extension-module"]
abi3 = ["pyo3/abi3-py36"]
mimallocator = ["mimalloc"]
extension-module = ["pyo3/extension-module"]

[profile.release]
lto = "fat"
codegen-units = 1
opt-level = 3

[dependencies]
pyo3 = { version = "0.13.2", features = ["extension-module"] }
pyo3 = { version = "0.13.2", default-features = false, features = ["macros"] }
snap = "^1"
brotli2 = "^0.3"
lz-fear = "0.1.1"
lz4 = "^1"
flate2 = "^1"
zstd = "0.6.0+zstd.1.4.8"
numpy = "0.13.0"
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,5 @@ dev-install:
pip install cramjam --no-index --find-links dist/

pypy-build:
maturin build -i $(shell which pypy) --release --out dist --cargo-extra-args="--no-default-features" # disable abi3
maturin build -i $(shell which pypy) --release --out dist --cargo-extra-args="--no-default-features" --cargo-extra-args="--features=mimallocator,extension-module" # disable abi3
pypy ./pypy_patch.py
45 changes: 29 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,29 +45,42 @@ All available for use as:
b"bytes here"
```

Where the API is `cramjam.<compression-variant>.compress/decompress` and accepts
both `bytes` and `bytearray` objects.
Where the API is `cramjam.<compression-variant>.compress/decompress` and accepts
`bytes`/`bytearray`/`numpy.array`/`cramjam.File`/`cramjam.Buffer` objects.

**de/compress_into**
Additionally, all variants except for lz4, support `decompress_into` and `compress_into`.
If you have a numpy array preallocated, that can be used as the output location for de/compression.
Additionally, all variants support `decompress_into` and `compress_into`.
Ex.
```python
>>> from cramjam import snappy
```python
>>> import numpy as np
>>> compressed_data # some data that we know the size of when decompressed
>>> output = np.zeros(<<output length>>, dtype=np.uint8)
>>> snappy.decompress_into(compressed_data, output) # returns number of bytes decoded
<<int: the number of bytes affected>>
>>> from cramjam import snappy, Buffer
>>>
>>> data = np.frombuffer(b'some bytes here', dtype=np.uint8)
>>> data
array([115, 111, 109, 101, 32, 98, 121, 116, 101, 115, 32, 104, 101,
114, 101], dtype=uint8)
>>>
>>> compressed = Buffer()
>>> snappy.compress_into(data, compressed)
33 # 33 bytes written to compressed buffer
>>>
>>> compressed.tell() # Where is the buffer position?
33 # goodie!
>>>
>>> compressed.seek(0) # Go back to the start of the buffer so we can prepare to decompress
>>> decompressed = b'0' * len(data) # let's write to `bytes` as output
>>> decompressed
b'000000000000000'
>>>
>>> snappy.decompress_into(compressed, decompressed)
15 # 15 bytes written to decompressed
>>> decompressed
b'some bytes here'
```
This is very fast, as it avoids any buffer allocations on the rust side.

**Special note!**
If you know the length of the de/compress output, you
can provide `output_len=<<some int>>` to any `de/compress`
to get ~1.5-3x performance increase as this allows single
buffer allocation.

For `snappy` with `bytearray`s, it's only a mild improvement
as we currently are able to estimate the buffer size and can
resize the resulting `bytearray` to the correct size.
buffer allocation; doesn't really apply if you're using `cramjam.Buffer`
or `cramjam.File` objects.
22 changes: 12 additions & 10 deletions src/brotli.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
//! brotli de/compression interface
use crate::exceptions::{CompressionError, DecompressionError};
use crate::{to_py_err, BytesType, WriteablePyByteArray};
use numpy::PyArray1;
use crate::{to_py_err, BytesType};
use pyo3::prelude::*;
use pyo3::types::PyBytes;
use pyo3::wrap_pyfunction;
use pyo3::{PyResult, Python};
use std::io::Cursor;

pub fn init_py_module(m: &PyModule) -> PyResult<()> {
pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(compress, m)?)?;
m.add_function(wrap_pyfunction!(decompress, m)?)?;
m.add_function(wrap_pyfunction!(compress_into, m)?)?;
Expand Down Expand Up @@ -48,17 +48,19 @@ pub fn compress<'a>(
#[pyfunction]
pub fn compress_into<'a>(
_py: Python<'a>,
data: BytesType<'a>,
array: &PyArray1<u8>,
input: BytesType<'a>,
mut output: BytesType<'a>,
level: Option<u32>,
) -> PyResult<usize> {
crate::generic_into!(compress(data -> array), level)
let r = internal::compress(input, &mut output, level)?;
Ok(r)
}

/// Decompress directly into an output buffer
#[pyfunction]
pub fn decompress_into<'a>(_py: Python<'a>, data: BytesType<'a>, array: &'a PyArray1<u8>) -> PyResult<usize> {
crate::generic_into!(decompress(data -> array))
pub fn decompress_into<'a>(_py: Python<'a>, input: BytesType<'a>, mut output: BytesType<'a>) -> PyResult<usize> {
let r = internal::decompress(input, &mut output)?;
Ok(r)
}

pub(crate) mod internal {
Expand All @@ -68,14 +70,14 @@ pub(crate) mod internal {
use std::io::Error;

/// Decompress via Brotli
pub fn decompress<W: Write + ?Sized>(input: &[u8], output: &mut W) -> Result<usize, Error> {
pub fn decompress<W: Write + ?Sized, R: Read>(input: R, output: &mut W) -> Result<usize, Error> {
let mut decoder = BrotliDecoder::new(input);
let n_bytes = std::io::copy(&mut decoder, output)?;
Ok(n_bytes as usize)
}

/// Compress via Brotli
pub fn compress<W: Write + ?Sized>(input: &[u8], output: &mut W, level: Option<u32>) -> Result<usize, Error> {
pub fn compress<W: Write + ?Sized, R: Read>(input: R, output: &mut W, level: Option<u32>) -> Result<usize, Error> {
let level = level.unwrap_or_else(|| 11);
let mut encoder = BrotliEncoder::new(input, level);
let n_bytes = std::io::copy(&mut encoder, output)?;
Expand Down
22 changes: 12 additions & 10 deletions src/deflate.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
//! deflate de/compression interface
use crate::exceptions::{CompressionError, DecompressionError};
use crate::{to_py_err, BytesType, WriteablePyByteArray};
use numpy::PyArray1;
use crate::{to_py_err, BytesType};
use pyo3::prelude::*;
use pyo3::types::PyBytes;
use pyo3::wrap_pyfunction;
use pyo3::{PyResult, Python};
use std::io::Cursor;

pub fn init_py_module(m: &PyModule) -> PyResult<()> {
pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(compress, m)?)?;
m.add_function(wrap_pyfunction!(decompress, m)?)?;
m.add_function(wrap_pyfunction!(compress_into, m)?)?;
Expand Down Expand Up @@ -48,17 +48,19 @@ pub fn compress<'a>(
#[pyfunction]
pub fn compress_into<'a>(
_py: Python<'a>,
data: BytesType<'a>,
array: &PyArray1<u8>,
input: BytesType<'a>,
mut output: BytesType<'a>,
level: Option<u32>,
) -> PyResult<usize> {
crate::generic_into!(compress(data -> array), level)
let r = internal::compress(input, &mut output, level)?;
Ok(r)
}

/// Decompress directly into an output buffer
#[pyfunction]
pub fn decompress_into<'a>(_py: Python<'a>, data: BytesType<'a>, array: &'a PyArray1<u8>) -> PyResult<usize> {
crate::generic_into!(decompress(data -> array))
pub fn decompress_into<'a>(_py: Python<'a>, input: BytesType<'a>, mut output: BytesType<'a>) -> PyResult<usize> {
let r = internal::decompress(input, &mut output)?;
Ok(r)
}

pub(crate) mod internal {
Expand All @@ -69,14 +71,14 @@ pub(crate) mod internal {
use std::io::Error;

/// Decompress deflate data
pub fn decompress<W: Write + ?Sized>(input: &[u8], output: &mut W) -> Result<usize, Error> {
pub fn decompress<W: Write + ?Sized, R: Read>(input: R, output: &mut W) -> Result<usize, Error> {
let mut decoder = DeflateDecoder::new(input);
let n_bytes = std::io::copy(&mut decoder, output)?;
Ok(n_bytes as usize)
}

/// Compress deflate data
pub fn compress<W: Write + ?Sized>(input: &[u8], output: &mut W, level: Option<u32>) -> Result<usize, Error> {
pub fn compress<W: Write + ?Sized, R: Read>(input: R, output: &mut W, level: Option<u32>) -> Result<usize, Error> {
let level = level.unwrap_or_else(|| 6);

let mut encoder = DeflateEncoder::new(input, Compression::new(level));
Expand Down
2 changes: 2 additions & 0 deletions src/exceptions.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#![allow(missing_docs)]
//! cramjam specific Python exceptions
use pyo3::create_exception;
use pyo3::exceptions::PyException;

Expand Down
22 changes: 12 additions & 10 deletions src/gzip.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
//! gzip de/compression interface
use crate::exceptions::{CompressionError, DecompressionError};
use crate::{to_py_err, BytesType, WriteablePyByteArray};
use numpy::PyArray1;
use crate::{to_py_err, BytesType};
use pyo3::prelude::*;
use pyo3::types::PyBytes;
use pyo3::wrap_pyfunction;
use pyo3::{PyResult, Python};
use std::io::Cursor;

pub fn init_py_module(m: &PyModule) -> PyResult<()> {
pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(compress, m)?)?;
m.add_function(wrap_pyfunction!(decompress, m)?)?;
m.add_function(wrap_pyfunction!(compress_into, m)?)?;
Expand Down Expand Up @@ -48,17 +48,19 @@ pub fn compress<'a>(
#[pyfunction]
pub fn compress_into<'a>(
_py: Python<'a>,
data: BytesType<'a>,
array: &PyArray1<u8>,
input: BytesType<'a>,
mut output: BytesType<'a>,
level: Option<u32>,
) -> PyResult<usize> {
crate::generic_into!(compress(data -> array), level)
let r = internal::compress(input, &mut output, level)?;
Ok(r)
}

/// Decompress directly into an output buffer
#[pyfunction]
pub fn decompress_into<'a>(_py: Python<'a>, data: BytesType<'a>, array: &'a PyArray1<u8>) -> PyResult<usize> {
crate::generic_into!(decompress(data -> array))
pub fn decompress_into<'a>(_py: Python<'a>, input: BytesType<'a>, mut output: BytesType<'a>) -> PyResult<usize> {
let r = internal::decompress(input, &mut output)?;
Ok(r)
}

pub(crate) mod internal {
Expand All @@ -68,14 +70,14 @@ pub(crate) mod internal {
use std::io::Error;

/// Decompress gzip data
pub fn decompress<W: Write + ?Sized>(input: &[u8], output: &mut W) -> Result<usize, Error> {
pub fn decompress<W: Write + ?Sized, R: Read>(input: R, output: &mut W) -> Result<usize, Error> {
let mut decoder = GzDecoder::new(input);
let n_bytes = std::io::copy(&mut decoder, output)?;
Ok(n_bytes as usize)
}

/// Compress gzip data
pub fn compress<W: Write + ?Sized>(input: &[u8], output: &mut W, level: Option<u32>) -> Result<usize, Error> {
pub fn compress<W: Write + ?Sized, R: Read>(input: R, output: &mut W, level: Option<u32>) -> Result<usize, Error> {
let level = level.unwrap_or_else(|| 6);
let mut encoder = GzEncoder::new(input, Compression::new(level));
let n_bytes = std::io::copy(&mut encoder, output)?;
Expand Down
Loading

0 comments on commit 16b78f5

Please sign in to comment.