Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rust file-like objects and accept bytes/bytearray/numpy #45

Merged
merged 30 commits into from
Mar 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
748dc50
Initial File object
milesgranger Mar 10, 2021
dfe4ac0
impl Read/Write for RustyBuffer/File
milesgranger Mar 10, 2021
a0c4980
Successfull conversion to accepting Read/Write; missing lz4 and snapp…
milesgranger Mar 10, 2021
dd6915e
Add example python test, rust tests are fubar for the moment
milesgranger Mar 10, 2021
1384f84
Swap lz4, minor updates to generic macro
milesgranger Mar 11, 2021
007f07f
Update README, lz4 supports de/compress_into now
milesgranger Mar 11, 2021
ab63626
chkpt: fix tests, need to implement Seek for WritablePyByteArray
milesgranger Mar 11, 2021
587c978
full lz4 support
milesgranger Mar 12, 2021
e581346
Update Cargo.toml pyo3 features
milesgranger Mar 12, 2021
c1ffcc8
chkpt: start on a mostly IOBase interface
milesgranger Mar 12, 2021
5fa1fea
Support .tell()
milesgranger Mar 12, 2021
bc8d02a
Initial impl of readinto
milesgranger Mar 12, 2021
24292e0
rust file-like obj api test, other cleanup
milesgranger Mar 12, 2021
753db39
.write also take BytesType enum
milesgranger Mar 13, 2021
e1f69a2
get ci to work, remove rlib?
milesgranger Mar 13, 2021
024922b
Use PyBytes::new_with in read with known n_bytes
milesgranger Mar 13, 2021
3201a5a
Fix PyPy and linux cross compilation (#46)
messense Mar 13, 2021
a28dc56
Support Write for BytesType
milesgranger Mar 13, 2021
8a497dc
Initial impl support seek from positions .seek(.., whence=..)
milesgranger Mar 13, 2021
b6ba051
Convert all variants to some Rust wrapper
milesgranger Mar 13, 2021
1a5b811
impl Seek for BytesType and update lz4 n compressed bytes
milesgranger Mar 14, 2021
b36947e
chkpt: RawEncoder/RawDecoder impl
milesgranger Mar 14, 2021
7b9aad0
RawEncoder/Decoder use entire bytes, cannot stream
milesgranger Mar 16, 2021
26024a9
Support Buffer taking BytesType directly, like BytesIO does
milesgranger Mar 17, 2021
6b2557b
chkpt: working on docs
milesgranger Mar 17, 2021
5ce7bbd
chkpt: working on docs
milesgranger Mar 17, 2021
6e33f60
chkpt: more docs
milesgranger Mar 17, 2021
57ba3b9
more docs
milesgranger Mar 18, 2021
66150e1
even more..
milesgranger Mar 18, 2021
d3c1328
Remove dead code
milesgranger Mar 18, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 16 additions & 34 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ jobs:
profile: minimal
default: true
- name: Build
run: cargo build
run: cargo build --release
- name: Tests
run: cargo test
run: cargo test --no-default-features --release
- name: Install maturin
run: pip install maturin
- name: Build wheels - x86_64
Expand Down Expand Up @@ -78,10 +78,10 @@ jobs:
default: true
- name: Build
if: matrix.platform.python-architecture == 'x64'
run: cargo build
run: cargo build --release
- name: Tests
if: matrix.platform.python-architecture == 'x64'
run: cargo test
run: cargo test --no-default-features --release
- name: Install maturin
run: pip install maturin
- name: Build wheels
Expand Down Expand Up @@ -115,9 +115,9 @@ jobs:
profile: minimal
default: true
- name: Build
run: cargo build
run: cargo build --release
- name: Tests
run: cargo test
run: cargo test --no-default-features
- uses: actions/setup-python@v2
with:
python-version: 3.6
Expand All @@ -129,7 +129,6 @@ jobs:
pip install maturin
maturin build -i python --release --out dist --no-sdist --target ${{ matrix.platform.target }} --manylinux ${{ matrix.platform.manylinux }}
' > build-wheel.sh
chmod +x build-wheel.sh

docker run --rm -v "$PWD":/io -w /io quay.io/pypa/manylinux${{ matrix.platform.manylinux }}_${{ matrix.platform.arch }} bash build-wheel.sh
- name: Python UnitTest
Expand All @@ -145,7 +144,7 @@ jobs:
path: dist

linux-cross:
runs-on: ubuntu-16.04
runs-on: ubuntu-latest
strategy:
matrix:
platform: [
Expand All @@ -157,32 +156,15 @@ jobs:
- uses: actions/setup-python@v2
with:
python-version: 3.6
- name: Install Rust toolchain
uses: actions-rs/toolchain@v1
with:
toolchain: stable
target: ${{ matrix.platform.target }}
profile: minimal
default: true
- name: Install aarch64 cross compiler
if: matrix.platform.target == 'aarch64-unknown-linux-gnu'
run: |
sudo apt-get install -y gcc-aarch64-linux-gnu libc6-arm64-cross libc6-dev-arm64-cross
echo "TARGET_CC=aarch64-linux-gnu-gcc" >> "$GITHUB_ENV"
echo "TARGET_CXX=aarch64-linux-gnu-cpp" >> "$GITHUB_ENV"
echo "CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc" >> "$GITHUB_ENV"
- name: Install armv7 cross compiler
if: matrix.platform.target == 'armv7-unknown-linux-gnueabihf'
run: |
sudo apt-get install -y gcc-arm-linux-gnueabihf libc6-armhf-cross libc6-dev-armhf-cross
echo "TARGET_CC=arm-linux-gnueabihf-gcc" >> "$GITHUB_ENV"
echo "TARGET_CXX=arm-linux-gnueabihf-cpp" >> "$GITHUB_ENV"
echo "CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc" >> "$GITHUB_ENV"
- name: Install maturin
run: pip install maturin
- name: Build Wheels
run: |
maturin build -i python --release --out dist --no-sdist --target ${{ matrix.platform.target }} --manylinux ${{ matrix.platform.manylinux }} --cargo-extra-args="--no-default-features" --cargo-extra-args="--features=abi3"
echo 'curl -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
source ~/.cargo/env
rustup target add ${{ matrix.platform.target }}
maturin build -i python --release --out dist --no-sdist --target ${{ matrix.platform.target }} --manylinux ${{ matrix.platform.manylinux }} --cargo-extra-args="--no-default-features" --cargo-extra-args="--features=abi3,extension-module" # disable mimallocator
' > build-wheel.sh

docker run --rm -v "$PWD":/io -w /io messense/manylinux2014-cross:${{ matrix.platform.arch }} bash build-wheel.sh
- uses: uraimo/[email protected]
name: Install built wheel
with:
Expand Down Expand Up @@ -220,9 +202,9 @@ jobs:
profile: minimal
default: true
- name: Build
run: cargo build
run: cargo build --release
- name: Tests
run: cargo test
run: cargo test --no-default-features
- uses: actions/setup-python@v2
with:
python-version: pypy-3.6
Expand Down
9 changes: 5 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "cramjam"
version = "2.0.2"
version = "2.1.0"
authors = ["Miles Granger <[email protected]>"]
edition = "2018"
license-file = "LICENSE"
Expand All @@ -12,20 +12,21 @@ readme = "README.md"
crate-type = ["cdylib"]

[features]
default = ["abi3", "mimallocator"]
default = ["abi3", "mimallocator", "extension-module"]
abi3 = ["pyo3/abi3-py36"]
mimallocator = ["mimalloc"]
extension-module = ["pyo3/extension-module"]

[profile.release]
lto = "fat"
codegen-units = 1
opt-level = 3

[dependencies]
pyo3 = { version = "0.13.2", features = ["extension-module"] }
pyo3 = { version = "0.13.2", default-features = false, features = ["macros"] }
snap = "^1"
brotli2 = "^0.3"
lz-fear = "0.1.1"
lz4 = "^1"
flate2 = "^1"
zstd = "0.6.0+zstd.1.4.8"
numpy = "0.13.0"
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,5 @@ dev-install:
pip install cramjam --no-index --find-links dist/

pypy-build:
maturin build -i $(shell which pypy) --release --out dist --cargo-extra-args="--no-default-features" # disable abi3
maturin build -i $(shell which pypy) --release --out dist --cargo-extra-args="--no-default-features" --cargo-extra-args="--features=mimallocator,extension-module" # disable abi3
pypy ./pypy_patch.py
45 changes: 29 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,29 +45,42 @@ All available for use as:
b"bytes here"
```

Where the API is `cramjam.<compression-variant>.compress/decompress` and accepts
both `bytes` and `bytearray` objects.
Where the API is `cramjam.<compression-variant>.compress/decompress` and accepts
`bytes`/`bytearray`/`numpy.array`/`cramjam.File`/`cramjam.Buffer` objects.

**de/compress_into**
Additionally, all variants except for lz4, support `decompress_into` and `compress_into`.
If you have a numpy array preallocated, that can be used as the output location for de/compression.
Additionally, all variants support `decompress_into` and `compress_into`.
Ex.
```python
>>> from cramjam import snappy
```python
>>> import numpy as np
>>> compressed_data # some data that we know the size of when decompressed
>>> output = np.zeros(<<output length>>, dtype=np.uint8)
>>> snappy.decompress_into(compressed_data, output) # returns number of bytes decoded
<<int: the number of bytes affected>>
>>> from cramjam import snappy, Buffer
>>>
>>> data = np.frombuffer(b'some bytes here', dtype=np.uint8)
>>> data
array([115, 111, 109, 101, 32, 98, 121, 116, 101, 115, 32, 104, 101,
114, 101], dtype=uint8)
>>>
>>> compressed = Buffer()
>>> snappy.compress_into(data, compressed)
33 # 33 bytes written to compressed buffer
>>>
>>> compressed.tell() # Where is the buffer position?
33 # goodie!
>>>
>>> compressed.seek(0) # Go back to the start of the buffer so we can prepare to decompress
>>> decompressed = b'0' * len(data) # let's write to `bytes` as output
>>> decompressed
b'000000000000000'
>>>
>>> snappy.decompress_into(compressed, decompressed)
15 # 15 bytes written to decompressed
>>> decompressed
b'some bytes here'
```
This is very fast, as it avoids any buffer allocations on the rust side.

**Special note!**
If you know the length of the de/compress output, you
can provide `output_len=<<some int>>` to any `de/compress`
to get ~1.5-3x performance increase as this allows single
buffer allocation.

For `snappy` with `bytearray`s, it's only a mild improvement
as we currently are able to estimate the buffer size and can
resize the resulting `bytearray` to the correct size.
buffer allocation; doesn't really apply if you're using `cramjam.Buffer`
or `cramjam.File` objects.
22 changes: 12 additions & 10 deletions src/brotli.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
//! brotli de/compression interface
use crate::exceptions::{CompressionError, DecompressionError};
use crate::{to_py_err, BytesType, WriteablePyByteArray};
use numpy::PyArray1;
use crate::{to_py_err, BytesType};
use pyo3::prelude::*;
use pyo3::types::PyBytes;
use pyo3::wrap_pyfunction;
use pyo3::{PyResult, Python};
use std::io::Cursor;

pub fn init_py_module(m: &PyModule) -> PyResult<()> {
pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(compress, m)?)?;
m.add_function(wrap_pyfunction!(decompress, m)?)?;
m.add_function(wrap_pyfunction!(compress_into, m)?)?;
Expand Down Expand Up @@ -48,17 +48,19 @@ pub fn compress<'a>(
#[pyfunction]
pub fn compress_into<'a>(
_py: Python<'a>,
data: BytesType<'a>,
array: &PyArray1<u8>,
input: BytesType<'a>,
mut output: BytesType<'a>,
level: Option<u32>,
) -> PyResult<usize> {
crate::generic_into!(compress(data -> array), level)
let r = internal::compress(input, &mut output, level)?;
Ok(r)
}

/// Decompress directly into an output buffer
#[pyfunction]
pub fn decompress_into<'a>(_py: Python<'a>, data: BytesType<'a>, array: &'a PyArray1<u8>) -> PyResult<usize> {
crate::generic_into!(decompress(data -> array))
pub fn decompress_into<'a>(_py: Python<'a>, input: BytesType<'a>, mut output: BytesType<'a>) -> PyResult<usize> {
let r = internal::decompress(input, &mut output)?;
Ok(r)
}

pub(crate) mod internal {
Expand All @@ -68,14 +70,14 @@ pub(crate) mod internal {
use std::io::Error;

/// Decompress via Brotli
pub fn decompress<W: Write + ?Sized>(input: &[u8], output: &mut W) -> Result<usize, Error> {
pub fn decompress<W: Write + ?Sized, R: Read>(input: R, output: &mut W) -> Result<usize, Error> {
let mut decoder = BrotliDecoder::new(input);
let n_bytes = std::io::copy(&mut decoder, output)?;
Ok(n_bytes as usize)
}

/// Compress via Brotli
pub fn compress<W: Write + ?Sized>(input: &[u8], output: &mut W, level: Option<u32>) -> Result<usize, Error> {
pub fn compress<W: Write + ?Sized, R: Read>(input: R, output: &mut W, level: Option<u32>) -> Result<usize, Error> {
let level = level.unwrap_or_else(|| 11);
let mut encoder = BrotliEncoder::new(input, level);
let n_bytes = std::io::copy(&mut encoder, output)?;
Expand Down
22 changes: 12 additions & 10 deletions src/deflate.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
//! deflate de/compression interface
use crate::exceptions::{CompressionError, DecompressionError};
use crate::{to_py_err, BytesType, WriteablePyByteArray};
use numpy::PyArray1;
use crate::{to_py_err, BytesType};
use pyo3::prelude::*;
use pyo3::types::PyBytes;
use pyo3::wrap_pyfunction;
use pyo3::{PyResult, Python};
use std::io::Cursor;

pub fn init_py_module(m: &PyModule) -> PyResult<()> {
pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(compress, m)?)?;
m.add_function(wrap_pyfunction!(decompress, m)?)?;
m.add_function(wrap_pyfunction!(compress_into, m)?)?;
Expand Down Expand Up @@ -48,17 +48,19 @@ pub fn compress<'a>(
#[pyfunction]
pub fn compress_into<'a>(
_py: Python<'a>,
data: BytesType<'a>,
array: &PyArray1<u8>,
input: BytesType<'a>,
mut output: BytesType<'a>,
level: Option<u32>,
) -> PyResult<usize> {
crate::generic_into!(compress(data -> array), level)
let r = internal::compress(input, &mut output, level)?;
Ok(r)
}

/// Decompress directly into an output buffer
#[pyfunction]
pub fn decompress_into<'a>(_py: Python<'a>, data: BytesType<'a>, array: &'a PyArray1<u8>) -> PyResult<usize> {
crate::generic_into!(decompress(data -> array))
pub fn decompress_into<'a>(_py: Python<'a>, input: BytesType<'a>, mut output: BytesType<'a>) -> PyResult<usize> {
let r = internal::decompress(input, &mut output)?;
Ok(r)
}

pub(crate) mod internal {
Expand All @@ -69,14 +71,14 @@ pub(crate) mod internal {
use std::io::Error;

/// Decompress deflate data
pub fn decompress<W: Write + ?Sized>(input: &[u8], output: &mut W) -> Result<usize, Error> {
pub fn decompress<W: Write + ?Sized, R: Read>(input: R, output: &mut W) -> Result<usize, Error> {
let mut decoder = DeflateDecoder::new(input);
let n_bytes = std::io::copy(&mut decoder, output)?;
Ok(n_bytes as usize)
}

/// Compress deflate data
pub fn compress<W: Write + ?Sized>(input: &[u8], output: &mut W, level: Option<u32>) -> Result<usize, Error> {
pub fn compress<W: Write + ?Sized, R: Read>(input: R, output: &mut W, level: Option<u32>) -> Result<usize, Error> {
let level = level.unwrap_or_else(|| 6);

let mut encoder = DeflateEncoder::new(input, Compression::new(level));
Expand Down
2 changes: 2 additions & 0 deletions src/exceptions.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#![allow(missing_docs)]
//! cramjam specific Python exceptions
use pyo3::create_exception;
use pyo3::exceptions::PyException;

Expand Down
22 changes: 12 additions & 10 deletions src/gzip.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
//! gzip de/compression interface
use crate::exceptions::{CompressionError, DecompressionError};
use crate::{to_py_err, BytesType, WriteablePyByteArray};
use numpy::PyArray1;
use crate::{to_py_err, BytesType};
use pyo3::prelude::*;
use pyo3::types::PyBytes;
use pyo3::wrap_pyfunction;
use pyo3::{PyResult, Python};
use std::io::Cursor;

pub fn init_py_module(m: &PyModule) -> PyResult<()> {
pub(crate) fn init_py_module(m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(compress, m)?)?;
m.add_function(wrap_pyfunction!(decompress, m)?)?;
m.add_function(wrap_pyfunction!(compress_into, m)?)?;
Expand Down Expand Up @@ -48,17 +48,19 @@ pub fn compress<'a>(
#[pyfunction]
pub fn compress_into<'a>(
_py: Python<'a>,
data: BytesType<'a>,
array: &PyArray1<u8>,
input: BytesType<'a>,
mut output: BytesType<'a>,
level: Option<u32>,
) -> PyResult<usize> {
crate::generic_into!(compress(data -> array), level)
let r = internal::compress(input, &mut output, level)?;
Ok(r)
}

/// Decompress directly into an output buffer
#[pyfunction]
pub fn decompress_into<'a>(_py: Python<'a>, data: BytesType<'a>, array: &'a PyArray1<u8>) -> PyResult<usize> {
crate::generic_into!(decompress(data -> array))
pub fn decompress_into<'a>(_py: Python<'a>, input: BytesType<'a>, mut output: BytesType<'a>) -> PyResult<usize> {
let r = internal::decompress(input, &mut output)?;
Ok(r)
}

pub(crate) mod internal {
Expand All @@ -68,14 +70,14 @@ pub(crate) mod internal {
use std::io::Error;

/// Decompress gzip data
pub fn decompress<W: Write + ?Sized>(input: &[u8], output: &mut W) -> Result<usize, Error> {
pub fn decompress<W: Write + ?Sized, R: Read>(input: R, output: &mut W) -> Result<usize, Error> {
let mut decoder = GzDecoder::new(input);
let n_bytes = std::io::copy(&mut decoder, output)?;
Ok(n_bytes as usize)
}

/// Compress gzip data
pub fn compress<W: Write + ?Sized>(input: &[u8], output: &mut W, level: Option<u32>) -> Result<usize, Error> {
pub fn compress<W: Write + ?Sized, R: Read>(input: R, output: &mut W, level: Option<u32>) -> Result<usize, Error> {
let level = level.unwrap_or_else(|| 6);
let mut encoder = GzEncoder::new(input, Compression::new(level));
let n_bytes = std::io::copy(&mut encoder, output)?;
Expand Down
Loading