Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rust file-like objects and accept bytes/bytearray/numpy #45

Merged
merged 30 commits into from
Mar 18, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
748dc50
Initial File object
milesgranger Mar 10, 2021
dfe4ac0
impl Read/Write for RustyBuffer/File
milesgranger Mar 10, 2021
a0c4980
Successfull conversion to accepting Read/Write; missing lz4 and snapp…
milesgranger Mar 10, 2021
dd6915e
Add example python test, rust tests are fubar for the moment
milesgranger Mar 10, 2021
1384f84
Swap lz4, minor updates to generic macro
milesgranger Mar 11, 2021
007f07f
Update README, lz4 supports de/compress_into now
milesgranger Mar 11, 2021
ab63626
chkpt: fix tests, need to implement Seek for WritablePyByteArray
milesgranger Mar 11, 2021
587c978
full lz4 support
milesgranger Mar 12, 2021
e581346
Update Cargo.toml pyo3 features
milesgranger Mar 12, 2021
c1ffcc8
chkpt: start on a mostly IOBase interface
milesgranger Mar 12, 2021
5fa1fea
Support .tell()
milesgranger Mar 12, 2021
bc8d02a
Initial impl of readinto
milesgranger Mar 12, 2021
24292e0
rust file-like obj api test, other cleanup
milesgranger Mar 12, 2021
753db39
.write also take BytesType enum
milesgranger Mar 13, 2021
e1f69a2
get ci to work, remove rlib?
milesgranger Mar 13, 2021
024922b
Use PyBytes::new_with in read with known n_bytes
milesgranger Mar 13, 2021
3201a5a
Fix PyPy and linux cross compilation (#46)
messense Mar 13, 2021
a28dc56
Support Write for BytesType
milesgranger Mar 13, 2021
8a497dc
Initial impl support seek from positions .seek(.., whence=..)
milesgranger Mar 13, 2021
b6ba051
Convert all variants to some Rust wrapper
milesgranger Mar 13, 2021
1a5b811
impl Seek for BytesType and update lz4 n compressed bytes
milesgranger Mar 14, 2021
b36947e
chkpt: RawEncoder/RawDecoder impl
milesgranger Mar 14, 2021
7b9aad0
RawEncoder/Decoder use entire bytes, cannot stream
milesgranger Mar 16, 2021
26024a9
Support Buffer taking BytesType directly, like BytesIO does
milesgranger Mar 17, 2021
6b2557b
chkpt: working on docs
milesgranger Mar 17, 2021
5ce7bbd
chkpt: working on docs
milesgranger Mar 17, 2021
6e33f60
chkpt: more docs
milesgranger Mar 17, 2021
57ba3b9
more docs
milesgranger Mar 18, 2021
66150e1
even more..
milesgranger Mar 18, 2021
d3c1328
Remove dead code
milesgranger Mar 18, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/brotli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,14 @@ pub(crate) mod internal {
use std::io::Error;

/// Decompress via Brotli
pub fn decompress<W: Write + ?Sized>(input: &[u8], output: &mut W) -> Result<usize, Error> {
pub fn decompress<W: Write + ?Sized, R: Read>(input: R, output: &mut W) -> Result<usize, Error> {
let mut decoder = BrotliDecoder::new(input);
let n_bytes = std::io::copy(&mut decoder, output)?;
Ok(n_bytes as usize)
}

/// Compress via Brotli
pub fn compress<W: Write + ?Sized>(input: &[u8], output: &mut W, level: Option<u32>) -> Result<usize, Error> {
pub fn compress<W: Write + ?Sized, R: Read>(input: R, output: &mut W, level: Option<u32>) -> Result<usize, Error> {
let level = level.unwrap_or_else(|| 11);
let mut encoder = BrotliEncoder::new(input, level);
let n_bytes = std::io::copy(&mut encoder, output)?;
Expand Down
4 changes: 2 additions & 2 deletions src/deflate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,14 @@ pub(crate) mod internal {
use std::io::Error;

/// Decompress gzip data
pub fn decompress<W: Write + ?Sized>(input: &[u8], output: &mut W) -> Result<usize, Error> {
pub fn decompress<W: Write + ?Sized, R: Read>(input: R, output: &mut W) -> Result<usize, Error> {
let mut decoder = DeflateDecoder::new(input);
let n_bytes = std::io::copy(&mut decoder, output)?;
Ok(n_bytes as usize)
}

/// Compress gzip data
pub fn compress<W: Write + ?Sized>(input: &[u8], output: &mut W, level: Option<u32>) -> Result<usize, Error> {
pub fn compress<W: Write + ?Sized, R: Read>(input: R, output: &mut W, level: Option<u32>) -> Result<usize, Error> {
let level = level.unwrap_or_else(|| 6);

let mut encoder = DeflateEncoder::new(input, Compression::new(level));
Expand Down
4 changes: 2 additions & 2 deletions src/gzip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,14 +68,14 @@ pub(crate) mod internal {
use std::io::Error;

/// Decompress gzip data
pub fn decompress<W: Write + ?Sized>(input: &[u8], output: &mut W) -> Result<usize, Error> {
pub fn decompress<W: Write + ?Sized, R: Read>(input: R, output: &mut W) -> Result<usize, Error> {
let mut decoder = GzDecoder::new(input);
let n_bytes = std::io::copy(&mut decoder, output)?;
Ok(n_bytes as usize)
}

/// Compress gzip data
pub fn compress<W: Write + ?Sized>(input: &[u8], output: &mut W, level: Option<u32>) -> Result<usize, Error> {
pub fn compress<W: Write + ?Sized, R: Read>(input: R, output: &mut W, level: Option<u32>) -> Result<usize, Error> {
let level = level.unwrap_or_else(|| 6);
let mut encoder = GzEncoder::new(input, Compression::new(level));
let n_bytes = std::io::copy(&mut encoder, output)?;
Expand Down
138 changes: 138 additions & 0 deletions src/io.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
use std::fs::{File, OpenOptions};
use std::io::{Read, Seek, SeekFrom, Write, Cursor};

use pyo3::prelude::*;
use pyo3::types::{PyBytes};


#[pyclass(name="File")]
pub struct RustyFile {
inner: File
}

#[pymethods]
impl RustyFile {
#[new]
pub fn new(
path: &str,
read: Option<bool>,
write: Option<bool>,
truncate: Option<bool>,
append: Option<bool>,
) -> PyResult<Self> {
Ok(Self {
inner: OpenOptions::new()
.read(read.unwrap_or_else(|| true))
.write(write.unwrap_or_else(|| true))
.truncate(truncate.unwrap_or_else(|| false))
.create(true) // create if doesn't exist, but open if it does.
.append(append.unwrap_or_else(|| false))
.open(path)?,
})
}

pub fn write(&mut self, buf: &[u8]) -> PyResult<usize> {
let r = Write::write(self, buf)?;
Ok(r)
}
pub fn read<'a>(&mut self, py: Python<'a>, n_bytes: Option<usize>) -> PyResult<&'a PyBytes> {
match n_bytes {
Some(n) => {
let mut buf = vec![0; n];
self.inner.read(buf.as_mut_slice())?;
Ok(PyBytes::new(py, buf.as_slice()))
milesgranger marked this conversation as resolved.
Show resolved Hide resolved
}
None => {
let mut buf = vec![];
self.inner.read_to_end(&mut buf)?;
Ok(PyBytes::new(py, buf.as_slice()))
}
}
}
pub fn seek(&mut self, position: usize) -> PyResult<usize> {
let r = self.inner.seek(SeekFrom::Start(position as u64)).map(|r| r as usize)?;
Ok(r)
}
pub fn set_len(&mut self, size: usize) -> PyResult<()> {
self.inner.set_len(size as u64)?;
Ok(())
}
pub fn truncate(&mut self) -> PyResult<()> {
self.set_len(0)
}
}

#[pyclass(name="Buffer")]
#[derive(Default)]
pub struct RustyBuffer {
inner: Cursor<Vec<u8>>,
}

#[pymethods]
impl RustyBuffer {
#[new]
pub fn new(len: Option<usize>) -> Self {
Self { inner: Cursor::new(vec![0; len.unwrap_or_else(|| 0)]) }
}

pub fn write(&mut self, buf: &[u8]) -> PyResult<usize> {
let r = Write::write(self, buf)?;
Ok(r)
}
pub fn read<'a>(&mut self, py: Python<'a>, n_bytes: Option<usize>) -> PyResult<&'a PyBytes> {
match n_bytes {
Some(n) => {
let mut buf = vec![0; n];
self.inner.read(buf.as_mut_slice())?;
Ok(PyBytes::new(py, buf.as_slice()))
}
None => {
let mut buf = vec![];
self.inner.read_to_end(&mut buf)?;
Ok(PyBytes::new(py, buf.as_slice()))
}
}
}
pub fn seek(&mut self, position: usize) -> PyResult<usize> {
let r = self.inner.seek(SeekFrom::Start(position as u64)).map(|r| r as usize)?;
Ok(r)
}
pub fn set_len(&mut self, size: usize) -> PyResult<()> {
self.inner.get_mut().resize(size, 0);
Ok(())
}
pub fn truncate(&mut self) -> PyResult<()> {
self.inner.get_mut().truncate(0);
Ok(())
}
}

impl Write for RustyBuffer {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
self.inner.write(buf)
}
fn flush(&mut self) -> std::io::Result<()> {
self.inner.flush()
}
}

impl Write for RustyFile {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
self.inner.write(buf)
}
fn flush(&mut self) -> std::io::Result<()> {
self.inner.flush()
}
}

impl Read for &mut RustyBuffer {
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
self.inner.read(buf)
}
}

impl Read for &mut RustyFile {
fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
self.inner.read(buf)
}
}
73 changes: 52 additions & 21 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,14 @@ pub mod gzip;
pub mod lz4;
pub mod snappy;
pub mod zstd;
pub mod io;

use pyo3::prelude::*;
use pyo3::types::{PyByteArray, PyBytes};

use exceptions::{CompressionError, DecompressionError};
use std::io::Write;
use crate::io::{RustyFile, RustyBuffer};

#[cfg(feature = "mimallocator")]
#[global_allocator]
Expand All @@ -42,6 +44,10 @@ pub enum BytesType<'a> {
Bytes(&'a PyBytes),
#[pyo3(transparent, annotation = "bytearray")]
ByteArray(&'a PyByteArray),
#[pyo3(transparent, annotation = "File")]
RustyFile(&'a PyCell<RustyFile>),
#[pyo3(transparent, annotation = "Buffer")]
RustyBuffer(&'a PyCell<RustyBuffer>),
}

impl<'a> BytesType<'a> {
Expand All @@ -53,6 +59,7 @@ impl<'a> BytesType<'a> {
match self {
Self::Bytes(b) => b.as_bytes(),
Self::ByteArray(b) => unsafe { b.as_bytes() },
_ => unimplemented!("Converting Rust native types to bytes is not supported")
}
}
}
Expand All @@ -62,6 +69,8 @@ impl<'a> IntoPy<PyObject> for BytesType<'a> {
match self {
Self::Bytes(bytes) => bytes.to_object(py),
Self::ByteArray(byte_array) => byte_array.to_object(py),
Self::RustyFile(file) => file.to_object(py),
Self::RustyBuffer(buffer) => buffer.to_object(py)
}
}
}
Expand Down Expand Up @@ -134,40 +143,61 @@ macro_rules! generic_into {
macro_rules! generic {
($op:ident($input:ident), py=$py:ident, output_len=$output_len:ident $(, level=$level:ident)?) => {
{
let bytes = $input.as_bytes();
match $input {
BytesType::Bytes(_) => match $output_len {
Some(len) => {
let pybytes = PyBytes::new_with($py, len, |buffer| {
let mut cursor = Cursor::new(buffer);
BytesType::Bytes(b) => {
let bytes = b.as_bytes();
match $output_len {
Some(len) => {
let pybytes = PyBytes::new_with($py, len, |buffer| {
let mut cursor = Cursor::new(buffer);
if stringify!($op) == "compress" {
to_py_err!(CompressionError -> self::internal::$op(bytes, &mut cursor $(, $level)? ))?;
} else {
to_py_err!(DecompressionError -> self::internal::$op(bytes, &mut cursor $(, $level)? ))?;
}
Ok(())
})?;
Ok(BytesType::Bytes(pybytes))
}
None => {
let mut buffer = Vec::new();
if stringify!($op) == "compress" {
to_py_err!(CompressionError -> self::internal::$op(bytes, &mut cursor $(, $level)? ))?;
to_py_err!(CompressionError -> self::internal::$op(bytes, &mut buffer $(, $level)? ))?;
} else {
to_py_err!(DecompressionError -> self::internal::$op(bytes, &mut cursor $(, $level)? ))?;
to_py_err!(DecompressionError -> self::internal::$op(bytes, &mut buffer $(, $level)? ))?;
}
Ok(())
})?;
Ok(BytesType::Bytes(pybytes))
}
None => {
let mut buffer = Vec::new();
if stringify!($op) == "compress" {
to_py_err!(CompressionError -> self::internal::$op(bytes, &mut buffer $(, $level)? ))?;
} else {
to_py_err!(DecompressionError -> self::internal::$op(bytes, &mut buffer $(, $level)? ))?;
}

Ok(BytesType::Bytes(PyBytes::new($py, &buffer)))
Ok(BytesType::Bytes(PyBytes::new($py, &buffer)))
}
}
},
BytesType::ByteArray(_) => {
BytesType::ByteArray(b) => {
let bytes = unsafe { b.as_bytes() };
let mut pybytes = WriteablePyByteArray::new($py, $output_len.unwrap_or_else(|| 0));
if stringify!($op) == "compress" {
to_py_err!(CompressionError -> self::internal::$op(bytes, &mut pybytes $(, $level)? ))?;
} else {
to_py_err!(DecompressionError -> self::internal::$op(bytes, &mut pybytes $(, $level)? ))?;
}
Ok(BytesType::ByteArray(pybytes.into_inner()?))
},
BytesType::RustyFile(file) => {
let mut output = crate::io::RustyBuffer::default();
if stringify!($op) == "compress" {
to_py_err!(CompressionError -> self::internal::$op(&mut *file.borrow_mut(), &mut output $(, $level)? ))?;
} else {
to_py_err!(DecompressionError -> self::internal::$op(&mut *file.borrow_mut(), &mut output $(, $level)? ))?;
}
Ok(BytesType::RustyBuffer(PyCell::new($py, output).unwrap()))
},
BytesType::RustyBuffer(buffer) => {
let mut output = crate::io::RustyBuffer::default();
if stringify!($op) == "compress" {
to_py_err!(CompressionError -> self::internal::$op(&mut *buffer.borrow_mut(), &mut output $(, $level)? ))?;
} else {
to_py_err!(DecompressionError -> self::internal::$op(&mut *buffer.borrow_mut(), &mut output $(, $level)? ))?;
}
Ok(BytesType::RustyBuffer(PyCell::new($py, output).unwrap()))
}
}
}
Expand All @@ -194,7 +224,8 @@ fn cramjam(py: Python, m: &PyModule) -> PyResult<()> {
m.add("__version__", env!("CARGO_PKG_VERSION"))?;
m.add("CompressionError", py.get_type::<CompressionError>())?;
m.add("DecompressionError", py.get_type::<DecompressionError>())?;

m.add_class::<crate::io::RustyFile>()?;
m.add_class::<crate::io::RustyBuffer>()?;
make_submodule!(py -> m -> snappy);
make_submodule!(py -> m -> brotli);
make_submodule!(py -> m -> lz4);
Expand Down
11 changes: 7 additions & 4 deletions src/lz4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ pub fn decompress<'a>(py: Python<'a>, data: BytesType<'a>, output_len: Option<us
BytesType::ByteArray(input) => {
let out = to_py_err!(DecompressionError -> self::internal::decompress(unsafe { input.as_bytes() }))?;
Ok(BytesType::ByteArray(PyByteArray::new(py, &out)))
}
},
_ => Err(DecompressionError::new_err("decompress not supported for native Rust types in lz4."))
}
}

Expand Down Expand Up @@ -58,21 +59,23 @@ pub fn compress<'a>(
BytesType::ByteArray(input) => {
let out = to_py_err!(CompressionError -> self::internal::compress(unsafe { input.as_bytes() }, level))?;
Ok(BytesType::ByteArray(PyByteArray::new(py, &out)))
}
},
_ => Err(CompressionError::new_err("compress not supported for native Rust types for lz4."))
}
}

pub(crate) mod internal {
use std::error::Error;
use std::io::Read;

/// Decompress lz4 data
pub fn decompress(data: &[u8]) -> Result<Vec<u8>, Box<dyn Error>> {
pub fn decompress<R: Read>(data: R) -> Result<Vec<u8>, Box<dyn Error>> {
lz_fear::framed::decompress_frame(data).map_err(|err| err.into())
}

/// Compress lz4 data
// TODO: lz-fear does not yet support level
pub fn compress(data: &[u8], level: Option<u32>) -> Result<Vec<u8>, Box<dyn Error>> {
pub fn compress<R: Read>(data: R, level: Option<u32>) -> Result<Vec<u8>, Box<dyn Error>> {
let _ = level.unwrap_or_else(|| 4);
let mut buf = vec![];
lz_fear::framed::CompressionSettings::default().compress(data, &mut buf)?;
Expand Down
Loading