-
Notifications
You must be signed in to change notification settings - Fork 795
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
OsStr and Path conversions #1379
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
//! This module contains conversions between non-String Rust object and their string representation | ||
//! in Python | ||
|
||
mod osstr; | ||
mod path; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,202 @@ | ||
use crate::types::PyString; | ||
#[cfg(windows)] | ||
use crate::PyErr; | ||
use crate::PyNativeType; | ||
use crate::{ | ||
ffi, AsPyPointer, FromPyObject, IntoPy, PyAny, PyObject, PyResult, PyTryFrom, Python, | ||
ToPyObject, | ||
}; | ||
use std::borrow::Cow; | ||
use std::ffi::{OsStr, OsString}; | ||
#[cfg(not(windows))] | ||
use std::os::raw::c_char; | ||
|
||
impl ToPyObject for OsStr { | ||
fn to_object(&self, py: Python) -> PyObject { | ||
// If the string is UTF-8, take the quick and easy shortcut | ||
if let Some(valid_utf8_path) = self.to_str() { | ||
return valid_utf8_path.to_object(py); | ||
} | ||
|
||
// All targets besides windows support the std::os::unix::ffi::OsStrExt API: | ||
// https://doc.rust-lang.org/src/std/sys_common/mod.rs.html#59 | ||
#[cfg(not(windows))] | ||
{ | ||
let bytes = std::os::unix::ffi::OsStrExt::as_bytes(self); | ||
|
||
let ptr = bytes.as_ptr() as *const c_char; | ||
let len = bytes.len() as ffi::Py_ssize_t; | ||
unsafe { | ||
// DecodeFSDefault automatically chooses an appropriate decoding mechanism to | ||
// parse os strings losslessly (i.e. surrogateescape most of the time) | ||
let pystring = ffi::PyUnicode_DecodeFSDefaultAndSize(ptr, len); | ||
PyObject::from_owned_ptr(py, pystring) | ||
} | ||
} | ||
|
||
#[cfg(windows)] | ||
{ | ||
let wstr: Vec<u16> = std::os::windows::ffi::OsStrExt::encode_wide(self).collect(); | ||
|
||
unsafe { | ||
// This will not panic because the data from encode_wide is well-formed Windows | ||
// string data | ||
PyObject::from_owned_ptr( | ||
py, | ||
ffi::PyUnicode_FromWideChar(wstr.as_ptr(), wstr.len() as ffi::Py_ssize_t), | ||
) | ||
} | ||
} | ||
} | ||
} | ||
|
||
// There's no FromPyObject implementation for &OsStr because albeit possible on Unix, this would | ||
// be impossible to implement on Windows. Hence it's omitted entirely | ||
|
||
impl FromPyObject<'_> for OsString { | ||
fn extract(ob: &PyAny) -> PyResult<Self> { | ||
let pystring = <PyString as PyTryFrom>::try_from(ob)?; // Cast PyAny to PyString | ||
|
||
#[cfg(not(windows))] | ||
{ | ||
// Decode from Python's lossless bytes string representation back into raw bytes | ||
let fs_encoded_bytes = unsafe { | ||
crate::Py::<crate::types::PyBytes>::from_owned_ptr( | ||
ob.py(), | ||
ffi::PyUnicode_EncodeFSDefault(pystring.as_ptr()), | ||
) | ||
}; | ||
|
||
// Create an OsStr view into the raw bytes from Python | ||
let os_str: &OsStr = std::os::unix::ffi::OsStrExt::from_bytes( | ||
fs_encoded_bytes.as_ref(ob.py()).as_bytes(), | ||
); | ||
|
||
Ok(os_str.to_os_string()) | ||
} | ||
|
||
#[cfg(windows)] | ||
{ | ||
// Take the quick and easy shortcut if UTF-8 | ||
if let Ok(utf8_string) = pystring.to_str() { | ||
return Ok(utf8_string.to_owned().into()); | ||
} | ||
|
||
// Get an owned allocated wide char buffer from PyString, which we have to deallocate | ||
// ourselves | ||
let size = | ||
unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), std::ptr::null_mut(), 0) }; | ||
if size == -1 { | ||
return Err(PyErr::fetch(ob.py())); | ||
} | ||
|
||
let mut buffer = vec![0; size as usize]; | ||
let bytes_read = | ||
unsafe { ffi::PyUnicode_AsWideChar(pystring.as_ptr(), buffer.as_mut_ptr(), size) }; | ||
assert_eq!(bytes_read, size); | ||
|
||
// Copy wide char buffer into OsString | ||
let os_string = std::os::windows::ffi::OsStringExt::from_wide(&buffer); | ||
|
||
Ok(os_string) | ||
} | ||
} | ||
} | ||
|
||
impl IntoPy<PyObject> for &'_ OsStr { | ||
#[inline] | ||
fn into_py(self, py: Python) -> PyObject { | ||
self.to_object(py) | ||
} | ||
} | ||
|
||
impl ToPyObject for Cow<'_, OsStr> { | ||
#[inline] | ||
fn to_object(&self, py: Python) -> PyObject { | ||
(&self as &OsStr).to_object(py) | ||
} | ||
} | ||
|
||
impl IntoPy<PyObject> for Cow<'_, OsStr> { | ||
#[inline] | ||
fn into_py(self, py: Python) -> PyObject { | ||
self.to_object(py) | ||
} | ||
} | ||
|
||
impl ToPyObject for OsString { | ||
#[inline] | ||
fn to_object(&self, py: Python) -> PyObject { | ||
(&self as &OsStr).to_object(py) | ||
} | ||
} | ||
|
||
impl IntoPy<PyObject> for OsString { | ||
fn into_py(self, py: Python) -> PyObject { | ||
self.to_object(py) | ||
} | ||
} | ||
|
||
#[cfg(test)] | ||
mod test { | ||
use crate::{types::PyString, IntoPy, PyObject, Python, ToPyObject}; | ||
use std::fmt::Debug; | ||
use std::{ | ||
borrow::Cow, | ||
ffi::{OsStr, OsString}, | ||
}; | ||
|
||
#[test] | ||
#[cfg(not(windows))] | ||
fn test_non_utf8_conversion() { | ||
Python::with_gil(|py| { | ||
use std::os::unix::ffi::OsStrExt; | ||
|
||
// this is not valid UTF-8 | ||
let payload = &[250, 251, 252, 253, 254, 255, 0, 255]; | ||
let os_str = OsStr::from_bytes(payload); | ||
|
||
// do a roundtrip into Pythonland and back and compare | ||
let py_str: PyObject = os_str.into_py(py); | ||
let os_str_2: OsString = py_str.extract(py).unwrap(); | ||
assert_eq!(os_str, os_str_2); | ||
}); | ||
} | ||
|
||
#[test] | ||
fn test_topyobject_roundtrip() { | ||
Python::with_gil(|py| { | ||
fn test_roundtrip<T: ToPyObject + AsRef<OsStr> + Debug>(py: Python, obj: T) { | ||
let pyobject = obj.to_object(py); | ||
let pystring: &PyString = pyobject.extract(py).unwrap(); | ||
assert_eq!(pystring.to_string_lossy(), obj.as_ref().to_string_lossy()); | ||
let roundtripped_obj: OsString = pystring.extract().unwrap(); | ||
assert_eq!(obj.as_ref(), roundtripped_obj.as_os_str()); | ||
} | ||
let os_str = OsStr::new("Hello\0\n🐍"); | ||
test_roundtrip::<&OsStr>(py, os_str); | ||
test_roundtrip::<Cow<'_, OsStr>>(py, Cow::Borrowed(os_str)); | ||
test_roundtrip::<Cow<'_, OsStr>>(py, Cow::Owned(os_str.to_os_string())); | ||
test_roundtrip::<OsString>(py, os_str.to_os_string()); | ||
}); | ||
} | ||
|
||
#[test] | ||
fn test_intopy_roundtrip() { | ||
Python::with_gil(|py| { | ||
fn test_roundtrip<T: IntoPy<PyObject> + AsRef<OsStr> + Debug + Clone>( | ||
py: Python, | ||
obj: T, | ||
) { | ||
let pyobject = obj.clone().into_py(py); | ||
let pystring: &PyString = pyobject.extract(py).unwrap(); | ||
assert_eq!(pystring.to_string_lossy(), obj.as_ref().to_string_lossy()); | ||
let roundtripped_obj: OsString = pystring.extract().unwrap(); | ||
assert!(obj.as_ref() == roundtripped_obj.as_os_str()); | ||
} | ||
let os_str = OsStr::new("Hello\0\n🐍"); | ||
test_roundtrip::<&OsStr>(py, os_str); | ||
test_roundtrip::<OsString>(py, os_str.to_os_string()); | ||
}) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
use crate::{FromPyObject, IntoPy, PyAny, PyObject, PyResult, Python, ToPyObject}; | ||
use std::borrow::Cow; | ||
use std::ffi::OsString; | ||
use std::path::{Path, PathBuf}; | ||
|
||
impl ToPyObject for Path { | ||
fn to_object(&self, py: Python) -> PyObject { | ||
self.as_os_str().to_object(py) | ||
} | ||
} | ||
|
||
// See osstr.rs for why there's no FromPyObject impl for &Path | ||
|
||
impl FromPyObject<'_> for PathBuf { | ||
fn extract(ob: &PyAny) -> PyResult<Self> { | ||
Ok(PathBuf::from(OsString::extract(ob)?)) | ||
} | ||
} | ||
|
||
impl<'a> IntoPy<PyObject> for &'a Path { | ||
#[inline] | ||
fn into_py(self, py: Python) -> PyObject { | ||
self.as_os_str().to_object(py) | ||
} | ||
} | ||
|
||
impl<'a> ToPyObject for Cow<'a, Path> { | ||
kangalio marked this conversation as resolved.
Show resolved
Hide resolved
|
||
#[inline] | ||
fn to_object(&self, py: Python) -> PyObject { | ||
self.as_os_str().to_object(py) | ||
} | ||
} | ||
|
||
impl<'a> IntoPy<PyObject> for Cow<'a, Path> { | ||
#[inline] | ||
fn into_py(self, py: Python) -> PyObject { | ||
self.to_object(py) | ||
} | ||
} | ||
|
||
impl ToPyObject for PathBuf { | ||
#[inline] | ||
fn to_object(&self, py: Python) -> PyObject { | ||
self.as_os_str().to_object(py) | ||
} | ||
} | ||
|
||
impl IntoPy<PyObject> for PathBuf { | ||
fn into_py(self, py: Python) -> PyObject { | ||
self.into_os_string().to_object(py) | ||
} | ||
} | ||
|
||
#[cfg(test)] | ||
mod test { | ||
use crate::{types::PyString, IntoPy, PyObject, Python, ToPyObject}; | ||
use std::borrow::Cow; | ||
use std::fmt::Debug; | ||
use std::path::{Path, PathBuf}; | ||
|
||
#[test] | ||
#[cfg(not(windows))] | ||
fn test_non_utf8_conversion() { | ||
Python::with_gil(|py| { | ||
use std::ffi::OsStr; | ||
use std::os::unix::ffi::OsStrExt; | ||
|
||
// this is not valid UTF-8 | ||
let payload = &[250, 251, 252, 253, 254, 255, 0, 255]; | ||
let path = Path::new(OsStr::from_bytes(payload)); | ||
|
||
// do a roundtrip into Pythonland and back and compare | ||
let py_str: PyObject = path.into_py(py); | ||
let path_2: PathBuf = py_str.extract(py).unwrap(); | ||
assert_eq!(path, path_2); | ||
}); | ||
} | ||
|
||
#[test] | ||
fn test_topyobject_roundtrip() { | ||
Python::with_gil(|py| { | ||
fn test_roundtrip<T: ToPyObject + AsRef<Path> + Debug>(py: Python, obj: T) { | ||
let pyobject = obj.to_object(py); | ||
let pystring: &PyString = pyobject.extract(py).unwrap(); | ||
assert_eq!(pystring.to_string_lossy(), obj.as_ref().to_string_lossy()); | ||
let roundtripped_obj: PathBuf = pystring.extract().unwrap(); | ||
assert_eq!(obj.as_ref(), roundtripped_obj.as_path()); | ||
} | ||
let path = Path::new("Hello\0\n🐍"); | ||
test_roundtrip::<&Path>(py, path); | ||
test_roundtrip::<Cow<'_, Path>>(py, Cow::Borrowed(path)); | ||
test_roundtrip::<Cow<'_, Path>>(py, Cow::Owned(path.to_path_buf())); | ||
test_roundtrip::<PathBuf>(py, path.to_path_buf()); | ||
Comment on lines
+90
to
+93
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. .as_ref() is only called to verify the end result. The conversion is done on the types as is. |
||
}); | ||
} | ||
|
||
#[test] | ||
fn test_intopy_roundtrip() { | ||
Python::with_gil(|py| { | ||
fn test_roundtrip<T: IntoPy<PyObject> + AsRef<Path> + Debug + Clone>( | ||
py: Python, | ||
obj: T, | ||
) { | ||
let pyobject = obj.clone().into_py(py); | ||
let pystring: &PyString = pyobject.extract(py).unwrap(); | ||
assert_eq!(pystring.to_string_lossy(), obj.as_ref().to_string_lossy()); | ||
let roundtripped_obj: PathBuf = pystring.extract().unwrap(); | ||
assert!(obj.as_ref() == roundtripped_obj.as_path()); | ||
} | ||
let path = Path::new("Hello\0\n🐍"); | ||
test_roundtrip::<&Path>(py, path); | ||
test_roundtrip::<PathBuf>(py, path.to_path_buf()); | ||
}) | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also needs
IntoPy<PyObject>
forCow<'_, OsStr>
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Haha same thing, seems like
types/string.rs
is missing this as wellThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ah - if you're willing, can you add it there also? 🙏
I think that adding
IntoPy
forCow
makes sense (as it's the trait needed to be able to return this type from#[pyfunction]
).