From e335870c888580737d8774d8e800a7883db91cb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Vlas=C3=A1k?= Date: Sat, 22 Feb 2025 11:53:28 +0100 Subject: [PATCH] Fix incomplete attached file problem Currently, we only have the `ttstub_input_read` function, which corresponds to Rust's `read_exact`. This seems to work for most of xetex, but it's not semantically a replacement for buffered reading with `fread`, which is used in dvipdfmx. One such place is reading a file to be included in a PDF file as an attachment. This is done through a dvipdfmx special like this: \special{pdf:fstream @SourceFile (example.txt)} These reads are done in 1024-byte chunks, and if we read these chunks with `read_exact`, the final, shorter chunk is going to fail unless the file size is exactly divisible by 1024. Tectonic would report this as: warning: 1024-byte read failed caused by: failed to fill whole buffer To fix this issue in a non-intrusive way, we introduce a new variant of reads - `ttstub_input_read_partial`, which follows Rust's `read` semantics, and not `read_exact`'s, and we use it for the dvipdfmx fstream reading. Likely any place that calls `ttstub_input_read` in a loop is also a candidate for using `ttstub_input_read_partial`, and perhaps it would work better for others. But there definitely are places in xetex that depend on the `read_exact` semantics. For example, when I naively just tried to make all reads into `ttstub_input_read_partial`, xetex failed pretty quickly on "undumping the format file", which has a few initial 4-byte reads, but then it tries to undump the big string pool, which was many kilobytes in size. Rust's `read`, however, would only return 8192 bytes minus a couple of the 4-byte reads, which would fail a check on the TeX side which expects a full read. AFAICT xetex uses `fread`, so this probably just comes down to buffering differences between `fread` and Rust's `Reader` / `BufReader`. 
Relates to #935 Fixes #1260 --- crates/bridge_core/src/lib.rs | 34 +++++++++++++++++++ crates/bridge_core/support/support.c | 5 +++ .../support/tectonic_bridge_core.h | 1 + .../support/tectonic_bridge_core_generated.h | 2 ++ crates/pdf_io/pdf_io/dpx-spc_pdfm.c | 2 +- 5 files changed, 43 insertions(+), 1 deletion(-) diff --git a/crates/bridge_core/src/lib.rs b/crates/bridge_core/src/lib.rs index 6541c0d7a..55cea1544 100644 --- a/crates/bridge_core/src/lib.rs +++ b/crates/bridge_core/src/lib.rs @@ -674,6 +674,11 @@ impl<'a> CoreBridgeState<'a> { rhandle.read_exact(buf).map_err(Error::from) } + fn input_read_partial(&mut self, handle: *mut InputHandle, buf: &mut [u8]) -> Result { + let rhandle: &mut InputHandle = unsafe { &mut *handle }; + rhandle.read(buf).map_err(Error::from) + } + fn input_getc(&mut self, handle: *mut InputHandle) -> Result { let rhandle: &mut InputHandle = unsafe { &mut *handle }; rhandle.getc() @@ -1181,6 +1186,9 @@ pub extern "C" fn ttbc_input_ungetc( /// Read data from a Tectonic input handle /// +/// This read corresponds to Rust's read_exact, i.e. it will return exactly the number of requested +/// bytes or error (-1). +/// /// # Safety /// /// This function is unsafe because it dereferences raw C pointers. @@ -1202,6 +1210,32 @@ pub unsafe extern "C" fn ttbc_input_read( } } +/// Read data from a Tectonic input handle +/// +/// This read corresponds to Rust's read, i.e. it can return less bytes than requested (and does +/// when buffering) +/// +/// # Safety +/// +/// This function is unsafe because it dereferences raw C pointers. 
+#[no_mangle] +pub unsafe extern "C" fn ttbc_input_read_partial( + es: &mut CoreBridgeState, + handle: *mut InputHandle, + data: *mut u8, + len: libc::size_t, +) -> libc::ssize_t { + let rdata = slice::from_raw_parts_mut(data, len); + + match es.input_read_partial(handle, rdata) { + Ok(size) => size as isize, + Err(e) => { + tt_warning!(es.status, "{}-byte read failed", len; e); + -1 + } + } +} + /// Close a Tectonic input file. #[no_mangle] pub extern "C" fn ttbc_input_close( diff --git a/crates/bridge_core/support/support.c b/crates/bridge_core/support/support.c index 3ae7fff8b..e6d3e5365 100644 --- a/crates/bridge_core/support/support.c +++ b/crates/bridge_core/support/support.c @@ -285,6 +285,11 @@ ttstub_input_read(rust_input_handle_t handle, char *data, size_t len) return ttbc_input_read(tectonic_global_bridge_core, handle, (uint8_t *) data, len); } +ssize_t +ttstub_input_read_partial(rust_input_handle_t handle, char *data, size_t len) +{ + return ttbc_input_read_partial(tectonic_global_bridge_core, handle, (uint8_t *) data, len); +} int ttstub_input_getc(rust_input_handle_t handle) diff --git a/crates/bridge_core/support/tectonic_bridge_core.h b/crates/bridge_core/support/tectonic_bridge_core.h index 0ae5495c7..d118db71f 100644 --- a/crates/bridge_core/support/tectonic_bridge_core.h +++ b/crates/bridge_core/support/tectonic_bridge_core.h @@ -243,6 +243,7 @@ size_t ttstub_input_get_size(rust_input_handle_t handle); time_t ttstub_input_get_mtime(rust_input_handle_t handle); size_t ttstub_input_seek(rust_input_handle_t handle, ssize_t offset, int whence); ssize_t ttstub_input_read(rust_input_handle_t handle, char *data, size_t len); +ssize_t ttstub_input_read_partial(rust_input_handle_t handle, char *data, size_t len); int ttstub_input_getc(rust_input_handle_t handle); int ttstub_input_ungetc(rust_input_handle_t handle, int ch); int ttstub_input_close(rust_input_handle_t handle); diff --git a/crates/bridge_core/support/tectonic_bridge_core_generated.h 
b/crates/bridge_core/support/tectonic_bridge_core_generated.h index 76b9da6fb..f4a5cb8eb 100644 --- a/crates/bridge_core/support/tectonic_bridge_core_generated.h +++ b/crates/bridge_core/support/tectonic_bridge_core_generated.h @@ -296,6 +296,8 @@ int ttbc_input_ungetc(ttbc_state_t *es, ttbc_input_handle_t *handle, int ch); */ ssize_t ttbc_input_read(ttbc_state_t *es, ttbc_input_handle_t *handle, uint8_t *data, size_t len); +ssize_t ttbc_input_read_partial(ttbc_state_t *es, ttbc_input_handle_t *handle, uint8_t *data, size_t len); + /** * Close a Tectonic input file. */ diff --git a/crates/pdf_io/pdf_io/dpx-spc_pdfm.c b/crates/pdf_io/pdf_io/dpx-spc_pdfm.c index 36b584ce2..b4d1fd97a 100644 --- a/crates/pdf_io/pdf_io/dpx-spc_pdfm.c +++ b/crates/pdf_io/pdf_io/dpx-spc_pdfm.c @@ -1664,7 +1664,7 @@ spc_handler_pdfm_stream_with_type (struct spc_env *spe, struct spc_arg *args, in } fstream = pdf_new_stream(STREAM_COMPRESS); while ((nb_read = - ttstub_input_read(handle, work_buffer, WORK_BUFFER_SIZE)) > 0) + ttstub_input_read_partial(handle, work_buffer, WORK_BUFFER_SIZE)) > 0) pdf_add_stream(fstream, work_buffer, nb_read); ttstub_input_close(handle); break;