From 7d0a5fe85cb3b16070775d8abb3d75bb7186ffa6 Mon Sep 17 00:00:00 2001 From: Maarten de Vries Date: Fri, 13 Dec 2024 13:18:59 +0100 Subject: [PATCH 1/3] Run miri in CI. --- .github/workflows/rust.yml | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index bd12368..ab69fe7 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -19,17 +19,15 @@ jobs: key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} restore-keys: ${{ runner.os }}-cargo - name: Build - uses: actions-rs/cargo@v1 - with: - command: build - args: --workspace --all-targets --features indexmap,json,toml,yaml --color=always + run: cargo +stable build --workspace --all-targets --features indexmap,json,toml,yaml --color=always - name: Test - uses: actions-rs/cargo@v1 - with: - command: test - args: --workspace --all-targets --features indexmap,json,toml,yaml --color=always + run: cargo +stable test --workspace --all-targets --features indexmap,json,toml,yaml --color=always - name: Clippy uses: actions-rs/clippy-check@v1 with: token: ${{ secrets.GITHUB_TOKEN }} args: --workspace --all-targets --features indexmap,json,toml,yaml + - name: Install miri + run: rustup +nightly component add miri + - name: Run miri + run: cargo +nightly miri test --workspace --all-targets --features indexmap,json,toml,yaml --color=always From 1aadd380d0f2bf9ef1fa604dfa81b63ef30dae14 Mon Sep 17 00:00:00 2001 From: Maarten de Vries Date: Fri, 13 Dec 2024 21:12:10 +0100 Subject: [PATCH 2/3] Add more test for possible undefined behaviour in TemplateBuf. --- src/template/mod.rs | 46 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/src/template/mod.rs b/src/template/mod.rs index ab39a36..f69a5fc 100644 --- a/src/template/mod.rs +++ b/src/template/mod.rs @@ -466,4 +466,50 @@ mod tests { assert!(buf2.as_template().source() == source); assert!(buf2.into_source() == source); } + + #[test] + fn test_clone_byte_template_buf() { + let mut map: BTreeMap = BTreeMap::new(); + map.insert("name".into(), "world".into()); + let source = b"Hello ${name}!"; + let_assert!(Ok(buf1) = ByteTemplateBuf::from_vec(source.into())); + let buf2 = buf1.clone(); + let mut string = buf1.into_source(); + string.as_mut_slice()[..5].make_ascii_uppercase(); + check!(let Ok(b"Hello world!") = buf2.expand(&map).as_deref()); + assert!(buf2.as_template().source() == source); + assert!(buf2.into_source() == source); + } + + #[test] + fn test_move_template_buf() { + #[inline(never)] + fn check_template(buf: TemplateBuf) { + let mut map: BTreeMap = BTreeMap::new(); + map.insert("name".into(), "world".into()); + assert!(buf.as_template().source() == "Hello ${name}!"); + let_assert!(Ok(expanded) = buf.as_template().expand(&map)); + assert!(expanded == "Hello world!"); + } + + let source = "Hello ${name}!"; + let_assert!(Ok(buf1) = TemplateBuf::from_string(source.into())); + check_template(buf1); + } + + #[test] + fn test_move_byte_template_buf() { + #[inline(never)] + fn check_template(buf: ByteTemplateBuf) { + let mut map: BTreeMap = BTreeMap::new(); + map.insert("name".into(), "world".into()); + assert!(buf.as_template().source() == b"Hello ${name}!"); + let_assert!(Ok(expanded) = buf.as_template().expand(&map)); + assert!(expanded == b"Hello world!"); + } + + let source = b"Hello ${name}!"; + let_assert!(Ok(buf1) = ByteTemplateBuf::from_vec(source.into())); + check_template(buf1); + } } From 28b2e9f9ed4dc2190e0d6b09590bf0a1ce902ef7 Mon Sep 17 00:00:00 2001 From: Maarten de Vries Date: Fri, 13 Dec 2024 12:45:52 +0100 Subject: [PATCH 3/3] Store the wrapped template of TemplateBuf in MaybeUninit. This avoids having a real reference to the source data, which should avoid UB from aliasing the `String`/`Vec`. --- src/lib.rs | 2 + src/non_aliasing.rs | 37 ++++++++++++++++ src/template/mod.rs | 101 +++++++++++++++++++++++++++----------------- 3 files changed, 101 insertions(+), 39 deletions(-) create mode 100644 src/non_aliasing.rs diff --git a/src/lib.rs b/src/lib.rs index 18b106d..be065c1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -96,6 +96,8 @@ mod features; #[allow(unused_imports)] // Might not re-export anything if all features are disabled. pub use features::*; +mod non_aliasing; + /// Substitute variables in a string. /// /// Variables have the form `$NAME`, `${NAME}` or `${NAME:default}`. diff --git a/src/non_aliasing.rs b/src/non_aliasing.rs new file mode 100644 index 0000000..ec5c51e --- /dev/null +++ b/src/non_aliasing.rs @@ -0,0 +1,37 @@ +use core::mem::MaybeUninit; + +/// Simple wrapper around [`MaybeUninit`] that guarantees the type is initialized. +/// +/// This may sound odd, but the compiler can not assume that `MaybeUninit` is initialized, +/// but as far is the compiler is concerned, `MaybeUninit` is not keeping any references around, even if `T` would. +/// So `NonAliasing` is not aliassing anything untill you call `inner()` to get your `&T` back. +/// +/// We use this to create a (hopefully) sound self referential struct in `TemplateBuf` and `ByteTemplateBuf`. +pub struct NonAliasing { + inner: MaybeUninit, +} + +impl NonAliasing { + pub fn new(inner: T) -> Self { + let inner = MaybeUninit::new(inner); + Self { inner } + } + + pub fn inner(&self) -> &T { + // SAFETY: We always initialize `inner` in the constructor. + unsafe { + self.inner.assume_init_ref() + } + } +} + +impl Drop for NonAliasing { + fn drop(&mut self) { + // SAFETY: We always initialize `inner` in the constructor, + // the API only exposes `assume_init_ref()`, + // and we're in the destructor, so nobody is going to call assume_init_read() again. + unsafe { + drop(self.inner.assume_init_read()) + } + } +} diff --git a/src/template/mod.rs b/src/template/mod.rs index f69a5fc..1303b53 100644 --- a/src/template/mod.rs +++ b/src/template/mod.rs @@ -1,5 +1,8 @@ -use crate::error::{ExpandError, ParseError}; +use core::pin::Pin; + use crate::VariableMap; +use crate::error::{ExpandError, ParseError}; +use crate::non_aliasing::NonAliasing; mod raw; @@ -94,22 +97,27 @@ impl<'a> Template<'a> { pub struct TemplateBuf { // SAFETY: To avoid dangling references, Template must be dropped before // source, therefore the template field must be precede the source field. - template: Template<'static>, - source: String, + // + // SAFETY: We use NonAliasing to avoid aliassing the source data directly. + // We only re-create the reference to the source when we call template.inner(). + template: NonAliasing>, + source: Pin, } impl Clone for TemplateBuf { fn clone(&self) -> Self { let source = self.source.clone(); + let raw = self.template.inner().raw.clone(); + let template = Template { - raw: self.template.raw.clone(), - source: &source, + raw, + source: &*source, }; - // SAFETY: - // The str slice given to `template` must remain valid. + // SAFETY: The str slice given to `template` must remain valid. // Since `String` keeps data on the heap, it remains valid when the `source` is moved. // We MUST ensure we do not modify, drop or overwrite `source`. let template = unsafe { template.transmute_lifetime() }; + let template = NonAliasing::new(template); Self { template, source, @@ -120,7 +128,9 @@ impl Clone for TemplateBuf { impl std::fmt::Debug for TemplateBuf { #[inline] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_tuple("TemplateBuf").field(&self.source).finish() + f.debug_tuple("TemplateBuf") + .field(&self.template.inner().source()) + .finish() } } @@ -139,29 +149,30 @@ impl TemplateBuf { /// You can escape dollar signs, backslashes, colons and braces with a backslash. #[inline] pub fn from_string(source: String) -> Result { - let template = Template::from_str(&source)?; + let source = Pin::new(source); + let template = Template::from_str(&*source)?; - // SAFETY: - // The str slice given to `template` must remain valid. + // SAFETY: The str slice given to `template` must remain valid. // Since `String` keeps data on the heap, it remains valid when the `source` is moved. // We MUST ensure we do not modify, drop or overwrite `source`. let template = unsafe { template.transmute_lifetime() }; + let template = NonAliasing::new(template); Ok(Self { source, template }) } /// Consume the template to get the original source string. #[inline] pub fn into_source(self) -> String { - // SAFETY: Drop template before source to avoid dangling reference + // SAFETY: Drop `template` before moving `source` to avoid dangling reference. drop(self.template); - self.source + Pin::into_inner(self.source) } /// Borrow the template. #[inline] #[allow(clippy::needless_lifetimes)] pub fn as_template<'a>(&'a self) -> &'a Template<'a> { - &self.template + self.template.inner() } /// Expand the template. @@ -175,7 +186,7 @@ impl TemplateBuf { M: VariableMap<'b> + ?Sized, M::Value: AsRef, { - self.template.expand(variables) + self.as_template().expand(variables) } } @@ -203,18 +214,18 @@ impl From<&Template<'_>> for TemplateBuf { impl From> for TemplateBuf { #[inline] fn from(other: Template<'_>) -> Self { - let source: String = other.source.into(); + let source: Pin = Pin::new(other.source.into()); let template = Template { - source: source.as_str(), + source: &*source, raw: other.raw, }; - // SAFETY: - // The str slice given to `template` must remain valid. + // SAFETY: The slice given to `template` must remain valid. // Since `String` keeps data on the heap, it remains valid when the `source` is moved. // We MUST ensure we do not modify, drop or overwrite `source`. let template = unsafe { template.transmute_lifetime() }; + let template = NonAliasing::new(template); Self { source, template } } @@ -309,21 +320,30 @@ impl<'a> ByteTemplate<'a> { pub struct ByteTemplateBuf { // SAFETY: To avoid dangling references, Template must be dropped before // source, therefore the template field must be precede the source field. - template: ByteTemplate<'static>, - source: Vec, + // + // SAFETY: We use NonAliasing to avoid aliassing the source data directly. + // We only re-create the reference to the source when we call template.inner(). + template: NonAliasing>, + + source: Pin>, } impl Clone for ByteTemplateBuf { fn clone(&self) -> Self { let source = self.source.clone(); + let raw = self.template.inner().raw.clone(); + let template = ByteTemplate { - raw: self.template.raw.clone(), - source: &source, + raw, + source: &*source, }; - // The slice given to `template` must remain valid. - // Since `Vec` keeps data on the heap, it remains valid when the `source` is moved. + + // SAFETY: The slice given to `template` must remain valid. + // Since `Pin>` keeps data on the heap, it remains valid when the `source` is moved. // We MUST ensure we do not modify, drop or overwrite `source`. let template = unsafe { template.transmute_lifetime() }; + let template = NonAliasing::new(template); + Self { template, source, @@ -335,7 +355,7 @@ impl std::fmt::Debug for ByteTemplateBuf { #[inline] fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_tuple("ByteTemplateBuf") - .field(&DebugByteString(&self.source)) + .field(&DebugByteString(self.as_template().source())) .finish() } } @@ -353,29 +373,31 @@ impl ByteTemplateBuf { /// You can escape dollar signs, backslashes, colons and braces with a backslash. #[inline] pub fn from_vec(source: Vec) -> Result { - let template = ByteTemplate::from_slice(&source)?; + let source = Pin::new(source); + let template = ByteTemplate::from_slice(&*source)?; - // SAFETY: - // The slice given to `template` must remain valid. + // SAFETY: The slice given to `template` must remain valid. // Since `Vec` keeps data on the heap, it remains valid when the `source` is moved. // We MUST ensure we do not modify, drop or overwrite `source`. let template = unsafe { std::mem::transmute::, ByteTemplate<'static>>(template) }; + let template = NonAliasing::new(template); + Ok(Self { source, template }) } /// Consume the template to get the original source vector. #[inline] pub fn into_source(self) -> Vec { - // SAFETY: Drop template before source to avoid dangling reference + // SAFETY: Drop `template` before moving `source` to avoid dangling reference. drop(self.template); - self.source + Pin::into_inner(self.source) } /// Borrow the template. #[inline] #[allow(clippy::needless_lifetimes)] - pub fn as_template<'a>(&'a self) -> &'a ByteTemplate<'static> { - &self.template + pub fn as_template<'a>(&'a self) -> &'a ByteTemplate<'a> { + self.template.inner() } /// Expand the template. @@ -389,7 +411,7 @@ impl ByteTemplateBuf { M: VariableMap<'b> + ?Sized, M::Value: AsRef<[u8]>, { - self.template.expand(variables) + self.as_template().expand(variables) } } @@ -418,17 +440,18 @@ impl From> for ByteTemplateBuf { #[inline] fn from(other: ByteTemplate<'_>) -> Self { let source: Vec = other.source.into(); + let source = Pin::new(source); let template = ByteTemplate { - source: source.as_slice(), + source: &*source, raw: other.raw, }; - // SAFETY: - // The slice given to `template` must remain valid. - // Since `Vec` keeps data on the heap, it remains valid when the `source` is moved. + // SAFETY: The slice given to `template` must remain valid. + // Since `Pin>` keeps data on the heap, it remains valid when the `source` is moved. // We MUST ensure we do not modify, drop or overwrite `source`. let template = unsafe { template.transmute_lifetime() }; + let template = NonAliasing::new(template); Self { source, template } } @@ -454,7 +477,7 @@ mod tests { use std::collections::BTreeMap; #[test] - fn test_clone() { + fn test_clone_template_buf() { let mut map: BTreeMap = BTreeMap::new(); map.insert("name".into(), "world".into()); let source = "Hello ${name}!";