Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Get !nonnull metadata on slice iterators, without assumes #113344

Merged
merged 1 commit into from
Jul 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions library/core/src/ptr/non_null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,30 @@ impl<T: ?Sized> NonNull<T> {
// And the caller promised the `delta` is sound to add.
unsafe { NonNull { pointer: self.pointer.add(delta) } }
}

/// See [`pointer::sub`] for semantics and safety requirements.
#[inline]
pub(crate) const unsafe fn sub(self, delta: usize) -> Self
where
T: Sized,
{
// SAFETY: We require that the delta stays in-bounds of the object, and
// thus it cannot become null, as no legal objects can be allocated
// in such as way that the null address is part of them.
// And the caller promised the `delta` is sound to subtract.
unsafe { NonNull { pointer: self.pointer.sub(delta) } }
}

/// See [`pointer::sub_ptr`] for semantics and safety requirements.
#[inline]
pub(crate) const unsafe fn sub_ptr(self, subtrahend: Self) -> usize
where
T: Sized,
{
// SAFETY: The caller promised that this is safe to do, and
// the non-nullness is irrelevant to the operation.
unsafe { self.pointer.sub_ptr(subtrahend.pointer) }
}
}

impl<T> NonNull<[T]> {
Expand Down
25 changes: 13 additions & 12 deletions library/core/src/slice/iter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use crate::iter::{
use crate::marker::{PhantomData, Send, Sized, Sync};
use crate::mem::{self, SizedTypeProperties};
use crate::num::NonZeroUsize;
use crate::ptr::{invalid, invalid_mut, NonNull};
use crate::ptr::{self, invalid, invalid_mut, NonNull};

use super::{from_raw_parts, from_raw_parts_mut};

Expand Down Expand Up @@ -68,7 +68,7 @@ pub struct Iter<'a, T: 'a> {
/// For non-ZSTs, the non-null pointer to the past-the-end element.
///
/// For ZSTs, this is `ptr::invalid(len)`.
end: *const T,
end_or_len: *const T,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wish this could use a union, but it can't because that makes it able to store undef, which we don't want.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Time for a MostCertainlyInitialized<T>? 😄

_marker: PhantomData<&'a T>,
}

Expand All @@ -90,9 +90,9 @@ impl<'a, T> Iter<'a, T> {
let ptr = slice.as_ptr();
// SAFETY: Similar to `IterMut::new`.
unsafe {
let end = if T::IS_ZST { invalid(slice.len()) } else { ptr.add(slice.len()) };
let end_or_len = if T::IS_ZST { invalid(slice.len()) } else { ptr.add(slice.len()) };

Self { ptr: NonNull::new_unchecked(ptr as *mut T), end, _marker: PhantomData }
Self { ptr: NonNull::new_unchecked(ptr as *mut T), end_or_len, _marker: PhantomData }
}
}

Expand Down Expand Up @@ -128,7 +128,7 @@ impl<'a, T> Iter<'a, T> {
}
}

iterator! {struct Iter -> *const T, &'a T, const, {/* no mut */}, {
iterator! {struct Iter -> *const T, &'a T, const, {/* no mut */}, as_ref, {
fn is_sorted_by<F>(self, mut compare: F) -> bool
where
Self: Sized,
Expand All @@ -142,7 +142,7 @@ iterator! {struct Iter -> *const T, &'a T, const, {/* no mut */}, {
impl<T> Clone for Iter<'_, T> {
#[inline]
fn clone(&self) -> Self {
Iter { ptr: self.ptr, end: self.end, _marker: self._marker }
Iter { ptr: self.ptr, end_or_len: self.end_or_len, _marker: self._marker }
}
}

Expand Down Expand Up @@ -189,7 +189,7 @@ pub struct IterMut<'a, T: 'a> {
/// For non-ZSTs, the non-null pointer to the past-the-end element.
///
/// For ZSTs, this is `ptr::invalid_mut(len)`.
end: *mut T,
end_or_len: *mut T,
_marker: PhantomData<&'a mut T>,
}

Expand Down Expand Up @@ -220,15 +220,16 @@ impl<'a, T> IterMut<'a, T> {
// for direct pointer equality with `ptr` to check if the iterator is
// done.
//
// In the case of a ZST, the end pointer is just the start pointer plus
// the length, to also allows for the fast `ptr == end` check.
// In the case of a ZST, the end pointer is just the length. It's never
// used as a pointer at all, and thus it's fine to have no provenance.
//
// See the `next_unchecked!` and `is_empty!` macros as well as the
// `post_inc_start` method for more information.
unsafe {
let end = if T::IS_ZST { invalid_mut(slice.len()) } else { ptr.add(slice.len()) };
let end_or_len =
if T::IS_ZST { invalid_mut(slice.len()) } else { ptr.add(slice.len()) };

Self { ptr: NonNull::new_unchecked(ptr), end, _marker: PhantomData }
Self { ptr: NonNull::new_unchecked(ptr), end_or_len, _marker: PhantomData }
}
}

Expand Down Expand Up @@ -360,7 +361,7 @@ impl<T> AsRef<[T]> for IterMut<'_, T> {
// }
// }

iterator! {struct IterMut -> *mut T, &'a mut T, mut, {mut}, {}}
iterator! {struct IterMut -> *mut T, &'a mut T, mut, {mut}, as_mut, {}}

/// An internal abstraction over the splitting iterators, so that
/// splitn, splitn_mut etc can be implemented once.
Expand Down
146 changes: 79 additions & 67 deletions library/core/src/slice/iter/macros.rs
Original file line number Diff line number Diff line change
@@ -1,45 +1,62 @@
//! Macros used by iterators of slice.

// Shrinks the iterator when T is a ZST, setting the length to `new_len`.
// `new_len` must not exceed `self.len()`.
macro_rules! zst_set_len {
($self: ident, $new_len: expr) => {{
/// Convenience & performance macro for consuming the `end_or_len` field, by
/// giving a `(&mut) usize` or `(&mut) NonNull<T>` depending whether `T` is
/// or is not a ZST respectively.
///
/// Internally, this reads the `end` through a pointer-to-`NonNull` so that
/// it'll get the appropriate non-null metadata in the backend without needing
/// to call `assume` manually.
macro_rules! if_zst {
(mut $this:ident, $len:ident => $zst_body:expr, $end:ident => $other_body:expr,) => {{
#![allow(unused_unsafe)] // we're sometimes used within an unsafe block

// SAFETY: same as `invalid(_mut)`, but the macro doesn't know
// which versions of that function to call, so open-code it.
$self.end = unsafe { mem::transmute::<usize, _>($new_len) };
if T::IS_ZST {
// SAFETY: for ZSTs, the pointer is storing a provenance-free length,
// so consuming and updating it as a `usize` is fine.
let $len = unsafe { &mut *ptr::addr_of_mut!($this.end_or_len).cast::<usize>() };
$zst_body
} else {
// SAFETY: for non-ZSTs, the type invariant ensures it cannot be null
let $end = unsafe { &mut *ptr::addr_of_mut!($this.end_or_len).cast::<NonNull<T>>() };
$other_body
}
}};
}
($this:ident, $len:ident => $zst_body:expr, $end:ident => $other_body:expr,) => {{
#![allow(unused_unsafe)] // we're sometimes used within an unsafe block

// Shrinks the iterator when T is a ZST, reducing the length by `n`.
// `n` must not exceed `self.len()`.
macro_rules! zst_shrink {
($self: ident, $n: ident) => {
let new_len = $self.end.addr() - $n;
zst_set_len!($self, new_len);
};
if T::IS_ZST {
let $len = $this.end_or_len.addr();
$zst_body
} else {
// SAFETY: for non-ZSTs, the type invariant ensures it cannot be null
let $end = unsafe { *ptr::addr_of!($this.end_or_len).cast::<NonNull<T>>() };
$other_body
}
}};
}

// Inlining is_empty and len makes a huge performance difference
macro_rules! is_empty {
($self: ident) => {
if T::IS_ZST { $self.end.addr() == 0 } else { $self.ptr.as_ptr() as *const _ == $self.end }
if_zst!($self,
len => len == 0,
end => $self.ptr == end,
)
};
}

macro_rules! len {
($self: ident) => {{
#![allow(unused_unsafe)] // we're sometimes used within an unsafe block

if T::IS_ZST {
$self.end.addr()
} else {
// To get rid of some bounds checks (see `position`), we use ptr_sub instead of
// offset_from (Tested by `codegen/slice-position-bounds-check`.)
// SAFETY: by the type invariant pointers are aligned and `start <= end`
unsafe { $self.end.sub_ptr($self.ptr.as_ptr()) }
}
if_zst!($self,
len => len,
end => {
// To get rid of some bounds checks (see `position`), we use ptr_sub instead of
// offset_from (Tested by `codegen/slice-position-bounds-check`.)
// SAFETY: by the type invariant pointers are aligned and `start <= end`
unsafe { end.sub_ptr($self.ptr) }
},
)
}};
}

Expand All @@ -50,20 +67,21 @@ macro_rules! iterator {
$elem:ty,
$raw_mut:tt,
{$( $mut_:tt )?},
$into_ref:ident,
{$($extra:tt)*}
) => {
// Returns the first element and moves the start of the iterator forwards by 1.
// Greatly improves performance compared to an inlined function. The iterator
// must not be empty.
macro_rules! next_unchecked {
($self: ident) => {& $( $mut_ )? *$self.post_inc_start(1)}
($self: ident) => { $self.post_inc_start(1).$into_ref() }
}

// Returns the last element and moves the end of the iterator backwards by 1.
// Greatly improves performance compared to an inlined function. The iterator
// must not be empty.
macro_rules! next_back_unchecked {
($self: ident) => {& $( $mut_ )? *$self.pre_dec_end(1)}
($self: ident) => { $self.pre_dec_end(1).$into_ref() }
}

impl<'a, T> $name<'a, T> {
Expand All @@ -80,33 +98,40 @@ macro_rules! iterator {
// returning the old start.
// Unsafe because the offset must not exceed `self.len()`.
#[inline(always)]
unsafe fn post_inc_start(&mut self, offset: usize) -> * $raw_mut T {
unsafe fn post_inc_start(&mut self, offset: usize) -> NonNull<T> {
let old = self.ptr;
if T::IS_ZST {
zst_shrink!(self, offset);
} else {
// SAFETY: the caller guarantees that `offset` doesn't exceed `self.len()`,
// so this new pointer is inside `self` and thus guaranteed to be non-null.
self.ptr = unsafe { self.ptr.add(offset) };

// SAFETY: the caller guarantees that `offset` doesn't exceed `self.len()`,
// so this new pointer is inside `self` and thus guaranteed to be non-null.
unsafe {
if_zst!(mut self,
len => *len = len.unchecked_sub(offset),
_end => self.ptr = self.ptr.add(offset),
);
}
old.as_ptr()
old
}

// Helper function for moving the end of the iterator backwards by `offset` elements,
// returning the new end.
// Unsafe because the offset must not exceed `self.len()`.
#[inline(always)]
unsafe fn pre_dec_end(&mut self, offset: usize) -> * $raw_mut T {
if T::IS_ZST {
zst_shrink!(self, offset);
self.ptr.as_ptr()
} else {
unsafe fn pre_dec_end(&mut self, offset: usize) -> NonNull<T> {
if_zst!(mut self,
// SAFETY: By our precondition, `offset` can be at most the
// current length, so the subtraction can never overflow.
len => unsafe {
*len = len.unchecked_sub(offset);
self.ptr
},
// SAFETY: the caller guarantees that `offset` doesn't exceed `self.len()`,
// which is guaranteed to not overflow an `isize`. Also, the resulting pointer
// is in bounds of `slice`, which fulfills the other requirements for `offset`.
self.end = unsafe { self.end.sub(offset) };
self.end
}
end => unsafe {
*end = end.sub(offset);
*end
},
)
}
}

Expand All @@ -131,13 +156,9 @@ macro_rules! iterator {
fn next(&mut self) -> Option<$elem> {
// could be implemented with slices, but this avoids bounds checks

// SAFETY: `assume` call is safe because slices over non-ZSTs must
// have a non-null end pointer. The call to `next_unchecked!` is
// SAFETY: The call to `next_unchecked!` is
// safe since we check if the iterator is empty first.
unsafe {
if !<T>::IS_ZST {
assume(!self.end.is_null());
}
if is_empty!(self) {
None
} else {
Expand All @@ -161,14 +182,10 @@ macro_rules! iterator {
fn nth(&mut self, n: usize) -> Option<$elem> {
if n >= len!(self) {
// This iterator is now empty.
if T::IS_ZST {
zst_set_len!(self, 0);
} else {
// SAFETY: end can't be 0 if T isn't ZST because ptr isn't 0 and end >= ptr
unsafe {
self.ptr = NonNull::new_unchecked(self.end as *mut T);
}
}
if_zst!(mut self,
len => *len = 0,
end => self.ptr = *end,
);
return None;
}
// SAFETY: We are in bounds. `post_inc_start` does the right thing even for ZSTs.
Expand Down Expand Up @@ -375,13 +392,9 @@ macro_rules! iterator {
fn next_back(&mut self) -> Option<$elem> {
// could be implemented with slices, but this avoids bounds checks

// SAFETY: `assume` call is safe because slices over non-ZSTs must
// have a non-null end pointer. The call to `next_back_unchecked!`
// SAFETY: The call to `next_back_unchecked!`
// is safe since we check if the iterator is empty first.
unsafe {
if !<T>::IS_ZST {
assume(!self.end.is_null());
}
if is_empty!(self) {
None
} else {
Expand All @@ -394,11 +407,10 @@ macro_rules! iterator {
fn nth_back(&mut self, n: usize) -> Option<$elem> {
if n >= len!(self) {
// This iterator is now empty.
if T::IS_ZST {
zst_set_len!(self, 0);
} else {
self.end = self.ptr.as_ptr();
}
if_zst!(mut self,
len => *len = 0,
end => *end = self.ptr,
);
return None;
}
// SAFETY: We are in bounds. `pre_dec_end` does the right thing even for ZSTs.
Expand Down
1 change: 1 addition & 0 deletions tests/codegen/iter-repeat-n-trivial-drop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ pub fn iter_repeat_n_next(it: &mut std::iter::RepeatN<NotCopy>) -> Option<NotCop
// CHECK: [[EMPTY]]:
// CHECK-NOT: br
// CHECK: phi i16
// CHECK-SAME: [ %[[VAL]], %[[NOT_EMPTY]] ]
// CHECK-NOT: br
// CHECK: ret

Expand Down
4 changes: 2 additions & 2 deletions tests/codegen/slice-iter-len-eq-zero.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ type Demo = [u8; 3];
#[no_mangle]
pub fn slice_iter_len_eq_zero(y: std::slice::Iter<'_, Demo>) -> bool {
// CHECK-NOT: sub
// CHECK: %_0 = icmp eq {{i8\*|ptr}} {{%1|%0}}, {{%1|%0}}
// CHECK: ret i1 %_0
// CHECK: %[[RET:.+]] = icmp eq {{i8\*|ptr}} {{%1|%0}}, {{%1|%0}}
// CHECK: ret i1 %[[RET]]
y.len() == 0
}

Expand Down
Loading