Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add wrapping_offset_from which allows wrapping but still requires ptrs to be for the same allocation #112837

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 18 additions & 13 deletions compiler/rustc_codegen_ssa/src/mir/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
bx.icmp(IntPredicate::IntEQ, a, b)
}

sym::ptr_offset_from | sym::ptr_offset_from_unsigned => {
sym::ptr_offset_from | sym::ptr_offset_from_unsigned | sym::ptr_wrapping_offset_from => {
let ty = substs.type_at(0);
let pointee_size = bx.layout_of(ty).size;

Expand All @@ -447,18 +447,23 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
let a = bx.ptrtoint(a, bx.type_isize());
let b = bx.ptrtoint(b, bx.type_isize());
let pointee_size = bx.const_usize(pointee_size.bytes());
if name == sym::ptr_offset_from {
// This is the same sequence that Clang emits for pointer subtraction.
// It can be neither `nsw` nor `nuw` because the input is treated as
// unsigned but then the output is treated as signed, so neither works.
let d = bx.sub(a, b);
// this is where the signed magic happens (notice the `s` in `exactsdiv`)
bx.exactsdiv(d, pointee_size)
} else {
// The `_unsigned` version knows the relative ordering of the pointers,
// so can use `sub nuw` and `udiv exact` instead of dealing in signed.
let d = bx.unchecked_usub(a, b);
bx.exactudiv(d, pointee_size)
match name {
sym::ptr_offset_from | sym::ptr_wrapping_offset_from => {
// This is the same sequence that Clang emits for pointer subtraction.
// Even for `ptr_offset_from`, which cannot wrap, this can be neither `nsw`
// nor `nuw` because the input is treated as unsigned but then the output is
// treated as signed, so neither works.
let d = bx.sub(a, b);
// this is where the signed magic happens (notice the `s` in `exactsdiv`)
bx.exactsdiv(d, pointee_size)
}
sym::ptr_offset_from_unsigned => {
// The `_unsigned` version knows the relative ordering of the pointers,
// so can use `sub nuw` and `udiv exact` instead of dealing in signed.
let d = bx.unchecked_usub(a, b);
bx.exactudiv(d, pointee_size)
}
_ => bug!(),
}
}

Expand Down
89 changes: 49 additions & 40 deletions compiler/rustc_const_eval/src/interpret/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,9 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
let offset_ptr = ptr.wrapping_signed_offset(offset_bytes, self);
self.write_pointer(offset_ptr, dest)?;
}
sym::ptr_offset_from | sym::ptr_offset_from_unsigned => {
sym::ptr_offset_from
| sym::ptr_offset_from_unsigned
| sym::ptr_wrapping_offset_from => {
let a = self.read_pointer(&args[0])?;
let b = self.read_pointer(&args[1])?;

Expand All @@ -288,6 +290,8 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
(Err(_), _) | (_, Err(_)) => {
// We managed to find a valid allocation for one pointer, but not the other.
// That means they are definitely not pointing to the same allocation.
// FIXME: if at least one pointer was a wildcard pointer, we should not throw UB here
// but just use their absolute addresses instead.
throw_ub_custom!(
fluent::const_eval_different_allocations,
name = intrinsic_name,
Expand Down Expand Up @@ -315,57 +319,62 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
let b_offset = ImmTy::from_uint(b_offset, usize_layout);
self.overflowing_binary_op(BinOp::Sub, &a_offset, &b_offset)?
};
if overflowed {
// a < b
if intrinsic_name == sym::ptr_offset_from_unsigned {
throw_ub_custom!(
fluent::const_eval_unsigned_offset_from_overflow,
a_offset = a_offset,
b_offset = b_offset,
);
}
// The signed form of the intrinsic allows this. If we interpret the
// difference as isize, we'll get the proper signed difference. If that
// seems *positive*, they were more than isize::MAX apart.
let dist = val.to_target_isize(self)?;
if dist >= 0 {
throw_ub_custom!(
fluent::const_eval_offset_from_underflow,
name = intrinsic_name,
);
}
dist
} else {
// b >= a
let dist = val.to_target_isize(self)?;
// If converting to isize produced a *negative* result, we had an overflow
// because they were more than isize::MAX apart.
if dist < 0 {
throw_ub_custom!(
fluent::const_eval_offset_from_overflow,
name = intrinsic_name,
);
let dist = val.to_target_isize(self)?;
if intrinsic_name != sym::ptr_wrapping_offset_from {
// Overflow check.
if overflowed {
// a < b
if intrinsic_name == sym::ptr_offset_from_unsigned {
throw_ub_custom!(
fluent::const_eval_unsigned_offset_from_overflow,
a_offset = a_offset,
b_offset = b_offset,
);
}
// The signed form of the intrinsic allows this. If we interpret the
// difference as isize, we'll get the proper signed difference. If that
// seems *positive*, they were more than isize::MAX apart.
if dist >= 0 {
throw_ub_custom!(
fluent::const_eval_offset_from_underflow,
name = intrinsic_name,
);
}
} else {
// b >= a, no overflow during subtraction.
// If converting to isize produced a *negative* result, we had an overflow
// when converting to `isize` because they were more than isize::MAX apart.
if dist < 0 {
throw_ub_custom!(
fluent::const_eval_offset_from_overflow,
name = intrinsic_name,
);
}
}
dist
}
dist
};

// Check that the range between them is dereferenceable ("in-bounds or one past the
// end of the same allocation"). This is like the check in ptr_offset_inbounds.
let min_ptr = if dist >= 0 { b } else { a };
self.check_ptr_access_align(
min_ptr,
Size::from_bytes(dist.unsigned_abs()),
Align::ONE,
CheckInAllocMsg::OffsetFromTest,
)?;
if intrinsic_name != sym::ptr_wrapping_offset_from {
let min_ptr = if dist >= 0 { b } else { a };
self.check_ptr_access_align(
min_ptr,
Size::from_bytes(dist.unsigned_abs()),
Align::ONE,
CheckInAllocMsg::OffsetFromTest,
)?;
}

// Perform division by size to compute return value.
let ret_layout = if intrinsic_name == sym::ptr_offset_from_unsigned {
assert!(0 <= dist && dist <= self.target_isize_max());
usize_layout
} else {
assert!(self.target_isize_min() <= dist && dist <= self.target_isize_max());
if intrinsic_name != sym::ptr_wrapping_offset_from {
assert!(self.target_isize_min() <= dist && dist <= self.target_isize_max());
}
isize_layout
};
let pointee_layout = self.layout_of(substs.type_at(0))?;
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_hir_analysis/src/check/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ pub fn check_intrinsic_type(tcx: TyCtxt<'_>, it: &hir::ForeignItem<'_>) {
tcx.mk_unit(),
),

sym::ptr_offset_from => {
sym::ptr_offset_from | sym::ptr_wrapping_offset_from => {
(1, vec![tcx.mk_imm_ptr(param(0)), tcx.mk_imm_ptr(param(0))], tcx.types.isize)
}
sym::ptr_offset_from_unsigned => {
Expand Down
1 change: 1 addition & 0 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1156,6 +1156,7 @@ symbols! {
ptr_offset_from,
ptr_offset_from_unsigned,
ptr_unique,
ptr_wrapping_offset_from,
pub_macro_rules,
pub_restricted,
public,
Expand Down
6 changes: 6 additions & 0 deletions library/core/src/intrinsics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2337,6 +2337,12 @@ extern "rust-intrinsic" {
#[rustc_nounwind]
pub fn ptr_offset_from_unsigned<T>(ptr: *const T, base: *const T) -> usize;

/// See documentation of `<*const T>::wrapping_offset_from` for details.
#[rustc_const_unstable(feature = "ptr_wrapping_offset_from", issue = "none")]
#[rustc_nounwind]
#[cfg(not(bootstrap))]
pub fn ptr_wrapping_offset_from<T>(ptr: *const T, base: *const T) -> isize;

/// See documentation of `<*const T>::guaranteed_eq` for details.
/// Returns `2` if the result is unknown.
/// Returns `1` if the pointers are guaranteed equal
Expand Down
1 change: 1 addition & 0 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@
//
// Library features:
// tidy-alphabetical-start
#![cfg_attr(not(bootstrap), feature(ptr_wrapping_offset_from))]
#![feature(char_indices_offset)]
#![feature(const_align_of_val)]
#![feature(const_align_of_val_raw)]
Expand Down
114 changes: 112 additions & 2 deletions library/core/src/ptr/const_ptr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,9 @@ impl<T: ?Sized> *const T {
/// Calculates the distance between two pointers. The returned value is in
/// units of T: the distance in bytes divided by `mem::size_of::<T>()`.
///
/// This function is the inverse of [`offset`].
/// This function is the inverse of [`offset`]: it is valid to call if and only if
/// `self` could have been computed as `origin.offset(n)` for some `n`, and it will
/// then return that `n`.
///
/// [`offset`]: #method.offset
///
Expand Down Expand Up @@ -644,6 +646,12 @@ impl<T: ?Sized> *const T {
/// (Note that [`offset`] and [`add`] also have a similar limitation and hence cannot be used on
/// such large allocations either.)
///
/// The requirement for pointers to be derived from the same allocated object is primarily
/// needed for `const`-compatibility: at compile-time, pointers into *different* allocated
/// object do not have a known distance to each other. However, the requirement also exists at
/// runtime, and may be exploited by optimizations. You can use `(self as usize).sub(origin as
/// usize) / mem::size_of::<T>()` to avoid this requirement.
///
/// [`add`]: #method.add
/// [allocated object]: crate::ptr#allocated-object
///
Expand Down Expand Up @@ -701,7 +709,7 @@ impl<T: ?Sized> *const T {
/// units of **bytes**.
///
/// This is purely a convenience for casting to a `u8` pointer and
/// using [offset_from][pointer::offset_from] on it. See that method for
/// using [`offset_from`][pointer::offset_from] on it. See that method for
/// documentation and safety requirements.
///
/// For non-`Sized` pointees this operation considers only the data pointers,
Expand Down Expand Up @@ -799,6 +807,108 @@ impl<T: ?Sized> *const T {
unsafe { intrinsics::ptr_offset_from_unsigned(self, origin) }
}

/// Calculates the distance between two pointers using wrapping arithmetic. The returned value
/// is in units of T: the distance in bytes divided by `mem::size_of::<T>()`.
///
/// This function is the inverse of [`wrapping_offset`]: it is valid to call if and only if
/// `self` could have been computed as `origin.wrapping_offset(n)` for some `n`, and it will
/// then return that `n`.
///
/// [`wrapping_offset`]: #method.wrapping_offset
///
/// # Safety
///
/// If any of the following conditions are violated, the result is Undefined
/// Behavior:
///
/// * Both pointers must be *derived from* a pointer to the same [allocated object].
/// (See below for an example.)
///
/// * The distance between the pointers, in bytes, must be an exact multiple
/// of the size of `T`.
///
/// Unlike [`offset_from`][pointer::offset_from], this method does *not* require the pointers to
/// be in-bounds of the object they are derived from, nor does it impose any restrictions
/// regarding the maximum distance or wrapping around the address space.
///
/// The requirement for pointers to be derived from the same allocated object is primarily
/// needed for `const`-compatibility: at compile-time, pointers into *different* allocated
/// object do not have a known distance to each other. However, the requirement also exists at
/// runtime, and may be exploited by optimizations. You can use `(self as usize).sub(origin as
/// usize) / mem::size_of::<T>()` to avoid this requirement.
///
/// [allocated object]: crate::ptr#allocated-object
///
/// # Panics
///
/// This function panics if `T` is a Zero-Sized Type ("ZST").
///
/// # Examples
///
/// Basic usage:
///
/// ```
/// #![feature(ptr_wrapping_offset_from)]
/// let a = [0; 2];
/// let ptr1: *const i32 = &a[1];
/// let ptr2: *const i32 = a.as_ptr().wrapping_offset(3); // out-of-bounds!
/// unsafe {
/// assert_eq!(ptr2.wrapping_offset_from(ptr1), 2);
/// assert_eq!(ptr1.wrapping_offset_from(ptr2), -2);
/// assert_eq!(ptr1.wrapping_offset(2), ptr2);
/// assert_eq!(ptr2.wrapping_offset(-2), ptr1);
/// }
/// ```
///
/// *Incorrect* usage:
///
/// ```rust,no_run
/// #![feature(ptr_wrapping_offset_from)]
/// let ptr1 = Box::into_raw(Box::new(0u8)) as *const u8;
/// let ptr2 = Box::into_raw(Box::new(1u8)) as *const u8;
/// let diff = (ptr2 as isize).wrapping_sub(ptr1 as isize);
/// // Make ptr2_other an "alias" of ptr2, but derived from ptr1.
/// let ptr2_other = (ptr1 as *const u8).wrapping_offset(diff);
/// assert_eq!(ptr2 as usize, ptr2_other as usize);
/// // Since ptr2_other and ptr2 are derived from pointers to different objects,
/// // computing their offset is undefined behavior, even though
/// // they point to the same address!
/// unsafe {
/// let zero = ptr2_other.wrapping_offset_from(ptr2); // Undefined Behavior
/// }
/// ```
#[unstable(feature = "ptr_wrapping_offset_from", issue = "none")]
#[rustc_const_unstable(feature = "ptr_wrapping_offset_from", issue = "none")]
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
#[cfg(not(bootstrap))]
pub const unsafe fn wrapping_offset_from(self, origin: *const T) -> isize
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could also have a wrapping_sub_ptr, if there is interest.

where
T: Sized,
{
// SAFETY: the caller must uphold the safety contract for `ptr_offset_from`.
unsafe { intrinsics::ptr_wrapping_offset_from(self, origin) }
}

/// Calculates the distance between two pointers using wrapping arithmetic. The returned value
/// is in units of **bytes**.
///
/// This is purely a convenience for casting to a `u8` pointer and using
/// [`wrapping_offset_from`][pointer::wrapping_offset_from] on it. See that method for
/// documentation and safety requirements.
///
/// For non-`Sized` pointees this operation considers only the data pointers,
/// ignoring the metadata.
#[inline(always)]
#[unstable(feature = "ptr_wrapping_offset_from", issue = "none")]
#[rustc_const_unstable(feature = "ptr_wrapping_offset_from", issue = "none")]
Comment on lines +903 to +904
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be tracked with wrapping_offset_from or with the other bytes methods, not sure which is more appropriate.

#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
#[cfg(not(bootstrap))]
pub const unsafe fn wrapping_byte_offset_from<U: ?Sized>(self, origin: *const U) -> isize {
// SAFETY: the caller must uphold the safety contract for `wrapping_offset_from`.
unsafe { self.cast::<u8>().wrapping_offset_from(origin.cast::<u8>()) }
}

/// Returns whether two pointers are guaranteed to be equal.
///
/// At runtime this function behaves like `Some(self == other)`.
Expand Down
Loading