From a6f45ee238a6c7e086d52b3a818eca565d3cc10b Mon Sep 17 00:00:00 2001 From: Aria Beingessner Date: Tue, 22 Mar 2022 01:27:28 -0400 Subject: [PATCH 01/10] WIP PROOF-OF-CONCEPT: experiment with very strict pointer provenance This patch series examines the question: how bad would it be if we adopted an extremely strict pointer provenance model that completely banished all int<->ptr casts. The key insight to making this approach even *vaguely* pallatable is the ptr.with_addr(addr) -> ptr function, which takes a pointer and an address and creates a new pointer with that address and the provenance of the input pointer. In this way the "chain of custody" is completely and dynamically restored, making the model suitable even for dynamic checkers like CHERI and Miri. This is not a formal model, but lots of the docs discussing the model have been updated to try to the *concept* of this design in the hopes that it can be iterated on. Many new methods have been added to ptr to attempt to fill in semantic gaps that this introduces, or to just get the ball rolling on "hey this is a problem that needs to be solved, here's a bad solution as a starting point". --- library/core/src/ptr/const_ptr.rs | 170 +++++++++++++++++++----- library/core/src/ptr/mod.rs | 206 +++++++++++++++++++++++++++--- library/core/src/ptr/mut_ptr.rs | 166 +++++++++++++++++++----- 3 files changed, 461 insertions(+), 81 deletions(-) diff --git a/library/core/src/ptr/const_ptr.rs b/library/core/src/ptr/const_ptr.rs index 753220669831f..352794dc6d0dd 100644 --- a/library/core/src/ptr/const_ptr.rs +++ b/library/core/src/ptr/const_ptr.rs @@ -1,3 +1,6 @@ +// FIXME(strict_provenance_magic): this module still uses lots of casts to polyfill things. +#![cfg_attr(not(bootstrap), allow(fuzzy_provenance_casts))] + use super::*; use crate::cmp::Ordering::{self, Equal, Greater, Less}; use crate::intrinsics; @@ -60,44 +63,37 @@ impl *const T { /// Casts a pointer to its raw bits. /// - /// This is equivalent to `as usize`, but is more specific to enhance readability. - /// The inverse method is [`from_bits`](#method.from_bits). - /// - /// In particular, `*p as usize` and `p as usize` will both compile for - /// pointers to numeric types but do very different things, so using this - /// helps emphasize that reading the bits was intentional. - /// - /// # Examples + /// In general, pointers cannot be understood as "just an integer" + /// and cannot be created from one without additional context. /// - /// ``` - /// #![feature(ptr_to_from_bits)] - /// let array = [13, 42]; - /// let p0: *const i32 = &array[0]; - /// assert_eq!(<*const _>::from_bits(p0.to_bits()), p0); - /// let p1: *const i32 = &array[1]; - /// assert_eq!(p1.to_bits() - p0.to_bits(), 4); - /// ``` + /// If you would like to treat a pointer like an integer anyway, + /// see [`addr`][] and [`with_addr`][] for the responsible way to do that. #[unstable(feature = "ptr_to_from_bits", issue = "91126")] - pub fn to_bits(self) -> usize + pub fn to_bits(self) -> [u8; core::mem::size_of::<*const ()>()] where T: Sized, { - self as usize + unsafe { core::mem::transmute(self) } } /// Creates a pointer from its raw bits. /// /// This is equivalent to `as *const T`, but is more specific to enhance readability. - /// The inverse method is [`to_bits`](#method.to_bits). + /// The inverse method is [`to_bits`](#method.to_bits-1). 
/// /// # Examples /// /// ``` /// #![feature(ptr_to_from_bits)] /// use std::ptr::NonNull; - /// let dangling: *const u8 = NonNull::dangling().as_ptr(); - /// assert_eq!(<*const u8>::from_bits(1), dangling); + /// let dangling: *mut u8 = NonNull::dangling().as_ptr(); + /// assert_eq!(<*mut u8>::from_bits(1), dangling); /// ``` + #[rustc_deprecated( + since = "1.61.0", + reason = "This design is incompatible with Pointer Provenance", + suggestion = "from_addr" + )] #[unstable(feature = "ptr_to_from_bits", issue = "91126")] pub fn from_bits(bits: usize) -> Self where @@ -106,6 +102,87 @@ impl *const T { bits as Self } + /// Gets the "address" portion of the pointer. + /// + /// On most platforms this is a no-op, as the pointer is just an address, + /// and is equivalent to the deprecated `ptr as usize` cast. + /// + /// On more complicated platforms like CHERI and segmented architectures, + /// this may remove some important metadata. See [`with_addr`][] for + /// details on this distinction and why it's important. + #[unstable(feature = "strict_provenance", issue = "99999999")] + pub fn addr(self) -> usize + where + T: Sized, + { + // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. + self as usize + } + + /// Creates a new pointer with the given address. + /// + /// See also: [`ptr::fake_alloc`][] and [`ptr::zst_exists`][]. + /// + /// This replaces the deprecated `usize as ptr` cast, which had + /// fundamentally broken semantics because it couldn't restore + /// *segment* and *provenance*. + /// + /// A pointer semantically has 3 pieces of information associated with it: + /// + /// * Segment: The address-space it is part of. + /// * Provenance: An allocation (slice) that it is allowed to access. + /// * Address: The actual address it points at. + /// + /// The compiler and hardware need to properly understand all 3 of these + /// values at all times to properly execute your code. + /// + /// Segment and Provenance are implicitly defined by *how* a pointer is + /// constructed and generally propagates verbatim to all derived pointers. + /// It is therefore *impossible* to convert an address into a pointer + /// on its own, because there is no way to know what its segment and + /// provenance should be. + /// + /// By introducing a "representative" pointer into the process we can + /// properly construct a new pointer with *its* segment and provenance, + /// just as any other derived pointer would. This *should* be equivalent + /// to `wrapping_offset`ting the given pointer to the new address. See the + /// docs for `wrapping_offset` for the restrictions this applies. + /// + /// # Example + /// + /// Here is an example of how to properly use this API to mess around + /// with tagged pointers. Here we have a tag in the lowest bit: + /// + /// ```ignore + /// let my_tagged_ptr: *const T = ...; + /// + /// // Get the address and do whatever bit tricks we like + /// let addr = my_tagged_ptr.addr(); + /// let has_tag = (addr & 0x1) != 0; + /// let real_addr = addr & !0x1; + /// + /// // Reconstitute a pointer with the new address and use it + /// let my_untagged_ptr = my_tagged_ptr.with_addr(real_addr); + /// let val = *my_untagged_ptr; + /// ``` + #[unstable(feature = "strict_provenance", issue = "99999999")] + pub fn with_addr(self, addr: usize) -> Self + where + T: Sized, + { + // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. 
+ // + // In the mean-time, this operation is defined to be "as if" it was + // a wrapping_offset, so we can emulate it as such. This should properly + // restore pointer provenance even under today's compiler. + let self_addr = self.addr() as isize; + let dest_addr = addr as isize; + let offset = dest_addr.wrapping_sub(self_addr); + + // This is the canonical desugarring of this operation + self.cast::().wrapping_offset(offset).cast::() + } + /// Decompose a (possibly wide) pointer into its address and metadata components. /// /// The pointer can be later reconstructed with [`from_raw_parts`]. @@ -305,10 +382,10 @@ impl *const T { /// This operation itself is always safe, but using the resulting pointer is not. /// /// The resulting pointer "remembers" the [allocated object] that `self` points to; it must not - /// be used to read or write other allocated objects. + /// be used to read or write other allocated objects. This is tracked by provenance. /// - /// In other words, `let z = x.wrapping_offset((y as isize) - (x as isize))` does *not* make `z` - /// the same as `y` even if we assume `T` has size `1` and there is no overflow: `z` is still + /// In other words, `let z = x.wrapping_offset((y.addr() as isize) - (x.addr() as isize))` + /// does *not* make `z` the same as `y` even if we assume `T` has size `1` and there is no overflow: `z` is still /// attached to the object `x` is attached to, and dereferencing it is Undefined Behavior unless /// `x` and `y` point into the same allocated object. /// @@ -320,8 +397,39 @@ impl *const T { /// /// The delayed check only considers the value of the pointer that was dereferenced, not the /// intermediate values used during the computation of the final result. For example, - /// `x.wrapping_offset(o).wrapping_offset(o.wrapping_neg())` is always the same as `x`. In other - /// words, leaving the allocated object and then re-entering it later is permitted. + /// `x.wrapping_offset(o).wrapping_offset(o.wrapping_neg())` is always the same as `x`... + /// + /// Usually. + /// + /// More work needs to be done to define the rules here, but on CHERI it is not *actually* + /// a no-op to wrapping_offset a pointer to some random address and back again. For practical + /// applications that actually need this, it *will* generally work, but if your offset is + /// "too out of bounds" the system will mark your pointer as invalid, and subsequent reads + /// will fault *as if* the pointer had been corrupted by a non-pointer instruction. + /// + /// CHERI has a roughly 64-bit address space but its 128-bit pointers contain + /// 3 ostensibly-address-space-sized values: + /// + /// * 2 values for the "slice" that the pointer can access. + /// * 1 value for the actuall address it points to. + /// + /// To accomplish this, CHERI compresses the values and even requires large allocations + /// to have higher alignment to free up extra bits. This compression scheme can support + /// the pointer being offset outside of the slice, but only to an extent. A *generous* + /// extent, but a limited one nonetheless. To quote CHERI's documenation: + /// + /// > With 27 bits of the capability used for bounds, CHERI-MIPS and 64-bit + /// > CHERI-RISC-V provide the following guarantees: + /// > + /// > * A pointer is able to travel at least 1⁄4 the size of the object, or 2 KiB, + /// > whichever is greater, above its upper bound. + /// > * It is able to travel at least 1⁄8 the size of the object, or 1 KiB, + /// > whichever is greater, below its lower bound. 
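+    ///
+    /// As a non-normative sketch (not a doctest; `buf` is just a hypothetical local
+    /// array), the "leave the object and come back" pattern discussed above is:
+    ///
+    /// ```ignore
+    /// let buf = [0u8; 16];
+    /// let x: *const u8 = buf.as_ptr();
+    /// // Step well outside `buf` and back. The address round-trips exactly, and on
+    /// // most hardware the provenance does too; on CHERI, an offset far enough out
+    /// // of bounds can invalidate the pointer even though the address comes back.
+    /// let y = x.wrapping_offset(4096).wrapping_offset(-4096);
+    /// assert_eq!(x, y);
+    /// ```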
+ /// + /// Needless to say, any scheme that relies on reusing the least significant bits + /// of a pointer based on alignment is going to be fine. Any scheme which tries + /// to set *high* bits isn't going to work, but that was *already* extremely + /// platform-specific and not at all portable. /// /// [`offset`]: #method.offset /// [allocated object]: crate::ptr#allocated-object @@ -427,10 +535,10 @@ impl *const T { /// ```rust,no_run /// let ptr1 = Box::into_raw(Box::new(0u8)) as *const u8; /// let ptr2 = Box::into_raw(Box::new(1u8)) as *const u8; - /// let diff = (ptr2 as isize).wrapping_sub(ptr1 as isize); + /// let diff = (ptr2.addr() as isize).wrapping_sub(ptr1.addr() as isize); /// // Make ptr2_other an "alias" of ptr2, but derived from ptr1. /// let ptr2_other = (ptr1 as *const u8).wrapping_offset(diff); - /// assert_eq!(ptr2 as usize, ptr2_other as usize); + /// assert_eq!(ptr2.addr(), ptr2_other.addr()); /// // Since ptr2_other and ptr2 are derived from pointers to different objects, /// // computing their offset is undefined behavior, even though /// // they point to the same address! @@ -653,7 +761,7 @@ impl *const T { /// The resulting pointer "remembers" the [allocated object] that `self` points to; it must not /// be used to read or write other allocated objects. /// - /// In other words, `let z = x.wrapping_add((y as usize) - (x as usize))` does *not* make `z` + /// In other words, `let z = x.wrapping_add((y.addr()) - (x.addr()))` does *not* make `z` /// the same as `y` even if we assume `T` has size `1` and there is no overflow: `z` is still /// attached to the object `x` is attached to, and dereferencing it is Undefined Behavior unless /// `x` and `y` point into the same allocated object. @@ -715,7 +823,7 @@ impl *const T { /// The resulting pointer "remembers" the [allocated object] that `self` points to; it must not /// be used to read or write other allocated objects. /// - /// In other words, `let z = x.wrapping_sub((x as usize) - (y as usize))` does *not* make `z` + /// In other words, `let z = x.wrapping_sub((x.addr()) - (y.addr()))` does *not* make `z` /// the same as `y` even if we assume `T` has size `1` and there is no overflow: `z` is still /// attached to the object `x` is attached to, and dereferencing it is Undefined Behavior unless /// `x` and `y` point into the same allocated object. @@ -1003,7 +1111,7 @@ impl *const [T] { /// use std::ptr; /// /// let slice: *const [i8] = ptr::slice_from_raw_parts(ptr::null(), 3); - /// assert_eq!(slice.as_ptr(), 0 as *const i8); + /// assert_eq!(slice.as_ptr(), ptr::null()); /// ``` #[inline] #[unstable(feature = "slice_ptr_get", issue = "74265")] diff --git a/library/core/src/ptr/mod.rs b/library/core/src/ptr/mod.rs index 59b1b4c136752..b1a8ded66c611 100644 --- a/library/core/src/ptr/mod.rs +++ b/library/core/src/ptr/mod.rs @@ -1,3 +1,6 @@ +// FIXME(strict_provenance_magic): this module still uses lots of casts to polyfill things. +#![cfg_attr(not(bootstrap), allow(fuzzy_provenance_casts))] + //! Manually manage memory through raw pointers. //! //! *[See also the pointer primitive types](pointer).* @@ -20,12 +23,8 @@ //! be *dereferenceable*: the memory range of the given size starting at the pointer must all be //! within the bounds of a single allocated object. Note that in Rust, //! every (stack-allocated) variable is considered a separate allocated object. -//! * Even for operations of [size zero][zst], the pointer must not be pointing to deallocated -//! 
memory, i.e., deallocation makes pointers invalid even for zero-sized operations. However, -//! casting any non-zero integer *literal* to a pointer is valid for zero-sized accesses, even if -//! some memory happens to exist at that address and gets deallocated. This corresponds to writing -//! your own allocator: allocating zero-sized objects is not very hard. The canonical way to -//! obtain a pointer that is valid for zero-sized accesses is [`NonNull::dangling`]. +//! * Deallocating memory invalidates all pointers with provenance to that allocation, *even* +//! for accesses of [size zero][zst]. //! * All accesses performed by functions in this module are *non-atomic* in the sense //! of [atomic operations] used to synchronize between threads. This means it is //! undefined behavior to perform two concurrent accesses to the same location from different @@ -55,7 +54,7 @@ //! has size 0, i.e., even if memory is not actually touched. Consider using //! [`NonNull::dangling`] in such cases. //! -//! ## Allocated object +//! ## Allocated Object and Provenance //! //! For several operations, such as [`offset`] or field projections (`expr.field`), the notion of an //! "allocated object" becomes relevant. An allocated object is a contiguous region of memory. @@ -63,12 +62,38 @@ //! separate allocated object), heap allocations (each allocation created by the global allocator is //! a separate allocated object), and `static` variables. //! +//! When an object is allocated there should be only one way to access to it (the variable's name, +//! the pointer returned by malloc). This One True Handle is given a unique *provenance* which +//! gives it permission to access that object. This provenance, and therefore permission to access +//! the allocation, is implicitly shared with all pointers that are either directly or transitively +//! *derived* from the One True Handle through operations like `offset` or borrowing. +//! +//! (Unclear detail: taking a subslice is proposed to create a slice that is no longer allowed +//! to access the full range of memory -- is this part of provenance or another system?) +//! +//! You may always "forge" an allocated object of size 0 at any properly aligned non-[null] +//! address with [`zst_exists`] or [`NonNull::dangling`], even at addresses which "accidentally" +//! overlap other allocations. This is allowed because the forged pointer has its own +//! provenance, which means the compiler can distinguish between the forged pointer +//! and those that *genuinely* point into the allocation (and the forged pointer can't +//! be used to load or store any memory, so it really can't do anything observable +//! that alias analysis might be concerned with). +//! +//! Critically, this means that freeing the *actually* allocated object does not +//! invalidate the "accidentally" overlapping forged allocation. This is in some sense +//! equivalent to the fact you don't get to be "lucky" and use a freed pointer whenever +//! the memory it points at happens to get reallocated -- the new allocation has a fresh +//! provenance, and so has no relationship to the freed pointer, even if the addresses +//! may be the same. +//! +//! //! [aliasing]: ../../nomicon/aliasing.html //! [book]: ../../book/ch19-01-unsafe-rust.html#dereferencing-a-raw-pointer //! [ub]: ../../reference/behavior-considered-undefined.html //! [zst]: ../../nomicon/exotic-sizes.html#zero-sized-types-zsts //! [atomic operations]: crate::sync::atomic //! [`offset`]: pointer::offset +//! 
[`zst_exists`]: pointer::zst_exists #![stable(feature = "rust1", since = "1.0.0")] @@ -210,7 +235,7 @@ pub unsafe fn drop_in_place(to_drop: *mut T) { #[rustc_const_stable(feature = "const_ptr_null", since = "1.24.0")] #[rustc_diagnostic_item = "ptr_null"] pub const fn null() -> *const T { - 0 as *const T + invalid::(0) } /// Creates a null mutable raw pointer. @@ -230,7 +255,143 @@ pub const fn null() -> *const T { #[rustc_const_stable(feature = "const_ptr_null", since = "1.24.0")] #[rustc_diagnostic_item = "ptr_null_mut"] pub const fn null_mut() -> *mut T { - 0 as *mut T + invalid_mut::(0) +} + +/// Forge a pointer to a Zero-Sized Type (ZST) from nothing. +/// +/// Zero-sized types do not actually exist in memory, and therefore you cannot actually +/// "read" or "write" a ZST (any method that claims to do so is just playing pretend, +/// although you do still need to respect alignment for something like `&[u32; 0]`). +/// As a result, you are free to claim a ZSTs exists anywhere you want (except null). +/// +/// This API exists to make the soundness of this pattern explicit, even under +/// "strict provenance". It is equivalent to the deprecated `addr as *mut T` cast. +/// +/// **BUT YOU AREN'T ACTUALLY ALLOWED TO BLINDLY FORGE ZST _INSTANCES_.** +/// +/// It's sound for an API to use an instance of a ZST to enforce some important +/// safety property. So for instance, you can make an API like this: +/// +/// ```ignore +/// pub struct Step1Token(_private_to_construct: ()); +/// +/// pub fn step1() -> Step1Token { ... } +/// pub fn step2(proof: Step1Token) { ... } +/// ``` +/// +/// And it's sound in the body of `step2` to assume that `step1` has been run +/// beforehand, because the only way to get an instance of Step1Token is to call +/// `step1` (assuming `step1` is indeed the only API that creates one). +/// +/// A well-behaved abstraction should conceptually only be "reading" ZSTs that it +/// has previously "written". You don't *actually* need to do the write, and could +/// feed it into [`mem::forget`][] instead, but for ZSTs `write` is a perfectly +/// good way to say `forget` and better expresses the semantics of your code. +/// +/// Anything that stores *many* ZSTs should at the minimum maintain a counter of how +/// many it has written so that it can know how many it can/must read later. +/// `Vec<()>` is basically just a counter that goes up on `push` and down on `pop`. +/// +/// Note: if you need to "allocate" memory for a buffer of ZSTs, +/// [`core::ptr::NonNull::dangling`][] is more useful, because it +/// handles alignment for you. +/// +/// # Example +/// +/// ``` +/// use core::{ptr, mem}; +/// +/// // I store my ZSTs at the *coolest* address +/// let my_good_ptr = ptr::zst_exists::<()>(0xc001_add7); +/// +/// // "store" and then "load" a ZST at this cool address. +/// my_good_ptr.write(()); +/// let output = my_good_ptr.read(); +/// ``` +#[inline(always)] +#[must_use] +#[rustc_const_stable(feature = "strict_provenance", since = "1.61.0")] +#[unstable(feature = "strict_provenance", issue = "99999999")] +pub const fn zst_exists(addr: usize) -> *mut T +where + T: Sized, +{ + // Probably a better way to enforce this, too tired. + assert!(core::mem::size_of::() == 0); + + // We are the language so we get to know that `invalid` is fine here. + invalid_mut::(addr) +} + +/// Claim that you have "allocated" and have unique access to the range +/// of memory `address .. address + len * size_of::()`. 
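+///
+/// As a rough sketch of the memory-mapped-peripheral use case discussed below
+/// (the register address and type here are hypothetical, not part of any API
+/// guarantee):
+///
+/// ```ignore
+/// // Pretend to be the allocator for a single device register.
+/// let status_reg: *mut u32 = unsafe { core::ptr::claim_alloc(0xFFFF_0040, 1) };
+/// // All later accesses to this register go through `status_reg` (or pointers
+/// // derived from it), typically as volatile reads/writes.
+/// let status = unsafe { status_reg.read_volatile() };
+/// ```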
+/// +/// (This is an extremely "shot in the dark" design, but throwing it out +/// here as a possible sketch of an answer to the problem.) +/// +/// When dealing with low-level situations like memory-mapped peripherals, +/// the programmer typically needs to just blindly assume a specific address +/// can be interpretted as a pointer and read/written. +/// +/// This is a problem for Pointer Provenance and Segmenting, because there +/// is no "chain of custody" to an allocation. One possible solution to this +/// is for the programmer to Pretend To Be Malloc and "allocate" the address. +/// See [`with_addr`] for more details. +/// +/// Just as with *real* malloc, the compiler is free to assume the pointer +/// returned from this function is completely unaliased, and that all accesses +/// to this range of memory occur *only* from pointers with provenance derived +/// from this one. These assumptions can be loosened with operations like +/// [`read_volatile`][] which you were probably going to use already. +/// +/// This is only sound to do if: +/// +/// * You are actually allowed to access that specified range of memory +/// * All future accesses to this range of memory are through this pointer +/// * You never `claim_alloc` this memory again (Maybe? Should we have claim_dealloc?) +/// +/// Basically, pretend you're `malloc` and think about how bad it would be +/// if `malloc` returned the same pointer twice (without any freeing). +/// +/// This design needs to be workshopped but needless to say it is Extremely +/// Undefined Behaviour to do wrong things with this. +#[must_use] +#[unstable(feature = "strict_provenance", issue = "99999999")] +pub unsafe fn claim_alloc(addr: usize, _len: usize) -> *mut T +where + T: Sized, +{ + // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. + addr as *mut T +} + +/// Creates an invalid pointer with the given address. +/// +/// This pointer will have no provenance associated with it and is therefore +/// UB to read/write/offset it. This mostly exists to facilitate things +/// like ptr::null and NonNull::dangling which make invalid pointers. +#[inline(always)] +#[must_use] +#[rustc_const_stable(feature = "strict_provenance", since = "1.61.0")] +#[unstable(feature = "strict_provenance", issue = "99999999")] +pub const fn invalid(addr: usize) -> *const T { + // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. + addr as *const T +} + +/// Creates a mutable invalid pointer with the given address. +/// +/// This pointer will have no provenance associated with it and is therefore +/// UB to read/write/offset it. This mostly exists to facilitate things +/// like ptr::null and NonNull::dangling which make invalid pointers. +#[inline(always)] +#[must_use] +#[rustc_const_stable(feature = "strict_provenance", since = "1.61.0")] +#[unstable(feature = "strict_provenance", issue = "99999999")] +pub const fn invalid_mut(addr: usize) -> *mut T { + // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. + addr as *mut T } /// Forms a raw slice from a pointer and a length. @@ -1110,6 +1271,8 @@ pub(crate) unsafe fn align_offset(p: *const T, a: usize) -> usize { unchecked_shl, unchecked_shr, unchecked_sub, wrapping_add, wrapping_mul, wrapping_sub, }; + let addr = p.addr(); + /// Calculate multiplicative modular inverse of `x` modulo `m`. 
/// /// This implementation is tailored for `align_offset` and has following preconditions: @@ -1170,13 +1333,10 @@ pub(crate) unsafe fn align_offset(p: *const T, a: usize) -> usize { // // which distributes operations around the load-bearing, but pessimizing `and` sufficiently // for LLVM to be able to utilize the various optimizations it knows about. - return wrapping_sub( - wrapping_add(p as usize, a_minus_one) & wrapping_sub(0, a), - p as usize, - ); + return wrapping_sub(wrapping_add(addr, a_minus_one) & wrapping_sub(0, a), addr); } - let pmoda = p as usize & a_minus_one; + let pmoda = addr & a_minus_one; if pmoda == 0 { // Already aligned. Yay! return 0; @@ -1193,7 +1353,7 @@ pub(crate) unsafe fn align_offset(p: *const T, a: usize) -> usize { let gcd = unsafe { unchecked_shl(1usize, gcdpow) }; // SAFETY: gcd is always greater or equal to 1. - if p as usize & unsafe { unchecked_sub(gcd, 1) } == 0 { + if addr & unsafe { unchecked_sub(gcd, 1) } == 0 { // This branch solves for the following linear congruence equation: // // ` p + so = 0 mod a ` @@ -1347,6 +1507,10 @@ pub fn hash(hashee: *const T, into: &mut S) { hashee.hash(into); } +// FIXME(strict_provenance_magic): function pointers have buggy codegen that +// necessitates casting to a usize to get the backend to do the right thing. +// for now I will break AVR to silence *a billion* lints. + // Impls for function pointers macro_rules! fnptr_impls_safety_abi { ($FnTy: ty, $($Arg: ident),*) => { @@ -1354,7 +1518,7 @@ macro_rules! fnptr_impls_safety_abi { impl PartialEq for $FnTy { #[inline] fn eq(&self, other: &Self) -> bool { - *self as usize == *other as usize + *self as *const () == *other as *const () } } @@ -1365,7 +1529,7 @@ macro_rules! fnptr_impls_safety_abi { impl PartialOrd for $FnTy { #[inline] fn partial_cmp(&self, other: &Self) -> Option { - (*self as usize).partial_cmp(&(*other as usize)) + (*self as *const ()).partial_cmp(&(*other as *const ())) } } @@ -1373,14 +1537,14 @@ macro_rules! fnptr_impls_safety_abi { impl Ord for $FnTy { #[inline] fn cmp(&self, other: &Self) -> Ordering { - (*self as usize).cmp(&(*other as usize)) + (*self as *const ()).cmp(&(*other as *const ())) } } #[stable(feature = "fnptr_impls", since = "1.4.0")] impl hash::Hash for $FnTy { fn hash(&self, state: &mut HH) { - state.write_usize(*self as usize) + state.write_usize((*self as *const ()).addr()) } } @@ -1392,7 +1556,7 @@ macro_rules! fnptr_impls_safety_abi { // is preserved in the final function pointer. // // https://github.com/avr-rust/rust/issues/143 - fmt::Pointer::fmt(&(*self as usize as *const ()), f) + fmt::Pointer::fmt(&(*self as *const ()), f) } } @@ -1404,7 +1568,7 @@ macro_rules! fnptr_impls_safety_abi { // is preserved in the final function pointer. // // https://github.com/avr-rust/rust/issues/143 - fmt::Pointer::fmt(&(*self as usize as *const ()), f) + fmt::Pointer::fmt(&(*self as *const ()), f) } } } diff --git a/library/core/src/ptr/mut_ptr.rs b/library/core/src/ptr/mut_ptr.rs index 861412703d3c6..088f4e926f6ce 100644 --- a/library/core/src/ptr/mut_ptr.rs +++ b/library/core/src/ptr/mut_ptr.rs @@ -1,3 +1,6 @@ +// FIXME(strict_provenance_magic): this module still uses lots of casts to polyfill things. +#![cfg_attr(not(bootstrap), allow(fuzzy_provenance_casts))] + use super::*; use crate::cmp::Ordering::{self, Equal, Greater, Less}; use crate::intrinsics; @@ -63,30 +66,17 @@ impl *mut T { /// Casts a pointer to its raw bits. /// - /// This is equivalent to `as usize`, but is more specific to enhance readability. 
- /// The inverse method is [`from_bits`](#method.from_bits-1). - /// - /// In particular, `*p as usize` and `p as usize` will both compile for - /// pointers to numeric types but do very different things, so using this - /// helps emphasize that reading the bits was intentional. - /// - /// # Examples + /// In general, pointers cannot be understood as "just an integer" + /// and cannot be created from one without additional context. /// - /// ``` - /// #![feature(ptr_to_from_bits)] - /// let mut array = [13, 42]; - /// let mut it = array.iter_mut(); - /// let p0: *mut i32 = it.next().unwrap(); - /// assert_eq!(<*mut _>::from_bits(p0.to_bits()), p0); - /// let p1: *mut i32 = it.next().unwrap(); - /// assert_eq!(p1.to_bits() - p0.to_bits(), 4); - /// ``` + /// If you would like to treat a pointer like an integer anyway, + /// see [`addr`][] and [`with_addr`][] for the responsible way to do that. #[unstable(feature = "ptr_to_from_bits", issue = "91126")] - pub fn to_bits(self) -> usize + pub fn to_bits(self) -> [u8; core::mem::size_of::<*mut ()>()] where T: Sized, { - self as usize + unsafe { core::mem::transmute(self) } } /// Creates a pointer from its raw bits. @@ -102,6 +92,11 @@ impl *mut T { /// let dangling: *mut u8 = NonNull::dangling().as_ptr(); /// assert_eq!(<*mut u8>::from_bits(1), dangling); /// ``` + #[rustc_deprecated( + since = "1.61.0", + reason = "This design is incompatible with Pointer Provenance", + suggestion = "from_addr" + )] #[unstable(feature = "ptr_to_from_bits", issue = "91126")] pub fn from_bits(bits: usize) -> Self where @@ -110,6 +105,88 @@ impl *mut T { bits as Self } + /// Gets the "address" portion of the pointer. + /// + /// On most platforms this is a no-op, as the pointer is just an address, + /// and is equivalent to the deprecated `ptr as usize` cast. + /// + /// On more complicated platforms like CHERI and segmented architectures, + /// this may remove some important metadata. See [`with_addr`][] for + /// details on this distinction and why it's important. + #[unstable(feature = "strict_provenance", issue = "99999999")] + pub fn addr(self) -> usize + where + T: Sized, + { + // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. + self as usize + } + + /// Creates a new pointer with the given address. + /// + /// See also: [`ptr::fake_alloc`][] and [`ptr::zst_exists`][]. + /// + /// This replaces the deprecated `usize as ptr` cast, which had + /// fundamentally broken semantics because it couldn't restore + /// *segment* and *provenance*. + /// + /// A pointer semantically has 3 pieces of information associated with it: + /// + /// * Segment: The address-space it is part of. + /// * Provenance: An allocation (slice) that it is allowed to access. + /// * Address: The actual address it points at. + /// + /// The compiler and hardware need to properly understand all 3 of these + /// values at all times to properly execute your code. + /// + /// Segment and Provenance are implicitly defined by *how* a pointer is + /// constructed and generally propagates verbatim to all derived pointers. + /// It is therefore *impossible* to convert an address into a pointer + /// on its own, because there is no way to know what its segment and + /// provenance should be. + /// + /// By introducing a "representative" pointer into the process we can + /// properly construct a new pointer with *its* segment and provenance, + /// just as any other derived pointer would. 
This *should* be equivalent + /// to `wrapping_offset`ting the given pointer to the new address. See the + /// docs for `wrapping_offset` for the restrictions this applies. + /// + /// # Example + /// + /// Here is an example of how to properly use this API to mess around + /// with tagged pointers. Here we have a tag in the lowest bit: + /// + /// ```ignore + /// let my_tagged_ptr: *mut T = ...; + /// + /// // Get the address and do whatever bit tricks we like + /// let addr = my_tagged_ptr.addr(); + /// let has_tag = (addr & 0x1) != 0; + /// let real_addr = addr & !0x1; + /// + /// // Reconstitute a pointer with the new address and use it + /// let my_untagged_ptr = my_tagged_ptr.with_addr(real_addr); + /// *my_untagged_ptr = ...; + /// ``` + #[unstable(feature = "strict_provenance", issue = "99999999")] + pub fn with_addr(self, addr: usize) -> Self + where + T: Sized, + { + // FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic. + // + // In the mean-time, this operation is defined to be "as if" it was + // a wrapping_offset, so we can emulate it as such. This should properly + // restore pointer provenance even under today's compiler. + let self_addr = self.addr() as isize; + let dest_addr = addr as isize; + let offset = dest_addr.wrapping_sub(self_addr); + + // This is the canonical desugarring of this operation + // + self.cast::().wrapping_offset(offset).cast::() + } + /// Decompose a (possibly wide) pointer into its address and metadata components. /// /// The pointer can be later reconstructed with [`from_raw_parts_mut`]. @@ -316,10 +393,10 @@ impl *mut T { /// This operation itself is always safe, but using the resulting pointer is not. /// /// The resulting pointer "remembers" the [allocated object] that `self` points to; it must not - /// be used to read or write other allocated objects. + /// be used to read or write other allocated objects. This is tracked by provenance. /// - /// In other words, `let z = x.wrapping_offset((y as isize) - (x as isize))` does *not* make `z` - /// the same as `y` even if we assume `T` has size `1` and there is no overflow: `z` is still + /// In other words, `let z = x.wrapping_offset((y.addr() as isize) - (x.addr() as isize))` + /// does *not* make `z` the same as `y` even if we assume `T` has size `1` and there is no overflow: `z` is still /// attached to the object `x` is attached to, and dereferencing it is Undefined Behavior unless /// `x` and `y` point into the same allocated object. /// @@ -331,8 +408,39 @@ impl *mut T { /// /// The delayed check only considers the value of the pointer that was dereferenced, not the /// intermediate values used during the computation of the final result. For example, - /// `x.wrapping_offset(o).wrapping_offset(o.wrapping_neg())` is always the same as `x`. In other - /// words, leaving the allocated object and then re-entering it later is permitted. + /// `x.wrapping_offset(o).wrapping_offset(o.wrapping_neg())` is always the same as `x`... + /// + /// Usually. + /// + /// More work needs to be done to define the rules here, but on CHERI it is not *actually* + /// a no-op to wrapping_offset a pointer to some random address and back again. For practical + /// applications that actually need this, it *will* generally work, but if your offset is + /// "too out of bounds" the system will mark your pointer as invalid, and subsequent reads + /// will fault *as if* the pointer had been corrupted by a non-pointer instruction. 
+ /// + /// CHERI has a roughly 64-bit address space but its 128-bit pointers contain + /// 3 ostensibly-address-space-sized values: + /// + /// * 2 values for the "slice" that the pointer can access. + /// * 1 value for the actuall address it points to. + /// + /// To accomplish this, CHERI compresses the values and even requires large allocations + /// to have higher alignment to free up extra bits. This compression scheme can support + /// the pointer being offset outside of the slice, but only to an extent. A *generous* + /// extent, but a limited one nonetheless. To quote CHERI's documenation: + /// + /// > With 27 bits of the capability used for bounds, CHERI-MIPS and 64-bit + /// > CHERI-RISC-V provide the following guarantees: + /// > + /// > * A pointer is able to travel at least 1⁄4 the size of the object, or 2 KiB, + /// > whichever is greater, above its upper bound. + /// > * It is able to travel at least 1⁄8 the size of the object, or 1 KiB, + /// > whichever is greater, below its lower bound. + /// + /// Needless to say, any scheme that relies on reusing the least significant bits + /// of a pointer based on alignment is going to be fine. Any scheme which tries + /// to set *high* bits isn't going to work, but that was *already* extremely + /// platform-specific and not at all portable. /// /// [`offset`]: #method.offset /// [allocated object]: crate::ptr#allocated-object @@ -605,10 +713,10 @@ impl *mut T { /// ```rust,no_run /// let ptr1 = Box::into_raw(Box::new(0u8)); /// let ptr2 = Box::into_raw(Box::new(1u8)); - /// let diff = (ptr2 as isize).wrapping_sub(ptr1 as isize); + /// let diff = (ptr2.addr() as isize).wrapping_sub(ptr1.addr() as isize); /// // Make ptr2_other an "alias" of ptr2, but derived from ptr1. /// let ptr2_other = (ptr1 as *mut u8).wrapping_offset(diff); - /// assert_eq!(ptr2 as usize, ptr2_other as usize); + /// assert_eq!(ptr2.addr(), ptr2_other.addr()); /// // Since ptr2_other and ptr2 are derived from pointers to different objects, /// // computing their offset is undefined behavior, even though /// // they point to the same address! @@ -767,7 +875,7 @@ impl *mut T { /// The resulting pointer "remembers" the [allocated object] that `self` points to; it must not /// be used to read or write other allocated objects. /// - /// In other words, `let z = x.wrapping_add((y as usize) - (x as usize))` does *not* make `z` + /// In other words, `let z = x.wrapping_add((y.addr()) - (x.addr()))` does *not* make `z` /// the same as `y` even if we assume `T` has size `1` and there is no overflow: `z` is still /// attached to the object `x` is attached to, and dereferencing it is Undefined Behavior unless /// `x` and `y` point into the same allocated object. @@ -829,7 +937,7 @@ impl *mut T { /// The resulting pointer "remembers" the [allocated object] that `self` points to; it must not /// be used to read or write other allocated objects. /// - /// In other words, `let z = x.wrapping_sub((x as usize) - (y as usize))` does *not* make `z` + /// In other words, `let z = x.wrapping_sub((x.addr()) - (y.addr()))` does *not* make `z` /// the same as `y` even if we assume `T` has size `1` and there is no overflow: `z` is still /// attached to the object `x` is attached to, and dereferencing it is Undefined Behavior unless /// `x` and `y` point into the same allocated object. 
@@ -1273,7 +1381,7 @@ impl *mut [T] { /// use std::ptr; /// /// let slice: *mut [i8] = ptr::slice_from_raw_parts_mut(ptr::null_mut(), 3); - /// assert_eq!(slice.as_mut_ptr(), 0 as *mut i8); + /// assert_eq!(slice.as_mut_ptr(), ptr::null_mut()); /// ``` #[inline(always)] #[unstable(feature = "slice_ptr_get", issue = "74265")] From 93f7f06737686fdd6a44127d51129764c0d0a0bc Mon Sep 17 00:00:00 2001 From: Aria Beingessner Date: Mon, 21 Mar 2022 19:25:44 -0400 Subject: [PATCH 02/10] WIP PROOF-OF-CONCEPT: Make the compiler complain about all int<->ptr casts. ALL OF THEM --- compiler/rustc_lint_defs/src/builtin.rs | 36 ++++++++++++++ compiler/rustc_typeck/src/check/cast.rs | 64 +++++++++++++++++++++++-- 2 files changed, 96 insertions(+), 4 deletions(-) diff --git a/compiler/rustc_lint_defs/src/builtin.rs b/compiler/rustc_lint_defs/src/builtin.rs index 88e049410790d..6933b20457811 100644 --- a/compiler/rustc_lint_defs/src/builtin.rs +++ b/compiler/rustc_lint_defs/src/builtin.rs @@ -2648,6 +2648,41 @@ declare_lint! { }; } +declare_lint! { + /// The `fuzzy_provenance_casts` lint detects an `as` cast between an integer + /// and a pointer. + /// + /// ### Example + /// + /// fn main() { + /// let my_ref = &0; + /// let my_addr = my_ref as usize; + /// } + /// ``` + /// + /// {{produces}} + /// + /// ### Explanation + /// + /// Casting a pointer to an integer or an integer to a pointer is a lossy operation, + /// because beyond just an *address* a pointer may be associated with a particular + /// *provenance* and *segment*. This information is required by both the compiler + /// and the hardware to correctly execute your code. If you need to do this kind + /// of operation, use ptr::addr and ptr::with_addr. + /// + /// This is a [future-incompatible] lint to transition this to a hard error + /// in the future. See [issue #9999999] for more details. + /// + /// [future-incompatible]: ../index.md#future-incompatible-lints + /// [issue #9999999]: https://github.com/rust-lang/rust/issues/9999999 + pub FUZZY_PROVENANCE_CASTS, + Warn, + "A lossy pointer-integer integer cast is used", + @future_incompatible = FutureIncompatibleInfo { + reference: "issue #9999999 ", + }; +} + declare_lint! { /// The `const_evaluatable_unchecked` lint detects a generic constant used /// in a type. @@ -3101,6 +3136,7 @@ declare_lint_pass! 
{ UNSAFE_OP_IN_UNSAFE_FN, INCOMPLETE_INCLUDE, CENUM_IMPL_DROP_CAST, + FUZZY_PROVENANCE_CASTS, CONST_EVALUATABLE_UNCHECKED, INEFFECTIVE_UNSTABLE_TRAIT_IMPL, MUST_NOT_SUSPEND, diff --git a/compiler/rustc_typeck/src/check/cast.rs b/compiler/rustc_typeck/src/check/cast.rs index 7ce428ea12466..9b67fd54bd66f 100644 --- a/compiler/rustc_typeck/src/check/cast.rs +++ b/compiler/rustc_typeck/src/check/cast.rs @@ -807,11 +807,22 @@ impl<'a, 'tcx> CastCheck<'tcx> { // ptr -> * (Ptr(m_e), Ptr(m_c)) => self.check_ptr_ptr_cast(fcx, m_e, m_c), // ptr-ptr-cast - (Ptr(m_expr), Int(_)) => self.check_ptr_addr_cast(fcx, m_expr), // ptr-addr-cast - (FnPtr, Int(_)) => Ok(CastKind::FnPtrAddrCast), - // * -> ptr - (Int(_), Ptr(mt)) => self.check_addr_ptr_cast(fcx, mt), // addr-ptr-cast + // ptr-addr-cast + (Ptr(m_expr), Int(_)) => { + self.fuzzy_provenance_ptr2int_lint(fcx, t_from); + self.check_ptr_addr_cast(fcx, m_expr) + } + (FnPtr, Int(_)) => { + self.fuzzy_provenance_ptr2int_lint(fcx, t_from); + Ok(CastKind::FnPtrAddrCast) + } + // addr-ptr-cast + (Int(_), Ptr(mt)) => { + self.fuzzy_provenance_int2ptr_lint(fcx); + self.check_addr_ptr_cast(fcx, mt) + } + // fn-ptr-cast (FnPtr, Ptr(mt)) => self.check_fptr_ptr_cast(fcx, mt), // prim -> prim @@ -934,6 +945,7 @@ impl<'a, 'tcx> CastCheck<'tcx> { fcx: &FnCtxt<'a, 'tcx>, m_cast: TypeAndMut<'tcx>, ) -> Result { + self.fuzzy_provenance_int2ptr_lint(fcx); // ptr-addr cast. pointer must be thin. match fcx.pointer_kind(m_cast.ty, self.span)? { None => Err(CastError::UnknownCastPtrKind), @@ -973,6 +985,50 @@ impl<'a, 'tcx> CastCheck<'tcx> { } } } + + fn fuzzy_provenance_ptr2int_lint(&self, fcx: &FnCtxt<'a, 'tcx>, t_from: CastTy<'tcx>) { + fcx.tcx.struct_span_lint_hir( + lint::builtin::FUZZY_PROVENANCE_CASTS, + self.expr.hir_id, + self.span, + |err| { + let mut err = err.build(&format!( + "strict provenance disallows casting pointer `{}` to integer `{}`", + self.expr_ty, self.cast_ty + )); + + if let CastTy::FnPtr = t_from { + err.help( + "use `(... 
as *const u8).addr()` to obtain \ + the address of a function pointer", + ); + } else { + err.help("use `.addr()` to obtain the address of a pointer"); + } + + err.emit(); + }, + ); + } + + fn fuzzy_provenance_int2ptr_lint(&self, fcx: &FnCtxt<'a, 'tcx>) { + fcx.tcx.struct_span_lint_hir( + lint::builtin::FUZZY_PROVENANCE_CASTS, + self.expr.hir_id, + self.span, + |err| { + err.build(&format!( + "strict provenance disallows casting integer `{}` to pointer `{}`", + self.expr_ty, self.cast_ty + )) + .help( + "use `.with_addr(...)` to adjust a valid pointer \ + in the same allocation, to this address", + ) + .emit(); + }, + ); + } } impl<'a, 'tcx> FnCtxt<'a, 'tcx> { From 701aa6c6ef190b828480eccb642292cf23d6363d Mon Sep 17 00:00:00 2001 From: Aria Beingessner Date: Tue, 22 Mar 2022 01:24:55 -0400 Subject: [PATCH 03/10] WIP PROOF-OF-CONCEPT: handle all the fallout in the libs Still working on this, but it seems to largely be a lot of `as usize` -> `.addr()` --- library/alloc/src/lib.rs | 1 + library/alloc/src/rc.rs | 5 +- library/alloc/src/slice.rs | 2 +- library/alloc/src/sync.rs | 2 +- library/alloc/src/vec/into_iter.rs | 2 +- library/core/src/alloc/layout.rs | 2 +- library/core/src/fmt/mod.rs | 8 +++- library/core/src/hash/mod.rs | 4 +- library/core/src/intrinsics.rs | 10 ++-- library/core/src/ptr/non_null.rs | 6 +-- library/core/src/ptr/unique.rs | 2 +- library/core/src/slice/ascii.rs | 6 +-- library/core/src/slice/iter/macros.rs | 4 +- library/core/src/slice/sort.rs | 2 +- library/panic_unwind/src/dwarf/eh.rs | 10 ++++ library/panic_unwind/src/seh.rs | 6 +++ library/std/src/backtrace.rs | 8 ++-- library/std/src/io/error/repr_bitpacked.rs | 10 ++-- library/std/src/lib.rs | 1 + library/std/src/os/windows/io/handle.rs | 3 +- library/std/src/os/windows/io/socket.rs | 2 + library/std/src/path.rs | 4 +- library/std/src/sync/once.rs | 49 +++++++++++--------- library/std/src/sys/windows/alloc.rs | 2 +- library/std/src/sys/windows/c.rs | 2 +- library/std/src/sys/windows/compat.rs | 2 +- library/std/src/sys/windows/fs.rs | 11 +++-- library/std/src/sys/windows/mod.rs | 4 +- library/std/src/sys/windows/os.rs | 2 +- library/std/src/sys/windows/thread_parker.rs | 12 ++--- library/std/src/sys_common/condvar/check.rs | 12 +++-- library/std/src/thread/local.rs | 6 +-- 32 files changed, 119 insertions(+), 83 deletions(-) diff --git a/library/alloc/src/lib.rs b/library/alloc/src/lib.rs index 0a180b83355e0..7e90d77b8f289 100644 --- a/library/alloc/src/lib.rs +++ b/library/alloc/src/lib.rs @@ -158,6 +158,7 @@ #![feature(rustc_allow_const_fn_unstable)] #![feature(rustc_attrs)] #![feature(staged_api)] +#![feature(strict_provenance)] #![cfg_attr(test, feature(test))] #![feature(unboxed_closures)] #![feature(unsized_fn_params)] diff --git a/library/alloc/src/rc.rs b/library/alloc/src/rc.rs index ea651c075d968..d6e613b85ff9c 100644 --- a/library/alloc/src/rc.rs +++ b/library/alloc/src/rc.rs @@ -2115,13 +2115,12 @@ impl Weak { #[rustc_const_unstable(feature = "const_weak_new", issue = "95091", reason = "recently added")] #[must_use] pub const fn new() -> Weak { - Weak { ptr: unsafe { NonNull::new_unchecked(usize::MAX as *mut RcBox) } } + Weak { ptr: unsafe { NonNull::new_unchecked(ptr::invalid_mut::>(usize::MAX)) } } } } pub(crate) fn is_dangling(ptr: *mut T) -> bool { - let address = ptr as *mut () as usize; - address == usize::MAX + (ptr as *mut ()).addr() == usize::MAX } /// Helper type to allow accessing the reference counts without diff --git a/library/alloc/src/slice.rs b/library/alloc/src/slice.rs index 
f52871c73d9fc..7c892f03bfb78 100644 --- a/library/alloc/src/slice.rs +++ b/library/alloc/src/slice.rs @@ -1044,7 +1044,7 @@ where impl Drop for MergeHole { fn drop(&mut self) { // `T` is not a zero-sized type, so it's okay to divide by its size. - let len = (self.end as usize - self.start as usize) / mem::size_of::(); + let len = (self.end.addr() - self.start.addr()) / mem::size_of::(); unsafe { ptr::copy_nonoverlapping(self.start, self.dest, len); } diff --git a/library/alloc/src/sync.rs b/library/alloc/src/sync.rs index ba3187294e654..5d445ddcf6612 100644 --- a/library/alloc/src/sync.rs +++ b/library/alloc/src/sync.rs @@ -1745,7 +1745,7 @@ impl Weak { #[rustc_const_unstable(feature = "const_weak_new", issue = "95091", reason = "recently added")] #[must_use] pub const fn new() -> Weak { - Weak { ptr: unsafe { NonNull::new_unchecked(usize::MAX as *mut ArcInner) } } + Weak { ptr: unsafe { NonNull::new_unchecked(ptr::invalid_mut::>(usize::MAX)) } } } } diff --git a/library/alloc/src/vec/into_iter.rs b/library/alloc/src/vec/into_iter.rs index f985fb78465b9..42433441c935a 100644 --- a/library/alloc/src/vec/into_iter.rs +++ b/library/alloc/src/vec/into_iter.rs @@ -154,7 +154,7 @@ impl Iterator for IntoIter { #[inline] fn size_hint(&self) -> (usize, Option) { let exact = if mem::size_of::() == 0 { - (self.end as usize).wrapping_sub(self.ptr as usize) + self.end.addr().wrapping_sub(self.ptr.addr()) } else { unsafe { self.end.offset_from(self.ptr) as usize } }; diff --git a/library/core/src/alloc/layout.rs b/library/core/src/alloc/layout.rs index ea639268652c3..0639d6eed62a5 100644 --- a/library/core/src/alloc/layout.rs +++ b/library/core/src/alloc/layout.rs @@ -194,7 +194,7 @@ impl Layout { #[inline] pub const fn dangling(&self) -> NonNull { // SAFETY: align is guaranteed to be non-zero - unsafe { NonNull::new_unchecked(self.align() as *mut u8) } + unsafe { NonNull::new_unchecked(crate::ptr::invalid_mut::(self.align())) } } /// Creates a layout describing the record that can hold a value diff --git a/library/core/src/fmt/mod.rs b/library/core/src/fmt/mod.rs index 84cf1753f86ba..0e2e869a920ee 100644 --- a/library/core/src/fmt/mod.rs +++ b/library/core/src/fmt/mod.rs @@ -352,7 +352,11 @@ impl<'a> ArgumentV1<'a> { } fn as_usize(&self) -> Option { - if self.formatter as usize == USIZE_MARKER as usize { + // We are type punning a bit here: USIZE_MARKER only takes an &usize but + // formatter takes an &Opaque. Rust understandably doesn't think we should compare + // the function pointers if they don't have the same signature, so we cast to + // pointers to convince it that we know what we're doing. 
+ if self.formatter as *mut u8 == USIZE_MARKER as *mut u8 { // SAFETY: The `formatter` field is only set to USIZE_MARKER if // the value is a usize, so this is safe Some(unsafe { *(self.value as *const _ as *const usize) }) @@ -2246,7 +2250,7 @@ impl Pointer for *const T { } f.flags |= 1 << (FlagV1::Alternate as u32); - let ret = LowerHex::fmt(&(ptr as usize), f); + let ret = LowerHex::fmt(&(ptr.addr()), f); f.width = old_width; f.flags = old_flags; diff --git a/library/core/src/hash/mod.rs b/library/core/src/hash/mod.rs index 53de8b42c059f..45c9df0c930b9 100644 --- a/library/core/src/hash/mod.rs +++ b/library/core/src/hash/mod.rs @@ -793,7 +793,7 @@ mod impls { #[inline] fn hash(&self, state: &mut H) { let (address, metadata) = self.to_raw_parts(); - state.write_usize(address as usize); + state.write_usize(address.addr()); metadata.hash(state); } } @@ -803,7 +803,7 @@ mod impls { #[inline] fn hash(&self, state: &mut H) { let (address, metadata) = self.to_raw_parts(); - state.write_usize(address as usize); + state.write_usize(address.addr()); metadata.hash(state); } } diff --git a/library/core/src/intrinsics.rs b/library/core/src/intrinsics.rs index 1d457c2b7d542..dd11568d70584 100644 --- a/library/core/src/intrinsics.rs +++ b/library/core/src/intrinsics.rs @@ -987,8 +987,8 @@ extern "rust-intrinsic" { /// std::mem::transmute::<&i32, usize>(ptr) /// }; /// - /// // Use an `as` cast instead - /// let ptr_num_cast = ptr as *const i32 as usize; + /// // Use `.addr()` instead + /// let ptr_num_cast = (ptr as *const i32).addr(); /// ``` /// /// Turning a `*mut T` into an `&mut T`: @@ -1972,15 +1972,15 @@ extern "rust-intrinsic" { /// Checks whether `ptr` is properly aligned with respect to /// `align_of::()`. pub(crate) fn is_aligned_and_not_null(ptr: *const T) -> bool { - !ptr.is_null() && ptr as usize % mem::align_of::() == 0 + !ptr.is_null() && ptr.addr() % mem::align_of::() == 0 } /// Checks whether the regions of memory starting at `src` and `dst` of size /// `count * size_of::()` do *not* overlap. #[cfg(debug_assertions)] pub(crate) fn is_nonoverlapping(src: *const T, dst: *const T, count: usize) -> bool { - let src_usize = src as usize; - let dst_usize = dst as usize; + let src_usize = src.addr(); + let dst_usize = dst.addr(); let size = mem::size_of::().checked_mul(count).unwrap(); let diff = if src_usize > dst_usize { src_usize - dst_usize } else { dst_usize - src_usize }; // If the absolute distance between the ptrs is at least as big as the size of the buffer, diff --git a/library/core/src/ptr/non_null.rs b/library/core/src/ptr/non_null.rs index a698aec51ca71..130c869b97891 100644 --- a/library/core/src/ptr/non_null.rs +++ b/library/core/src/ptr/non_null.rs @@ -90,7 +90,7 @@ impl NonNull { // to a *mut T. Therefore, `ptr` is not null and the conditions for // calling new_unchecked() are respected. 
unsafe { - let ptr = mem::align_of::() as *mut T; + let ptr = crate::ptr::invalid_mut::(mem::align_of::()); NonNull::new_unchecked(ptr) } } @@ -469,7 +469,7 @@ impl NonNull<[T]> { /// use std::ptr::NonNull; /// /// let slice: NonNull<[i8]> = NonNull::slice_from_raw_parts(NonNull::dangling(), 3); - /// assert_eq!(slice.as_non_null_ptr(), NonNull::new(1 as *mut i8).unwrap()); + /// assert_eq!(slice.as_non_null_ptr(), NonNull::::dangling()); /// ``` #[inline] #[must_use] @@ -489,7 +489,7 @@ impl NonNull<[T]> { /// use std::ptr::NonNull; /// /// let slice: NonNull<[i8]> = NonNull::slice_from_raw_parts(NonNull::dangling(), 3); - /// assert_eq!(slice.as_mut_ptr(), 1 as *mut i8); + /// assert_eq!(slice.as_mut_ptr(), NonNull::::dangling()); /// ``` #[inline] #[must_use] diff --git a/library/core/src/ptr/unique.rs b/library/core/src/ptr/unique.rs index cff68f64f78e0..29398cbeb238d 100644 --- a/library/core/src/ptr/unique.rs +++ b/library/core/src/ptr/unique.rs @@ -73,7 +73,7 @@ impl Unique { pub const fn dangling() -> Self { // SAFETY: mem::align_of() returns a valid, non-null pointer. The // conditions to call new_unchecked() are thus respected. - unsafe { Unique::new_unchecked(mem::align_of::() as *mut T) } + unsafe { Unique::new_unchecked(crate::ptr::invalid_mut::(mem::align_of::())) } } } diff --git a/library/core/src/slice/ascii.rs b/library/core/src/slice/ascii.rs index 1dba24dd14907..0463f4ab8b3eb 100644 --- a/library/core/src/slice/ascii.rs +++ b/library/core/src/slice/ascii.rs @@ -293,7 +293,7 @@ fn is_ascii(s: &[u8]) -> bool { // Paranoia check about alignment, since we're about to do a bunch of // unaligned loads. In practice this should be impossible barring a bug in // `align_offset` though. - debug_assert_eq!((word_ptr as usize) % mem::align_of::(), 0); + debug_assert_eq!((word_ptr.addr()) % mem::align_of::(), 0); // Read subsequent words until the last aligned word, excluding the last // aligned word by itself to be done in tail check later, to ensure that @@ -301,9 +301,9 @@ fn is_ascii(s: &[u8]) -> bool { while byte_pos < len - USIZE_SIZE { debug_assert!( // Sanity check that the read is in bounds - (word_ptr as usize + USIZE_SIZE) <= (start.wrapping_add(len) as usize) && + (word_ptr.addr() + USIZE_SIZE) <= (start.wrapping_add(len).addr()) && // And that our assumptions about `byte_pos` hold. - (word_ptr as usize) - (start as usize) == byte_pos + (word_ptr.addr()) - (start.addr()) == byte_pos ); // SAFETY: We know `word_ptr` is properly aligned (because of diff --git a/library/core/src/slice/iter/macros.rs b/library/core/src/slice/iter/macros.rs index cf15756868e65..96ead49dd6aaf 100644 --- a/library/core/src/slice/iter/macros.rs +++ b/library/core/src/slice/iter/macros.rs @@ -20,13 +20,13 @@ macro_rules! len { if size == 0 { // This _cannot_ use `unchecked_sub` because we depend on wrapping // to represent the length of long ZST slice iterators. - ($self.end as usize).wrapping_sub(start.as_ptr() as usize) + ($self.end.addr()).wrapping_sub(start.as_ptr().addr()) } else { // We know that `start <= end`, so can do better than `offset_from`, // which needs to deal in signed. By setting appropriate flags here // we can tell LLVM this, which helps it remove bounds checks. 
// SAFETY: By the type invariant, `start <= end` - let diff = unsafe { unchecked_sub($self.end as usize, start.as_ptr() as usize) }; + let diff = unsafe { unchecked_sub($self.end.addr(), start.as_ptr().addr()) }; // By also telling LLVM that the pointers are apart by an exact // multiple of the type size, it can optimize `len() == 0` down to // `start == end` instead of `(end - start) < size`. diff --git a/library/core/src/slice/sort.rs b/library/core/src/slice/sort.rs index 2ba0e5320d7b9..5cf08b5740e82 100644 --- a/library/core/src/slice/sort.rs +++ b/library/core/src/slice/sort.rs @@ -269,7 +269,7 @@ where // Returns the number of elements between pointers `l` (inclusive) and `r` (exclusive). fn width(l: *mut T, r: *mut T) -> usize { assert!(mem::size_of::() > 0); - (r as usize - l as usize) / mem::size_of::() + (r.addr() - l.addr()) / mem::size_of::() } loop { diff --git a/library/panic_unwind/src/dwarf/eh.rs b/library/panic_unwind/src/dwarf/eh.rs index 7394feab82f22..e4e93131f3e97 100644 --- a/library/panic_unwind/src/dwarf/eh.rs +++ b/library/panic_unwind/src/dwarf/eh.rs @@ -10,6 +10,16 @@ #![allow(non_upper_case_globals)] #![allow(unused)] +// FIXME(strict_provenance_magic): the unwinder has special permissions and semantics. +// +// This is at worst an Interesting Case Study that is worth doing a deep dive on. +// +// This implementation pretty aggressively puns *offsets* and actual addresses. It can +// probably be made "strict" but requires a lot of very careful thinking about the +// precise semantics of this format (I think it's basically shoe-horning a bunch of +// things into a common path for simplicity, when in reality there are many impossible +// combinations). +#![cfg_attr(not(bootstrap), allow(fuzzy_provenance_casts))] use crate::dwarf::DwarfReader; use core::mem; diff --git a/library/panic_unwind/src/seh.rs b/library/panic_unwind/src/seh.rs index 9f1eb411ff660..2019b8539c2e3 100644 --- a/library/panic_unwind/src/seh.rs +++ b/library/panic_unwind/src/seh.rs @@ -45,6 +45,12 @@ //! [llvm]: https://llvm.org/docs/ExceptionHandling.html#background-on-windows-exceptions #![allow(nonstandard_style)] +// FIXME(strict_provenance_magic): the unwinder has special permissions and semantics. +// +// This is at worst an Interesting Case Study that is worth doing a deep dive on. +// +// I haven't looked closely at this implementation yet. +#![cfg_attr(not(bootstrap), allow(fuzzy_provenance_casts))] use alloc::boxed::Box; use core::any::Any; diff --git a/library/std/src/backtrace.rs b/library/std/src/backtrace.rs index 94e6070c0f794..fded482095a46 100644 --- a/library/std/src/backtrace.rs +++ b/library/std/src/backtrace.rs @@ -293,7 +293,7 @@ impl Backtrace { if !Backtrace::enabled() { return Backtrace { inner: Inner::Disabled }; } - Backtrace::create(Backtrace::capture as usize) + Backtrace::create((Backtrace::capture as *mut ()).addr()) } /// Forcibly captures a full backtrace, regardless of environment variable @@ -308,7 +308,7 @@ impl Backtrace { /// parts of code. 
#[inline(never)] // want to make sure there's a frame here to remove pub fn force_capture() -> Backtrace { - Backtrace::create(Backtrace::force_capture as usize) + Backtrace::create((Backtrace::force_capture as *mut ()).addr()) } /// Forcibly captures a disabled backtrace, regardless of environment @@ -330,7 +330,7 @@ impl Backtrace { frame: RawFrame::Actual(frame.clone()), symbols: Vec::new(), }); - if frame.symbol_address() as usize == ip && actual_start.is_none() { + if frame.symbol_address().addr() == ip && actual_start.is_none() { actual_start = Some(frames.len()); } true @@ -493,7 +493,7 @@ impl RawFrame { match self { RawFrame::Actual(frame) => frame.ip(), #[cfg(test)] - RawFrame::Fake => 1 as *mut c_void, + RawFrame::Fake => ptr::invalid_mut(1), } } } diff --git a/library/std/src/io/error/repr_bitpacked.rs b/library/std/src/io/error/repr_bitpacked.rs index 1a0538f861a1e..5a39c6d2ec26a 100644 --- a/library/std/src/io/error/repr_bitpacked.rs +++ b/library/std/src/io/error/repr_bitpacked.rs @@ -105,7 +105,7 @@ use super::{Custom, ErrorData, ErrorKind, SimpleMessage}; use alloc::boxed::Box; use core::mem::{align_of, size_of}; -use core::ptr::NonNull; +use core::ptr::{self, NonNull}; // The 2 least-significant bits are used as tag. const TAG_MASK: usize = 0b11; @@ -126,7 +126,7 @@ impl Repr { let p = Box::into_raw(b).cast::(); // Should only be possible if an allocator handed out a pointer with // wrong alignment. - debug_assert_eq!((p as usize & TAG_MASK), 0); + debug_assert_eq!((p.addr() & TAG_MASK), 0); // Note: We know `TAG_CUSTOM <= size_of::()` (static_assert at // end of file), and both the start and end of the expression must be // valid without address space wraparound due to `Box`'s semantics. @@ -156,7 +156,7 @@ impl Repr { pub(super) fn new_os(code: i32) -> Self { let utagged = ((code as usize) << 32) | TAG_OS; // Safety: `TAG_OS` is not zero, so the result of the `|` is not 0. - let res = Self(unsafe { NonNull::new_unchecked(utagged as *mut ()) }); + let res = Self(unsafe { NonNull::new_unchecked(ptr::invalid_mut(utagged)) }); // quickly smoke-check we encoded the right thing (This generally will // only run in libstd's tests, unless the user uses -Zbuild-std) debug_assert!( @@ -170,7 +170,7 @@ impl Repr { pub(super) fn new_simple(kind: ErrorKind) -> Self { let utagged = ((kind as usize) << 32) | TAG_SIMPLE; // Safety: `TAG_SIMPLE` is not zero, so the result of the `|` is not 0. 
- let res = Self(unsafe { NonNull::new_unchecked(utagged as *mut ()) }); + let res = Self(unsafe { NonNull::new_unchecked(ptr::invalid_mut(utagged)) }); // quickly smoke-check we encoded the right thing (This generally will // only run in libstd's tests, unless the user uses -Zbuild-std) debug_assert!( @@ -228,7 +228,7 @@ unsafe fn decode_repr(ptr: NonNull<()>, make_custom: F) -> ErrorData where F: FnOnce(*mut Custom) -> C, { - let bits = ptr.as_ptr() as usize; + let bits = ptr.as_ptr().addr(); match bits & TAG_MASK { TAG_OS => { let code = ((bits as i64) >> 32) as i32; diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs index a464f2d4c7431..133ced5f26cfb 100644 --- a/library/std/src/lib.rs +++ b/library/std/src/lib.rs @@ -275,6 +275,7 @@ #![feature(extend_one)] #![feature(float_minimum_maximum)] #![feature(format_args_nl)] +#![feature(strict_provenance)] #![feature(get_mut_unchecked)] #![feature(hashmap_internals)] #![feature(int_error_internals)] diff --git a/library/std/src/os/windows/io/handle.rs b/library/std/src/os/windows/io/handle.rs index be2ccbd98e9c2..c615d6076d78c 100644 --- a/library/std/src/os/windows/io/handle.rs +++ b/library/std/src/os/windows/io/handle.rs @@ -9,6 +9,7 @@ use crate::fs; use crate::io; use crate::marker::PhantomData; use crate::mem::forget; +use crate::ptr; use crate::sys::c; use crate::sys::cvt; use crate::sys_common::{AsInner, FromInner, IntoInner}; @@ -174,7 +175,7 @@ impl OwnedHandle { return unsafe { Ok(Self::from_raw_handle(handle)) }; } - let mut ret = 0 as c::HANDLE; + let mut ret = ptr::null_mut(); cvt(unsafe { let cur_proc = c::GetCurrentProcess(); c::DuplicateHandle( diff --git a/library/std/src/os/windows/io/socket.rs b/library/std/src/os/windows/io/socket.rs index a6b979cc22bd3..92a872fdabecd 100644 --- a/library/std/src/os/windows/io/socket.rs +++ b/library/std/src/os/windows/io/socket.rs @@ -129,6 +129,8 @@ impl OwnedSocket { } } + // FIXME(strict_provenance_magic): we defined RawSocket to be a u64 ;-; + #[cfg_attr(not(bootstrap), allow(fuzzy_provenance_casts))] #[cfg(not(target_vendor = "uwp"))] pub(crate) fn set_no_inherit(&self) -> io::Result<()> { cvt(unsafe { diff --git a/library/std/src/path.rs b/library/std/src/path.rs index bcf5c9328b79c..8ecea8ce07f6b 100644 --- a/library/std/src/path.rs +++ b/library/std/src/path.rs @@ -1449,8 +1449,8 @@ impl PathBuf { }; // truncate until right after the file stem - let end_file_stem = file_stem[file_stem.len()..].as_ptr() as usize; - let start = os_str_as_u8_slice(&self.inner).as_ptr() as usize; + let end_file_stem = file_stem[file_stem.len()..].as_ptr().addr(); + let start = os_str_as_u8_slice(&self.inner).as_ptr().addr(); let v = self.as_mut_vec(); v.truncate(end_file_stem.wrapping_sub(start)); diff --git a/library/std/src/sync/once.rs b/library/std/src/sync/once.rs index 511de863dc51b..d2dd4c075d2a9 100644 --- a/library/std/src/sync/once.rs +++ b/library/std/src/sync/once.rs @@ -91,9 +91,12 @@ use crate::cell::Cell; use crate::fmt; use crate::marker; use crate::panic::{RefUnwindSafe, UnwindSafe}; -use crate::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; +use crate::ptr; +use crate::sync::atomic::{AtomicBool, AtomicPtr, Ordering}; use crate::thread::{self, Thread}; +type Masked = (); + /// A synchronization primitive which can be used to run a one-time global /// initialization. Useful for one-time initialization for FFI or related /// functionality. This type can only be constructed with [`Once::new()`]. 
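The `repr_bitpacked` changes above show the other half of the model: when a pointer-sized slot really holds packed integer data, the value is created with `ptr::invalid_mut` (a pointer with no provenance at all) and decoded with `addr()`, and is never dereferenced. A condensed sketch of that encode/decode pair, assuming the unstable APIs from this series and the 64-bit layout the surrounding comments rely on (the function names are illustrative):

```rust
use core::ptr::{self, NonNull};

const TAG_OS: usize = 0b01;

/// Pack an OS error code plus a tag into a pointer-sized, non-null value.
/// The pointer is "invalid" (no provenance) because it is never dereferenced.
fn encode_os(code: i32) -> NonNull<()> {
    let bits = ((code as usize) << 32) | TAG_OS;
    // SAFETY: `TAG_OS` is non-zero, so `bits` is non-zero.
    unsafe { NonNull::new_unchecked(ptr::invalid_mut(bits)) }
}

/// Recover the error code by inspecting only the address bits.
fn decode_os(tagged: NonNull<()>) -> i32 {
    ((tagged.as_ptr().addr() as i64) >> 32) as i32
}
```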
@@ -113,7 +116,7 @@ use crate::thread::{self, Thread}; pub struct Once { // `state_and_queue` is actually a pointer to a `Waiter` with extra state // bits, so we add the `PhantomData` appropriately. - state_and_queue: AtomicUsize, + state_and_queue: AtomicPtr, _marker: marker::PhantomData<*const Waiter>, } @@ -136,7 +139,7 @@ impl RefUnwindSafe for Once {} #[derive(Debug)] pub struct OnceState { poisoned: bool, - set_state_on_drop_to: Cell, + set_state_on_drop_to: Cell<*mut Masked>, } /// Initialization value for static [`Once`] values. @@ -184,8 +187,8 @@ struct Waiter { // Every node is a struct on the stack of a waiting thread. // Will wake up the waiters when it gets dropped, i.e. also on panic. struct WaiterQueue<'a> { - state_and_queue: &'a AtomicUsize, - set_state_on_drop_to: usize, + state_and_queue: &'a AtomicPtr, + set_state_on_drop_to: *mut Masked, } impl Once { @@ -195,7 +198,10 @@ impl Once { #[rustc_const_stable(feature = "const_once_new", since = "1.32.0")] #[must_use] pub const fn new() -> Once { - Once { state_and_queue: AtomicUsize::new(INCOMPLETE), _marker: marker::PhantomData } + Once { + state_and_queue: AtomicPtr::new(ptr::invalid_mut(INCOMPLETE)), + _marker: marker::PhantomData, + } } /// Performs an initialization routine once and only once. The given closure @@ -376,7 +382,7 @@ impl Once { // operations visible to us, and, this being a fast path, weaker // ordering helps with performance. This `Acquire` synchronizes with // `Release` operations on the slow path. - self.state_and_queue.load(Ordering::Acquire) == COMPLETE + self.state_and_queue.load(Ordering::Acquire).addr() == COMPLETE } // This is a non-generic function to reduce the monomorphization cost of @@ -395,7 +401,7 @@ impl Once { fn call_inner(&self, ignore_poisoning: bool, init: &mut dyn FnMut(&OnceState)) { let mut state_and_queue = self.state_and_queue.load(Ordering::Acquire); loop { - match state_and_queue { + match state_and_queue.addr() { COMPLETE => break, POISONED if !ignore_poisoning => { // Panic to propagate the poison. @@ -405,7 +411,7 @@ impl Once { // Try to register this thread as the one RUNNING. let exchange_result = self.state_and_queue.compare_exchange( state_and_queue, - RUNNING, + ptr::invalid_mut(RUNNING), Ordering::Acquire, Ordering::Acquire, ); @@ -417,13 +423,13 @@ impl Once { // wake them up on drop. let mut waiter_queue = WaiterQueue { state_and_queue: &self.state_and_queue, - set_state_on_drop_to: POISONED, + set_state_on_drop_to: ptr::invalid_mut(POISONED), }; // Run the initialization function, letting it know if we're // poisoned or not. let init_state = OnceState { - poisoned: state_and_queue == POISONED, - set_state_on_drop_to: Cell::new(COMPLETE), + poisoned: state_and_queue.addr() == POISONED, + set_state_on_drop_to: Cell::new(ptr::invalid_mut(COMPLETE)), }; init(&init_state); waiter_queue.set_state_on_drop_to = init_state.set_state_on_drop_to.get(); @@ -432,7 +438,7 @@ impl Once { _ => { // All other values must be RUNNING with possibly a // pointer to the waiter queue in the more significant bits. 
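By contrast with the pure-integer case, the `Once` rewrite keeps a *real* pointer alive inside the tagged word: the two low bits carry the state, the rest of the address is a `Waiter` pointer, and `with_addr` rebuilds every derived pointer from the original so its provenance survives. A minimal sketch of that packing scheme, under the same unstable-API assumption (the helper names are not part of the patch):

```rust
type Masked = ();
const STATE_MASK: usize = 0b11;

/// Combine an aligned waiter-queue pointer with a 2-bit state tag.
fn pack(queue: *mut Masked, state: usize) -> *mut Masked {
    debug_assert_eq!(queue.addr() & STATE_MASK, 0, "queue pointer must be 4-aligned");
    debug_assert_eq!(state & !STATE_MASK, 0);
    queue.with_addr(queue.addr() | state)
}

/// Split a packed word back into (queue pointer, state tag). The returned
/// pointer is derived from `packed`, so it keeps the queue's provenance.
fn unpack(packed: *mut Masked) -> (*mut Masked, usize) {
    (packed.with_addr(packed.addr() & !STATE_MASK), packed.addr() & STATE_MASK)
}
```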
- assert!(state_and_queue & STATE_MASK == RUNNING); + assert!(state_and_queue.addr() & STATE_MASK == RUNNING); wait(&self.state_and_queue, state_and_queue); state_and_queue = self.state_and_queue.load(Ordering::Acquire); } @@ -441,13 +447,13 @@ impl Once { } } -fn wait(state_and_queue: &AtomicUsize, mut current_state: usize) { +fn wait(state_and_queue: &AtomicPtr, mut current_state: *mut Masked) { // Note: the following code was carefully written to avoid creating a // mutable reference to `node` that gets aliased. loop { // Don't queue this thread if the status is no longer running, // otherwise we will not be woken up. - if current_state & STATE_MASK != RUNNING { + if current_state.addr() & STATE_MASK != RUNNING { return; } @@ -455,15 +461,15 @@ fn wait(state_and_queue: &AtomicUsize, mut current_state: usize) { let node = Waiter { thread: Cell::new(Some(thread::current())), signaled: AtomicBool::new(false), - next: (current_state & !STATE_MASK) as *const Waiter, + next: current_state.with_addr(current_state.addr() & !STATE_MASK) as *const Waiter, }; - let me = &node as *const Waiter as usize; + let me = &node as *const Waiter as *const Masked as *mut Masked; // Try to slide in the node at the head of the linked list, making sure // that another thread didn't just replace the head of the linked list. let exchange_result = state_and_queue.compare_exchange( current_state, - me | RUNNING, + me.with_addr(me.addr() | RUNNING), Ordering::Release, Ordering::Relaxed, ); @@ -502,7 +508,7 @@ impl Drop for WaiterQueue<'_> { self.state_and_queue.swap(self.set_state_on_drop_to, Ordering::AcqRel); // We should only ever see an old state which was RUNNING. - assert_eq!(state_and_queue & STATE_MASK, RUNNING); + assert_eq!(state_and_queue.addr() & STATE_MASK, RUNNING); // Walk the entire linked list of waiters and wake them up (in lifo // order, last to register is first to wake up). @@ -511,7 +517,8 @@ impl Drop for WaiterQueue<'_> { // free `node` if there happens to be has a spurious wakeup. // So we have to take out the `thread` field and copy the pointer to // `next` first. - let mut queue = (state_and_queue & !STATE_MASK) as *const Waiter; + let mut queue = + state_and_queue.with_addr(state_and_queue.addr() & !STATE_MASK) as *const Waiter; while !queue.is_null() { let next = (*queue).next; let thread = (*queue).thread.take().unwrap(); @@ -568,6 +575,6 @@ impl OnceState { /// Poison the associated [`Once`] without explicitly panicking. // NOTE: This is currently only exposed for the `lazy` module pub(crate) fn poison(&self) { - self.set_state_on_drop_to.set(POISONED); + self.set_state_on_drop_to.set(ptr::invalid_mut(POISONED)); } } diff --git a/library/std/src/sys/windows/alloc.rs b/library/std/src/sys/windows/alloc.rs index 2fe71f9f28d5c..fdc81cdea7dec 100644 --- a/library/std/src/sys/windows/alloc.rs +++ b/library/std/src/sys/windows/alloc.rs @@ -159,7 +159,7 @@ unsafe fn allocate(layout: Layout, zeroed: bool) -> *mut u8 { // Create a correctly aligned pointer offset from the start of the allocated block, // and write a header before it. - let offset = layout.align() - (ptr as usize & (layout.align() - 1)); + let offset = layout.align() - (ptr.addr() & (layout.align() - 1)); // SAFETY: `MIN_ALIGN` <= `offset` <= `layout.align()` and the size of the allocated // block is `layout.align() + layout.size()`. 
`aligned` will thus be a correctly aligned // pointer inside the allocated block with at least `layout.size()` bytes after it and at diff --git a/library/std/src/sys/windows/c.rs b/library/std/src/sys/windows/c.rs index 9b61b2476d5bb..0edf43e5d9dd5 100644 --- a/library/std/src/sys/windows/c.rs +++ b/library/std/src/sys/windows/c.rs @@ -173,7 +173,7 @@ pub const PROGRESS_CONTINUE: DWORD = 0; pub const E_NOTIMPL: HRESULT = 0x80004001u32 as HRESULT; -pub const INVALID_HANDLE_VALUE: HANDLE = !0 as HANDLE; +pub const INVALID_HANDLE_VALUE: HANDLE = ptr::invalid_mut(!0); pub const FACILITY_NT_BIT: DWORD = 0x1000_0000; diff --git a/library/std/src/sys/windows/compat.rs b/library/std/src/sys/windows/compat.rs index cbd3366b189ed..a914a3bcc120b 100644 --- a/library/std/src/sys/windows/compat.rs +++ b/library/std/src/sys/windows/compat.rs @@ -88,7 +88,7 @@ macro_rules! compat_fn { let symbol_name: *const u8 = concat!(stringify!($symbol), "\0").as_ptr(); let module_handle = $crate::sys::c::GetModuleHandleA(module_name as *const i8); if !module_handle.is_null() { - match $crate::sys::c::GetProcAddress(module_handle, symbol_name as *const i8) as usize { + match $crate::sys::c::GetProcAddress(module_handle, symbol_name as *const i8).addr() { 0 => {} n => { PTR = Some(mem::transmute::(n)); diff --git a/library/std/src/sys/windows/fs.rs b/library/std/src/sys/windows/fs.rs index d6c40a15329a9..95903899297b6 100644 --- a/library/std/src/sys/windows/fs.rs +++ b/library/std/src/sys/windows/fs.rs @@ -57,6 +57,9 @@ pub struct DirEntry { data: c::WIN32_FIND_DATAW, } +unsafe impl Send for OpenOptions {} +unsafe impl Sync for OpenOptions {} + #[derive(Clone, Debug)] pub struct OpenOptions { // generic @@ -72,7 +75,7 @@ pub struct OpenOptions { attributes: c::DWORD, share_mode: c::DWORD, security_qos_flags: c::DWORD, - security_attributes: usize, // FIXME: should be a reference + security_attributes: c::LPSECURITY_ATTRIBUTES, } #[derive(Clone, PartialEq, Eq, Debug)] @@ -187,7 +190,7 @@ impl OpenOptions { share_mode: c::FILE_SHARE_READ | c::FILE_SHARE_WRITE | c::FILE_SHARE_DELETE, attributes: 0, security_qos_flags: 0, - security_attributes: 0, + security_attributes: ptr::null_mut(), } } @@ -228,7 +231,7 @@ impl OpenOptions { self.security_qos_flags = flags | c::SECURITY_SQOS_PRESENT; } pub fn security_attributes(&mut self, attrs: c::LPSECURITY_ATTRIBUTES) { - self.security_attributes = attrs as usize; + self.security_attributes = attrs; } fn get_access_mode(&self) -> io::Result { @@ -289,7 +292,7 @@ impl File { path.as_ptr(), opts.get_access_mode()?, opts.share_mode, - opts.security_attributes as *mut _, + opts.security_attributes, opts.get_creation_mode()?, opts.get_flags_and_attributes(), ptr::null_mut(), diff --git a/library/std/src/sys/windows/mod.rs b/library/std/src/sys/windows/mod.rs index 6097e62876847..a6678c665d12e 100644 --- a/library/std/src/sys/windows/mod.rs +++ b/library/std/src/sys/windows/mod.rs @@ -138,7 +138,7 @@ pub fn unrolled_find_u16s(needle: u16, haystack: &[u16]) -> Option { ($($n:literal,)+) => { $( if start[$n] == needle { - return Some((&start[$n] as *const u16 as usize - ptr as usize) / 2); + return Some(((&start[$n] as *const u16).addr() - ptr.addr()) / 2); } )+ } @@ -151,7 +151,7 @@ pub fn unrolled_find_u16s(needle: u16, haystack: &[u16]) -> Option { for c in start { if *c == needle { - return Some((c as *const u16 as usize - ptr as usize) / 2); + return Some(((c as *const u16).addr() - ptr.addr()) / 2); } } None diff --git a/library/std/src/sys/windows/os.rs 
b/library/std/src/sys/windows/os.rs index 450bceae00081..bcac996c024ec 100644 --- a/library/std/src/sys/windows/os.rs +++ b/library/std/src/sys/windows/os.rs @@ -134,7 +134,7 @@ impl Drop for Env { pub fn env() -> Env { unsafe { let ch = c::GetEnvironmentStringsW(); - if ch as usize == 0 { + if ch.is_null() { panic!("failure getting env string from OS: {}", io::Error::last_os_error()); } Env { base: ch, cur: ch } diff --git a/library/std/src/sys/windows/thread_parker.rs b/library/std/src/sys/windows/thread_parker.rs index 5888ee8e34bfb..3497da51deeda 100644 --- a/library/std/src/sys/windows/thread_parker.rs +++ b/library/std/src/sys/windows/thread_parker.rs @@ -60,7 +60,7 @@ use crate::convert::TryFrom; use crate::ptr; use crate::sync::atomic::{ - AtomicI8, AtomicUsize, + AtomicI8, AtomicPtr, Ordering::{Acquire, Relaxed, Release}, }; use crate::sys::{c, dur2timeout}; @@ -217,8 +217,8 @@ impl Parker { } fn keyed_event_handle() -> c::HANDLE { - const INVALID: usize = !0; - static HANDLE: AtomicUsize = AtomicUsize::new(INVALID); + const INVALID: c::HANDLE = ptr::invalid_mut(!0); + static HANDLE: AtomicPtr = AtomicPtr::new(INVALID); match HANDLE.load(Relaxed) { INVALID => { let mut handle = c::INVALID_HANDLE_VALUE; @@ -233,7 +233,7 @@ fn keyed_event_handle() -> c::HANDLE { r => panic!("Unable to create keyed event handle: error {r}"), } } - match HANDLE.compare_exchange(INVALID, handle as usize, Relaxed, Relaxed) { + match HANDLE.compare_exchange(INVALID, handle, Relaxed, Relaxed) { Ok(_) => handle, Err(h) => { // Lost the race to another thread initializing HANDLE before we did. @@ -241,10 +241,10 @@ fn keyed_event_handle() -> c::HANDLE { unsafe { c::CloseHandle(handle); } - h as c::HANDLE + h } } } - handle => handle as c::HANDLE, + handle => handle, } } diff --git a/library/std/src/sys_common/condvar/check.rs b/library/std/src/sys_common/condvar/check.rs index 1578a2de60cef..f53f8ebbaa091 100644 --- a/library/std/src/sys_common/condvar/check.rs +++ b/library/std/src/sys_common/condvar/check.rs @@ -1,4 +1,5 @@ -use crate::sync::atomic::{AtomicUsize, Ordering}; +use crate::ptr; +use crate::sync::atomic::{AtomicPtr, Ordering}; use crate::sys::mutex as mutex_imp; use crate::sys_common::mutex::MovableMutex; @@ -13,17 +14,18 @@ impl CondvarCheck for Box { } pub struct SameMutexCheck { - addr: AtomicUsize, + addr: AtomicPtr<()>, } #[allow(dead_code)] impl SameMutexCheck { pub const fn new() -> Self { - Self { addr: AtomicUsize::new(0) } + Self { addr: AtomicPtr::new(ptr::null_mut()) } } pub fn verify(&self, mutex: &MovableMutex) { - let addr = mutex.raw() as *const mutex_imp::Mutex as usize; - match self.addr.compare_exchange(0, addr, Ordering::SeqCst, Ordering::SeqCst) { + let addr = mutex.raw() as *const mutex_imp::Mutex as *const () as *mut _; + match self.addr.compare_exchange(ptr::null_mut(), addr, Ordering::SeqCst, Ordering::SeqCst) + { Ok(_) => {} // Stored the address Err(n) if n == addr => {} // Lost a race to store the same address _ => panic!("attempted to use a condition variable with two mutexes"), diff --git a/library/std/src/thread/local.rs b/library/std/src/thread/local.rs index a100444f04968..ca29261b1c98d 100644 --- a/library/std/src/thread/local.rs +++ b/library/std/src/thread/local.rs @@ -1071,7 +1071,7 @@ pub mod os { pub unsafe fn get(&'static self, init: impl FnOnce() -> T) -> Option<&'static T> { // SAFETY: See the documentation for this method. 
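The `thread_parker` and condvar changes above migrate `AtomicUsize` slots to `AtomicPtr` wherever the stored value really is a pointer, using an invalid (provenance-free) pointer as the "not yet initialized" sentinel. A stand-alone sketch of that caching pattern, assuming `ptr::invalid_mut` from this series is usable in constants (as the `INVALID` handle constant above already assumes); the names here are stand-ins, not std internals:

```rust
use std::ffi::c_void;
use std::ptr;
use std::sync::atomic::{AtomicPtr, Ordering::Relaxed};

// Sentinel address meaning "not initialized yet"; it is never dereferenced.
const UNINIT: *mut c_void = ptr::invalid_mut(!0);

static CACHED: AtomicPtr<c_void> = AtomicPtr::new(UNINIT);

fn get_or_init(create: fn() -> *mut c_void) -> *mut c_void {
    let cur = CACHED.load(Relaxed);
    if cur != UNINIT {
        return cur;
    }
    let new = create();
    match CACHED.compare_exchange(UNINIT, new, Relaxed, Relaxed) {
        Ok(_) => new,
        // Lost the race: another thread stored its value first, so use that
        // one (a real implementation would also release `new` here).
        Err(existing) => existing,
    }
}
```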
let ptr = unsafe { self.os.get() as *mut Value }; - if ptr as usize > 1 { + if ptr.addr() > 1 { // SAFETY: the check ensured the pointer is safe (its destructor // is not running) + it is coming from a trusted source (self). if let Some(ref value) = unsafe { (*ptr).inner.get() } { @@ -1090,7 +1090,7 @@ pub mod os { // SAFETY: No mutable references are ever handed out meaning getting // the value is ok. let ptr = unsafe { self.os.get() as *mut Value }; - if ptr as usize == 1 { + if ptr.addr() == 1 { // destructor is running return None; } @@ -1130,7 +1130,7 @@ pub mod os { unsafe { let ptr = Box::from_raw(ptr as *mut Value); let key = ptr.key; - key.os.set(1 as *mut u8); + key.os.set(ptr::invalid_mut(1)); drop(ptr); key.os.set(ptr::null_mut()); } From 09be0276b39c39f2cdda1e5f8e2b4b441a6eadf2 Mon Sep 17 00:00:00 2001 From: Aria Beingessner Date: Tue, 22 Mar 2022 16:21:33 -0400 Subject: [PATCH 04/10] WIP PROOF-OF-CONCEPT handle all the fallout in rustc Why does rustc do oh so many crimes? Oh so many... --- compiler/rustc_arena/src/lib.rs | 20 ++++++++++--------- compiler/rustc_codegen_ssa/src/lib.rs | 1 + compiler/rustc_codegen_ssa/src/mono_item.rs | 2 +- .../rustc_data_structures/src/tagged_ptr.rs | 3 +++ compiler/rustc_interface/src/util.rs | 3 +++ compiler/rustc_middle/src/ty/adt.rs | 3 +++ compiler/rustc_middle/src/ty/context.rs | 3 +++ compiler/rustc_middle/src/ty/impls_ty.rs | 3 +++ compiler/rustc_middle/src/ty/list.rs | 3 +++ compiler/rustc_middle/src/ty/subst.rs | 3 +++ compiler/rustc_parse/src/parser/expr.rs | 3 +++ 11 files changed, 37 insertions(+), 10 deletions(-) diff --git a/compiler/rustc_arena/src/lib.rs b/compiler/rustc_arena/src/lib.rs index 3928d70c0ede2..de1d5c07f5028 100644 --- a/compiler/rustc_arena/src/lib.rs +++ b/compiler/rustc_arena/src/lib.rs @@ -18,6 +18,7 @@ #![feature(decl_macro)] #![feature(rustc_attrs)] #![cfg_attr(test, feature(test))] +#![feature(strict_provenance)] use smallvec::SmallVec; @@ -87,7 +88,7 @@ impl ArenaChunk { unsafe { if mem::size_of::() == 0 { // A pointer as large as possible for zero-sized elements. - !0 as *mut T + ptr::invalid_mut(!0) } else { self.start().add(self.storage.len()) } @@ -199,7 +200,7 @@ impl TypedArena { unsafe { if mem::size_of::() == 0 { self.ptr.set((self.ptr.get() as *mut u8).wrapping_offset(1) as *mut T); - let ptr = mem::align_of::() as *mut T; + let ptr = ptr::NonNull::::dangling().as_ptr(); // Don't drop the object. This `write` is equivalent to `forget`. ptr::write(ptr, object); &mut *ptr @@ -216,7 +217,7 @@ impl TypedArena { #[inline] fn can_allocate(&self, additional: usize) -> bool { - let available_bytes = self.end.get() as usize - self.ptr.get() as usize; + let available_bytes = self.end.get().addr() - self.ptr.get().addr(); let additional_bytes = additional.checked_mul(mem::size_of::()).unwrap(); available_bytes >= additional_bytes } @@ -262,7 +263,7 @@ impl TypedArena { // If a type is `!needs_drop`, we don't need to keep track of how many elements // the chunk stores - the field will be ignored anyway. if mem::needs_drop::() { - let used_bytes = self.ptr.get() as usize - last_chunk.start() as usize; + let used_bytes = self.ptr.get().addr() - last_chunk.start().addr(); last_chunk.entries = used_bytes / mem::size_of::(); } @@ -288,9 +289,9 @@ impl TypedArena { // chunks. fn clear_last_chunk(&self, last_chunk: &mut ArenaChunk) { // Determine how much was filled. 
- let start = last_chunk.start() as usize; + let start = last_chunk.start().addr(); // We obtain the value of the pointer to the first uninitialized element. - let end = self.ptr.get() as usize; + let end = self.ptr.get().addr(); // We then calculate the number of elements to be dropped in the last chunk, // which is the filled area's length. let diff = if mem::size_of::() == 0 { @@ -395,15 +396,16 @@ impl DroplessArena { /// request. #[inline] fn alloc_raw_without_grow(&self, layout: Layout) -> Option<*mut u8> { - let start = self.start.get() as usize; - let end = self.end.get() as usize; + let start = self.start.get().addr(); + let old_end = self.end.get(); + let end = old_end.addr(); let align = layout.align(); let bytes = layout.size(); let new_end = end.checked_sub(bytes)? & !(align - 1); if start <= new_end { - let new_end = new_end as *mut u8; + let new_end = old_end.with_addr(new_end); self.end.set(new_end); Some(new_end) } else { diff --git a/compiler/rustc_codegen_ssa/src/lib.rs b/compiler/rustc_codegen_ssa/src/lib.rs index 25e27f565eae2..6cf6be79a8628 100644 --- a/compiler/rustc_codegen_ssa/src/lib.rs +++ b/compiler/rustc_codegen_ssa/src/lib.rs @@ -6,6 +6,7 @@ #![feature(once_cell)] #![feature(nll)] #![feature(associated_type_bounds)] +#![feature(strict_provenance)] #![recursion_limit = "256"] #![allow(rustc::potential_query_instability)] diff --git a/compiler/rustc_codegen_ssa/src/mono_item.rs b/compiler/rustc_codegen_ssa/src/mono_item.rs index 5f0f50ae2df1d..5414c619dcbca 100644 --- a/compiler/rustc_codegen_ssa/src/mono_item.rs +++ b/compiler/rustc_codegen_ssa/src/mono_item.rs @@ -116,7 +116,7 @@ impl<'a, 'tcx: 'a> MonoItemExt<'a, 'tcx> for MonoItem<'tcx> { fn to_raw_string(&self) -> String { match *self { MonoItem::Fn(instance) => { - format!("Fn({:?}, {})", instance.def, instance.substs.as_ptr() as usize) + format!("Fn({:?}, {})", instance.def, instance.substs.as_ptr().addr()) } MonoItem::Static(id) => format!("Static({:?})", id), MonoItem::GlobalAsm(id) => format!("GlobalAsm({:?})", id), diff --git a/compiler/rustc_data_structures/src/tagged_ptr.rs b/compiler/rustc_data_structures/src/tagged_ptr.rs index 324a8624dd076..338bffab503fe 100644 --- a/compiler/rustc_data_structures/src/tagged_ptr.rs +++ b/compiler/rustc_data_structures/src/tagged_ptr.rs @@ -1,3 +1,6 @@ +// FIXME(strict_provenance_magic): rustc is grounded for pointer crimes. +#![cfg_attr(not(bootstrap), allow(fuzzy_provenance_casts))] + //! This module implements tagged pointers. //! //! In order to utilize the pointer packing, you must have two types: a pointer, diff --git a/compiler/rustc_interface/src/util.rs b/compiler/rustc_interface/src/util.rs index 592cf60e6c3bb..3b14c037b69e8 100644 --- a/compiler/rustc_interface/src/util.rs +++ b/compiler/rustc_interface/src/util.rs @@ -1,3 +1,6 @@ +// FIXME(strict_provenance_magic): rustc is grounded for pointer crimes. +#![cfg_attr(not(bootstrap), allow(fuzzy_provenance_casts))] + use libloading::Library; use rustc_ast as ast; use rustc_codegen_ssa::traits::CodegenBackend; diff --git a/compiler/rustc_middle/src/ty/adt.rs b/compiler/rustc_middle/src/ty/adt.rs index cb219c4c4e4c9..f7f30e4d30569 100644 --- a/compiler/rustc_middle/src/ty/adt.rs +++ b/compiler/rustc_middle/src/ty/adt.rs @@ -1,3 +1,6 @@ +// FIXME(strict_provenance_magic): rustc is grounded for pointer crimes. 
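The arena change above is worth calling out: the new end of the bump region is computed purely as an integer, but the returned pointer is rebuilt from the *old* end with `with_addr`, so it stays derived from the chunk allocation. A sketch of just that arithmetic, assuming the same unstable methods (the free function is illustrative; the real code lives in `DroplessArena`):

```rust
/// Bump downward from `end` toward `start`, returning an `align`-aligned
/// pointer to `size` bytes, or `None` if the region is exhausted.
fn bump_down(start: *mut u8, end: *mut u8, size: usize, align: usize) -> Option<*mut u8> {
    debug_assert!(align.is_power_of_two());
    let new_end = end.addr().checked_sub(size)? & !(align - 1);
    if start.addr() <= new_end {
        // Rebuild from `end` so the result keeps the chunk's provenance.
        Some(end.with_addr(new_end))
    } else {
        None
    }
}
```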
+#![cfg_attr(not(bootstrap), allow(fuzzy_provenance_casts))] + use crate::mir::interpret::ErrorHandled; use crate::ty; use crate::ty::util::{Discr, IntTypeExt}; diff --git a/compiler/rustc_middle/src/ty/context.rs b/compiler/rustc_middle/src/ty/context.rs index f51e6c2bc1f4d..7d245f21a5ae5 100644 --- a/compiler/rustc_middle/src/ty/context.rs +++ b/compiler/rustc_middle/src/ty/context.rs @@ -1,5 +1,8 @@ //! Type context book-keeping. +// FIXME(strict_provenance_magic): rustc is grounded for pointer crimes. +#![cfg_attr(not(bootstrap), allow(fuzzy_provenance_casts))] + use crate::arena::Arena; use crate::dep_graph::{DepGraph, DepKind, DepKindStruct}; use crate::hir::place::Place as HirPlace; diff --git a/compiler/rustc_middle/src/ty/impls_ty.rs b/compiler/rustc_middle/src/ty/impls_ty.rs index 54a345daec8a5..e8125bddd21d3 100644 --- a/compiler/rustc_middle/src/ty/impls_ty.rs +++ b/compiler/rustc_middle/src/ty/impls_ty.rs @@ -1,6 +1,9 @@ //! This module contains `HashStable` implementations for various data types //! from `rustc_middle::ty` in no particular order. +// FIXME(strict_provenance_magic): rustc is grounded for pointer crimes. +#![cfg_attr(not(bootstrap), allow(fuzzy_provenance_casts))] + use crate::middle::region; use crate::mir; use crate::ty; diff --git a/compiler/rustc_middle/src/ty/list.rs b/compiler/rustc_middle/src/ty/list.rs index adba7d131592e..50190e2615b18 100644 --- a/compiler/rustc_middle/src/ty/list.rs +++ b/compiler/rustc_middle/src/ty/list.rs @@ -1,3 +1,6 @@ +// FIXME(strict_provenance_magic): rustc is grounded for pointer crimes. +#![cfg_attr(not(bootstrap), allow(fuzzy_provenance_casts))] + use crate::arena::Arena; use rustc_serialize::{Encodable, Encoder}; use std::alloc::Layout; diff --git a/compiler/rustc_middle/src/ty/subst.rs b/compiler/rustc_middle/src/ty/subst.rs index 0a6cb276f7578..7e8e7a4520c6e 100644 --- a/compiler/rustc_middle/src/ty/subst.rs +++ b/compiler/rustc_middle/src/ty/subst.rs @@ -1,5 +1,8 @@ // Type substitutions. +// FIXME(strict_provenance_magic): rustc is grounded for pointer crimes. +#![cfg_attr(not(bootstrap), allow(fuzzy_provenance_casts))] + use crate::mir; use crate::ty::codec::{TyDecoder, TyEncoder}; use crate::ty::fold::{FallibleTypeFolder, TypeFoldable, TypeFolder, TypeVisitor}; diff --git a/compiler/rustc_parse/src/parser/expr.rs b/compiler/rustc_parse/src/parser/expr.rs index a907f50a11bbf..40b9baf0602e5 100644 --- a/compiler/rustc_parse/src/parser/expr.rs +++ b/compiler/rustc_parse/src/parser/expr.rs @@ -1,3 +1,6 @@ +// FIXME(strict_provenance_magic): rustc is grounded for pointer crimes. 
+#![cfg_attr(not(bootstrap), allow(fuzzy_provenance_casts))] + use super::diagnostics::SnapshotParser; use super::pat::{CommaRecoveryMode, RecoverColon, RecoverComma, PARAM_EXPECTED}; use super::ty::{AllowPlus, RecoverQPath, RecoverReturnSign}; From 81f77d3d7d33fdf2a82dc65ed2500b51c6a4e094 Mon Sep 17 00:00:00 2001 From: Alexis Beingessner Date: Tue, 22 Mar 2022 21:29:38 -0400 Subject: [PATCH 05/10] WIP PROOF-OF-CONCEPT fixup linux libs --- library/panic_unwind/src/gcc.rs | 7 +++ library/std/src/backtrace.rs | 2 +- library/std/src/os/unix/net/addr.rs | 4 +- library/std/src/sys/unix/memchr.rs | 4 +- .../std/src/sys/unix/process/process_unix.rs | 3 ++ library/std/src/sys/unix/stack_overflow.rs | 6 ++- library/std/src/sys/unix/thread.rs | 51 ++++++++++--------- library/std/src/sys/unix/weak.rs | 28 +++++----- 8 files changed, 62 insertions(+), 43 deletions(-) diff --git a/library/panic_unwind/src/gcc.rs b/library/panic_unwind/src/gcc.rs index a0297b4b2f524..179a90747dcac 100644 --- a/library/panic_unwind/src/gcc.rs +++ b/library/panic_unwind/src/gcc.rs @@ -36,6 +36,13 @@ //! Once stack has been unwound down to the handler frame level, unwinding stops //! and the last personality routine transfers control to the catch block. +// FIXME(strict_provenance_magic): the unwinder has special permissions and semantics. +// +// This is at worst an Interesting Case Study that is worth doing a deep dive on. +// +// I haven't looked closely at this implementation yet. +#![cfg_attr(not(bootstrap), allow(fuzzy_provenance_casts))] + use alloc::boxed::Box; use core::any::Any; diff --git a/library/std/src/backtrace.rs b/library/std/src/backtrace.rs index fded482095a46..3b0922ad16f25 100644 --- a/library/std/src/backtrace.rs +++ b/library/std/src/backtrace.rs @@ -493,7 +493,7 @@ impl RawFrame { match self { RawFrame::Actual(frame) => frame.ip(), #[cfg(test)] - RawFrame::Fake => ptr::invalid_mut(1), + RawFrame::Fake => crate::ptr::invalid_mut(1), } } } diff --git a/library/std/src/os/unix/net/addr.rs b/library/std/src/os/unix/net/addr.rs index a3ef4b2d92cc4..c323161d76537 100644 --- a/library/std/src/os/unix/net/addr.rs +++ b/library/std/src/os/unix/net/addr.rs @@ -17,8 +17,8 @@ mod libc { fn sun_path_offset(addr: &libc::sockaddr_un) -> usize { // Work with an actual instance of the type since using a null pointer is UB - let base = addr as *const _ as usize; - let path = &addr.sun_path as *const _ as usize; + let base = (addr as *const libc::sockaddr_un).addr(); + let path = (&addr.sun_path as *const i8).addr(); path - base } diff --git a/library/std/src/sys/unix/memchr.rs b/library/std/src/sys/unix/memchr.rs index a9273ea676cb3..a3e4f8ff56aee 100644 --- a/library/std/src/sys/unix/memchr.rs +++ b/library/std/src/sys/unix/memchr.rs @@ -9,7 +9,7 @@ pub fn memchr(needle: u8, haystack: &[u8]) -> Option { haystack.len(), ) }; - if p.is_null() { None } else { Some(p as usize - (haystack.as_ptr() as usize)) } + if p.is_null() { None } else { Some(p.addr() - haystack.as_ptr().addr()) } } pub fn memrchr(needle: u8, haystack: &[u8]) -> Option { @@ -26,7 +26,7 @@ pub fn memrchr(needle: u8, haystack: &[u8]) -> Option { haystack.len(), ) }; - if p.is_null() { None } else { Some(p as usize - (haystack.as_ptr() as usize)) } + if p.is_null() { None } else { Some(p.addr() - haystack.as_ptr().addr()) } } #[cfg(not(target_os = "linux"))] diff --git a/library/std/src/sys/unix/process/process_unix.rs b/library/std/src/sys/unix/process/process_unix.rs index 3d305cd7310fd..ef7c896deb4df 100644 --- 
a/library/std/src/sys/unix/process/process_unix.rs +++ b/library/std/src/sys/unix/process/process_unix.rs @@ -1,3 +1,6 @@ +// FIXME(strict_provenance_magic): system API wants us to pass a pointer as a u64 :( +#![cfg_attr(not(bootstrap), allow(fuzzy_provenance_casts))] + use crate::convert::{TryFrom, TryInto}; use crate::fmt; use crate::io::{self, Error, ErrorKind}; diff --git a/library/std/src/sys/unix/stack_overflow.rs b/library/std/src/sys/unix/stack_overflow.rs index 1e8d1137ac8b8..f956996c2ac96 100644 --- a/library/std/src/sys/unix/stack_overflow.rs +++ b/library/std/src/sys/unix/stack_overflow.rs @@ -1,3 +1,5 @@ +// FIXME(strict_provenance_magic): system API wants us to pass a pointer as size_t :( +#![cfg_attr(not(bootstrap), allow(fuzzy_provenance_casts))] #![cfg_attr(test, allow(dead_code))] use self::imp::{drop_handler, make_handler}; @@ -62,12 +64,12 @@ mod imp { si_addr: *mut libc::c_void, } - (*(info as *const siginfo_t)).si_addr as usize + (*(info as *const siginfo_t)).si_addr.addr() } #[cfg(not(any(target_os = "linux", target_os = "android")))] unsafe fn siginfo_si_addr(info: *mut libc::siginfo_t) -> usize { - (*info).si_addr as usize + (*info).si_addr.addr() } // Signal handler for the SIGSEGV and SIGBUS handlers. We've got guard pages diff --git a/library/std/src/sys/unix/thread.rs b/library/std/src/sys/unix/thread.rs index 2d5d306ed62bb..6acc3e4653306 100644 --- a/library/std/src/sys/unix/thread.rs +++ b/library/std/src/sys/unix/thread.rs @@ -1,3 +1,6 @@ +// FIXME(strict_provenance_magic): system API wants us to pass a pointer as ulong :( +#![cfg_attr(not(bootstrap), allow(fuzzy_provenance_casts))] + use crate::cmp; use crate::ffi::CStr; use crate::io; @@ -505,9 +508,8 @@ pub mod guard { #[cfg(target_os = "macos")] unsafe fn get_stack_start() -> Option<*mut libc::c_void> { let th = libc::pthread_self(); - let stackaddr = - libc::pthread_get_stackaddr_np(th) as usize - libc::pthread_get_stacksize_np(th); - Some(stackaddr as *mut libc::c_void) + let stackptr = libc::pthread_get_stackaddr_np(th); + Some(stackptr.with_addr(stackptr.addr() - libc::pthread_get_stacksize_np(th))) } #[cfg(target_os = "openbsd")] @@ -515,14 +517,15 @@ pub mod guard { let mut current_stack: libc::stack_t = crate::mem::zeroed(); assert_eq!(libc::pthread_stackseg_np(libc::pthread_self(), &mut current_stack), 0); + let stack_ptr = current_stack.ss_sp; let stackaddr = if libc::pthread_main_np() == 1 { // main thread - current_stack.ss_sp as usize - current_stack.ss_size + PAGE_SIZE.load(Ordering::Relaxed) + stack_ptr.addr() - current_stack.ss_size + PAGE_SIZE.load(Ordering::Relaxed) } else { // new thread - current_stack.ss_sp as usize - current_stack.ss_size + stack_ptr.addr() - current_stack.ss_size }; - Some(stackaddr as *mut libc::c_void) + Some(stack_ptr.with_addr(stackaddr)) } #[cfg(any( @@ -557,7 +560,8 @@ pub mod guard { unsafe fn get_stack_start_aligned() -> Option<*mut libc::c_void> { let page_size = PAGE_SIZE.load(Ordering::Relaxed); assert!(page_size != 0); - let stackaddr = get_stack_start()?; + let stackptr = get_stack_start()?; + let stackaddr = stackptr.addr(); // Ensure stackaddr is page aligned! A parent process might // have reset RLIMIT_STACK to be non-page aligned.
The @@ -565,11 +569,11 @@ pub mod guard { // stackaddr < stackaddr + stacksize, so if stackaddr is not // page-aligned, calculate the fix such that stackaddr < // new_page_aligned_stackaddr < stackaddr + stacksize - let remainder = (stackaddr as usize) % page_size; + let remainder = (stackaddr) % page_size; Some(if remainder == 0 { - stackaddr + stackptr } else { - ((stackaddr as usize) + page_size - remainder) as *mut libc::c_void + stackptr.with_addr(stackaddr + page_size - remainder) }) } @@ -588,8 +592,8 @@ pub mod guard { // Instead, we'll just note where we expect rlimit to start // faulting, so our handler can report "stack overflow", and // trust that the kernel's own stack guard will work. - let stackaddr = get_stack_start_aligned()?; - let stackaddr = stackaddr as usize; + let stackptr = get_stack_start_aligned()?; + let stackaddr = stackptr.addr(); Some(stackaddr - page_size..stackaddr) } else if cfg!(all(target_os = "linux", target_env = "musl")) { // For the main thread, the musl's pthread_attr_getstack @@ -602,8 +606,8 @@ pub mod guard { // at the bottom. If we try to remap the bottom of the stack // ourselves, FreeBSD's guard page moves upwards. So we'll just use // the builtin guard page. - let stackaddr = get_stack_start_aligned()?; - let guardaddr = stackaddr as usize; + let stackptr = get_stack_start_aligned()?; + let guardaddr = stackptr.addr(); // Technically the number of guard pages is tunable and controlled // by the security.bsd.stack_guard_page sysctl, but there are // few reasons to change it from the default. The default value has @@ -620,25 +624,25 @@ pub mod guard { // than the initial mmap() used, so we mmap() here with // read/write permissions and only then mprotect() it to // no permissions at all. See issue #50313. - let stackaddr = get_stack_start_aligned()?; + let stackptr = get_stack_start_aligned()?; let result = mmap( - stackaddr, + stackptr, page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0, ); - if result != stackaddr || result == MAP_FAILED { + if result != stackptr || result == MAP_FAILED { panic!("failed to allocate a guard page: {}", io::Error::last_os_error()); } - let result = mprotect(stackaddr, page_size, PROT_NONE); + let result = mprotect(stackptr, page_size, PROT_NONE); if result != 0 { panic!("failed to protect the guard page: {}", io::Error::last_os_error()); } - let guardaddr = stackaddr as usize; + let guardaddr = stackptr.addr(); Some(guardaddr..guardaddr + page_size) } @@ -646,7 +650,8 @@ pub mod guard { #[cfg(any(target_os = "macos", target_os = "openbsd", target_os = "solaris"))] pub unsafe fn current() -> Option { - let stackaddr = get_stack_start()? 
as usize; + let stackptr = get_stack_start()?; + let stackaddr = stackptr.addr(); Some(stackaddr - PAGE_SIZE.load(Ordering::Relaxed)..stackaddr) } @@ -679,11 +684,11 @@ pub mod guard { panic!("there is no guard page"); } } - let mut stackaddr = crate::ptr::null_mut(); + let mut stackptr = crate::ptr::null_mut::(); let mut size = 0; - assert_eq!(libc::pthread_attr_getstack(&attr, &mut stackaddr, &mut size), 0); + assert_eq!(libc::pthread_attr_getstack(&attr, &mut stackptr, &mut size), 0); - let stackaddr = stackaddr as usize; + let stackaddr = stackptr.addr(); ret = if cfg!(any(target_os = "freebsd", target_os = "netbsd")) { Some(stackaddr - guardsize..stackaddr) } else if cfg!(all(target_os = "linux", target_env = "musl")) { diff --git a/library/std/src/sys/unix/weak.rs b/library/std/src/sys/unix/weak.rs index da63c068384a2..108bbbcf0a98f 100644 --- a/library/std/src/sys/unix/weak.rs +++ b/library/std/src/sys/unix/weak.rs @@ -22,10 +22,11 @@ // that, we'll just allow that some unix targets don't use this module at all. #![allow(dead_code, unused_macros)] -use crate::ffi::CStr; +use crate::ffi::{c_void, CStr}; use crate::marker::PhantomData; use crate::mem; -use crate::sync::atomic::{self, AtomicUsize, Ordering}; +use crate::ptr; +use crate::sync::atomic::{self, AtomicPtr, Ordering}; // We can use true weak linkage on ELF targets. #[cfg(not(any(target_os = "macos", target_os = "ios")))] @@ -83,13 +84,13 @@ pub(crate) macro dlsym { } pub(crate) struct DlsymWeak { name: &'static str, - addr: AtomicUsize, + addr: AtomicPtr, _marker: PhantomData, } impl DlsymWeak { pub(crate) const fn new(name: &'static str) -> Self { - DlsymWeak { name, addr: AtomicUsize::new(1), _marker: PhantomData } + DlsymWeak { name, addr: AtomicPtr::new(ptr::invalid_mut(1)), _marker: PhantomData } } #[inline] @@ -97,11 +98,12 @@ impl DlsymWeak { unsafe { // Relaxed is fine here because we fence before reading through the // pointer (see the comment below). - match self.addr.load(Ordering::Relaxed) { + let fn_ptr = self.addr.load(Ordering::Relaxed); + match fn_ptr.addr() { 1 => self.initialize(), 0 => None, - addr => { - let func = mem::transmute_copy::(&addr); + _ => { + let func = mem::transmute_copy::<*mut c_void, F>(&fn_ptr); // The caller is presumably going to read through this value // (by calling the function we've dlsymed). This means we'd // need to have loaded it with at least C11's consume @@ -129,25 +131,25 @@ impl DlsymWeak { // Cold because it should only happen during first-time initialization. #[cold] unsafe fn initialize(&self) -> Option { - assert_eq!(mem::size_of::(), mem::size_of::()); + assert_eq!(mem::size_of::(), mem::size_of::<*mut ()>()); let val = fetch(self.name); // This synchronizes with the acquire fence in `get`. self.addr.store(val, Ordering::Release); - match val { + match val.addr() { 0 => None, - addr => Some(mem::transmute_copy::(&addr)), + _ => Some(mem::transmute_copy::<*mut c_void, F>(&val)), } } } -unsafe fn fetch(name: &str) -> usize { +unsafe fn fetch(name: &str) -> *mut c_void { let name = match CStr::from_bytes_with_nul(name.as_bytes()) { Ok(cstr) => cstr, - Err(..) => return 0, + Err(..) => return ptr::null_mut(), }; - libc::dlsym(libc::RTLD_DEFAULT, name.as_ptr()) as usize + libc::dlsym(libc::RTLD_DEFAULT, name.as_ptr()) } #[cfg(not(any(target_os = "linux", target_os = "android")))] From f0161221b8e0235036beb400e504af17ea856b23 Mon Sep 17 00:00:00 2001 From: Alexis Beingessner Date: Tue, 22 Mar 2022 21:30:48 -0400 Subject: [PATCH 06/10] WIP PROOF-OF-CONCEPT fix stdarch? 
--- library/stdarch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/stdarch b/library/stdarch index bcbe010614f39..820902a0522d9 160000 --- a/library/stdarch +++ b/library/stdarch @@ -1 +1 @@ -Subproject commit bcbe010614f398ec86f3a9274d22e33e5f2ee60b +Subproject commit 820902a0522d94b955538293449d9f50226082fe From 9052a68fb1e7062c0a43e4bcddc1a58ef7a2738c Mon Sep 17 00:00:00 2001 From: Alexis Beingessner Date: Tue, 22 Mar 2022 21:40:07 -0400 Subject: [PATCH 07/10] FIXUP PTR --- library/core/src/ptr/const_ptr.rs | 3 ++- library/core/src/ptr/mod.rs | 2 +- library/core/src/ptr/mut_ptr.rs | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/library/core/src/ptr/const_ptr.rs b/library/core/src/ptr/const_ptr.rs index 352794dc6d0dd..725d636bd657d 100644 --- a/library/core/src/ptr/const_ptr.rs +++ b/library/core/src/ptr/const_ptr.rs @@ -73,6 +73,7 @@ impl *const T { where T: Sized, { + // SAFETY: I AM THE MAGIC unsafe { core::mem::transmute(self) } } @@ -153,7 +154,7 @@ impl *const T { /// Here is an example of how to properly use this API to mess around /// with tagged pointers. Here we have a tag in the lowest bit: /// - /// ```ignore + /// ```text /// let my_tagged_ptr: *const T = ...; /// /// // Get the address and do whatever bit tricks we like diff --git a/library/core/src/ptr/mod.rs b/library/core/src/ptr/mod.rs index b1a8ded66c611..3f0cb42a469e2 100644 --- a/library/core/src/ptr/mod.rs +++ b/library/core/src/ptr/mod.rs @@ -273,7 +273,7 @@ pub const fn null_mut() -> *mut T { /// It's sound for an API to use an instance of a ZST to enforce some important /// safety property. So for instance, you can make an API like this: /// -/// ```ignore +/// ```text /// pub struct Step1Token(_private_to_construct: ()); /// /// pub fn step1() -> Step1Token { ... } diff --git a/library/core/src/ptr/mut_ptr.rs b/library/core/src/ptr/mut_ptr.rs index 088f4e926f6ce..5b14202fcde64 100644 --- a/library/core/src/ptr/mut_ptr.rs +++ b/library/core/src/ptr/mut_ptr.rs @@ -76,6 +76,7 @@ impl *mut T { where T: Sized, { + // SAFETY: I AM THE MAGIC unsafe { core::mem::transmute(self) } } @@ -156,7 +157,7 @@ impl *mut T { /// Here is an example of how to properly use this API to mess around /// with tagged pointers. Here we have a tag in the lowest bit: /// - /// ```ignore + /// ```text /// let my_tagged_ptr: *mut T = ...; /// /// // Get the address and do whatever bit tricks we like From 4e588d152881cb0679368d62a8ab4047e5f99c28 Mon Sep 17 00:00:00 2001 From: Alexis Beingessner Date: Tue, 22 Mar 2022 22:42:09 -0400 Subject: [PATCH 08/10] unblech a dep --- library/stdarch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/stdarch b/library/stdarch index 820902a0522d9..bcbe010614f39 160000 --- a/library/stdarch +++ b/library/stdarch @@ -1 +1 @@ -Subproject commit 820902a0522d94b955538293449d9f50226082fe +Subproject commit bcbe010614f398ec86f3a9274d22e33e5f2ee60b From e60b84b95f80974b1c6e3751d7d6dc86c857ab0e Mon Sep 17 00:00:00 2001 From: Alexis Beingessner Date: Tue, 22 Mar 2022 22:46:13 -0400 Subject: [PATCH 09/10] default the lint to allow --- compiler/rustc_lint_defs/src/builtin.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler/rustc_lint_defs/src/builtin.rs b/compiler/rustc_lint_defs/src/builtin.rs index 6933b20457811..f71b68ab2cabe 100644 --- a/compiler/rustc_lint_defs/src/builtin.rs +++ b/compiler/rustc_lint_defs/src/builtin.rs @@ -2676,7 +2676,7 @@ declare_lint! 
{ /// [future-incompatible]: ../index.md#future-incompatible-lints /// [issue #9999999]: https://github.com/rust-lang/rust/issues/9999999 pub FUZZY_PROVENANCE_CASTS, - Warn, + Allow, "A lossy pointer-integer cast is used", @future_incompatible = FutureIncompatibleInfo { reference: "issue #9999999 ", From 8643aa48e46f6c5526149b3f49cb53c942193906 Mon Sep 17 00:00:00 2001 From: Alexis Beingessner Date: Tue, 22 Mar 2022 23:07:07 -0400 Subject: [PATCH 10/10] fixup ptr docs --- library/core/src/ptr/const_ptr.rs | 7 +++---- library/core/src/ptr/mod.rs | 4 ++-- library/core/src/ptr/mut_ptr.rs | 7 +++---- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/library/core/src/ptr/const_ptr.rs b/library/core/src/ptr/const_ptr.rs index 725d636bd657d..e0ae31aa58a71 100644 --- a/library/core/src/ptr/const_ptr.rs +++ b/library/core/src/ptr/const_ptr.rs @@ -67,7 +67,8 @@ impl *const T { /// and cannot be created from one without additional context. /// /// If you would like to treat a pointer like an integer anyway, - /// see [`addr`][] and [`with_addr`][] for the responsible way to do that. + /// see [`addr`][#method.addr-1] and [`with_addr`][#method.with_addr-1] for the responsible + /// way to do that. #[unstable(feature = "ptr_to_from_bits", issue = "91126")] pub fn to_bits(self) -> [u8; core::mem::size_of::<*const ()>()] where T: Sized, { @@ -109,7 +110,7 @@ impl *const T { /// and is equivalent to the deprecated `ptr as usize` cast. /// /// On more complicated platforms like CHERI and segmented architectures, - /// this may remove some important metadata. See [`with_addr`][] for + /// this may remove some important metadata. See [`with_addr`][#method.with_addr-1] for /// details on this distinction and why it's important. #[unstable(feature = "strict_provenance", issue = "99999999")] pub fn addr(self) -> usize @@ -122,8 +123,6 @@ impl *const T { /// Creates a new pointer with the given address. /// - /// See also: [`ptr::fake_alloc`][] and [`ptr::zst_exists`][]. - /// /// This replaces the deprecated `usize as ptr` cast, which had /// fundamentally broken semantics because it couldn't restore /// *segment* and *provenance*. diff --git a/library/core/src/ptr/mod.rs b/library/core/src/ptr/mod.rs index 3f0cb42a469e2..110f31ae69813 100644 --- a/library/core/src/ptr/mod.rs +++ b/library/core/src/ptr/mod.rs @@ -93,7 +93,7 @@ //! [zst]: ../../nomicon/exotic-sizes.html#zero-sized-types-zsts //! [atomic operations]: crate::sync::atomic //! [`offset`]: pointer::offset -//! [`zst_exists`]: pointer::zst_exists +//! [`zst_exists`]: core::ptr::zst_exists #![stable(feature = "rust1", since = "1.0.0")] @@ -337,7 +337,7 @@ where /// This is a problem for Pointer Provenance and Segmenting, because there /// is no "chain of custody" to an allocation. One possible solution to this /// is for the programmer to Pretend To Be Malloc and "allocate" the address. -/// See [`with_addr`] for more details. +/// See [`pointer::with_addr`] for more details. /// /// Just as with *real* malloc, the compiler is free to assume the pointer /// returned from this function is completely unaliased, and that all accesses diff --git a/library/core/src/ptr/mut_ptr.rs b/library/core/src/ptr/mut_ptr.rs index 5b14202fcde64..eae15f1c0937c 100644 --- a/library/core/src/ptr/mut_ptr.rs +++ b/library/core/src/ptr/mut_ptr.rs @@ -70,7 +70,8 @@ impl *mut T { /// and cannot be created from one without additional context.
/// /// If you would like to treat a pointer like an integer anyway, - /// see [`addr`][] and [`with_addr`][] for the responsible way to do that. + /// see [`addr`][#method.addr-1] and [`with_addr`][#method.with_addr-1] for + /// the responsible way to do that. #[unstable(feature = "ptr_to_from_bits", issue = "91126")] pub fn to_bits(self) -> [u8; core::mem::size_of::<*mut ()>()] where @@ -112,7 +113,7 @@ impl *mut T { /// and is equivalent to the deprecated `ptr as usize` cast. /// /// On more complicated platforms like CHERI and segmented architectures, - /// this may remove some important metadata. See [`with_addr`][] for + /// this may remove some important metadata. See [`with_addr`][#method.with_addr-1] for /// details on this distinction and why it's important. #[unstable(feature = "strict_provenance", issue = "99999999")] pub fn addr(self) -> usize @@ -125,8 +126,6 @@ impl *mut T { /// Creates a new pointer with the given address. /// - /// See also: [`ptr::fake_alloc`][] and [`ptr::zst_exists`][]. - /// /// This replaces the deprecated `usize as ptr` cast, which had /// fundamentally broken semantics because it couldn't restore /// *segment* and *provenance*.
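Taken together, the documentation above describes a small closed loop: read the address with `addr()`, do integer arithmetic on it, and rebuild the pointer from the original with `with_addr()`. As a final sketch (assuming the unstable `strict_provenance` methods; `align_up` is not an API proposed by this patch), the page- and heap-alignment adjustments earlier in this series all reduce to:

```rust
/// Round `ptr` up to the next multiple of `align` (a power of two) without an
/// integer-to-pointer cast; the result keeps `ptr`'s provenance.
fn align_up<T>(ptr: *mut T, align: usize) -> *mut T {
    debug_assert!(align.is_power_of_two());
    let addr = ptr.addr();
    let aligned = addr.wrapping_add(align - 1) & !(align - 1);
    ptr.with_addr(aligned)
}
```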