
Alignment of the bytes of Allocation to match align parameter #100467

Closed — wants to merge 25 commits (showing changes from 22 commits)

Commits (25)
0fe75b8
Stop x.py from being mean
maurer Aug 2, 2022
f37fe37
DO NOT MERGE - scratchwork
maurer Aug 3, 2022
fde4235
point to my miri fork
Aug 3, 2022
e0e8e00
no more direct hole punch to alloc bytes
Aug 4, 2022
a7b7f19
no more holepunch to bytes
Aug 4, 2022
f7a991b
commented out alignment check for int-aligned addrs -- need to make t…
Aug 6, 2022
e2ed272
proper check for size of allocation being aligned
Aug 7, 2022
51269b8
using the align parameter to properly align allocations
Aug 8, 2022
bca203e
redoing allocation bytes realignment -- still testing with miri
Aug 12, 2022
0ddff36
allocation bytes alignment, and cleanup .gitmodules
Aug 12, 2022
a72a057
double free detected in tcache 2: could not compile core in stage 1 c…
Aug 12, 2022
04f29dc
using slice directly, no intermediate vec
Aug 12, 2022
2fd7606
removing miri submodule updates
Aug 15, 2022
ab1a61f
removing miri submodule updates
Aug 15, 2022
b87f5ef
removing miri submodule updates
Aug 15, 2022
cade1c1
going back to previous version of miri to match commit of rustc
Aug 15, 2022
c31d404
moving AllocBytes into a trait -- partially done
emarteca Sep 5, 2022
e993680
more moving allocbytes into trait
emarteca Sep 7, 2022
17ac36b
allocbytes moved into a trait, implemented for `Box<[u8]>` as default
emarteca Sep 7, 2022
c2e142b
merging in changes from upstream master
emarteca Sep 8, 2022
99f6708
cleanup
emarteca Sep 8, 2022
a12d111
cleanup
emarteca Sep 8, 2022
f75649b
propagating Allocation Bytes type
emarteca Sep 14, 2022
8db066f
adding deref and derefmut to allocbytes, removing now unnecessary met…
emarteca Nov 9, 2022
f075a12
nit
emarteca Nov 9, 2022
7 changes: 7 additions & 0 deletions compiler/rustc_const_eval/src/interpret/memory.rs
@@ -517,6 +517,13 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
)
}

/// Get the base address for the bytes in an `Allocation` specified by the
/// `AllocID` passed in; error if no such allocation exists.
pub fn get_alloc_base_addr(&self, id: AllocId) -> InterpResult<'tcx, usize> {
Member:
Suggested change
/// Get the base address for the bytes in an `Allocation` specified by the
/// `AllocID` passed in; error if no such allocation exists.
pub fn get_alloc_base_addr(&self, id: AllocId) -> InterpResult<'tcx, usize> {
/// Get the base address for the bytes in an `Allocation` specified by the
/// `AllocID` passed in; error if no such allocation exists. The address will
/// be exposed (on the host system!).
///
/// It is up to the caller to take sufficient care when using this address:
/// there could be provenance or uninit memory in there, and other memory
/// accesses could invalidate the exposed pointer.
pub fn expose_alloc_base_addr(&self, id: AllocId) -> InterpResult<'tcx, usize> {

Also note that this only allows read-only access to the bytes. If C code writes to this memory, it is writing through a shared reference! So I think you probably want to make this `mut`.

let alloc = self.get_alloc_raw(id)?;
Ok(alloc.expose_base_addr())
}
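The shape of the accessor above can be illustrated with a small stand-alone sketch (hypothetical names, not the rustc API): exposing the base address of an allocation's bytes just means handing out the host address of the first byte as a plain integer.

```rust
// Hypothetical stand-alone sketch (not the rustc API): expose the host
// address of a heap buffer as a plain usize, the way `get_alloc_base_addr`
// hands out the address of an allocation's backing bytes.
pub fn expose_base_addr(bytes: &[u8]) -> usize {
    bytes.as_ptr() as usize
}

fn main() {
    let buf: Box<[u8]> = vec![0u8; 16].into_boxed_slice();
    let addr = expose_base_addr(&buf);
    // The exposed address is the address of the first byte of the buffer.
    assert_eq!(addr, buf.as_ptr() as usize);
}
```

As the review notes, once this integer escapes to foreign code, the interpreter can no longer see reads or writes through it, which is why the naming and mutability deserve care.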

/// Gives raw access to the `Allocation`, without bounds or alignment checks.
/// The caller is responsible for calling the access hooks!
///
2 changes: 2 additions & 0 deletions compiler/rustc_middle/src/lib.rs
@@ -23,6 +23,7 @@
//! This API is completely unstable and subject to change.

#![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")]
#![feature(maybe_uninit_write_slice)]
#![feature(allocator_api)]
#![feature(array_windows)]
#![feature(assert_matches)]
@@ -59,6 +60,7 @@
#![feature(intra_doc_pointers)]
#![feature(yeet_expr)]
#![feature(const_option)]
#![feature(vec_into_raw_parts)]
#![recursion_limit = "512"]
#![allow(rustc::potential_query_instability)]

156 changes: 133 additions & 23 deletions compiler/rustc_middle/src/mir/interpret/allocation.rs
@@ -4,6 +4,7 @@ use std::borrow::Cow;
use std::convert::{TryFrom, TryInto};
use std::fmt;
use std::hash;
use std::hash::Hash;
use std::iter;
use std::ops::{Deref, Range};
use std::ptr;
@@ -21,6 +22,98 @@ use super::{
};
use crate::ty;

/// Functionality required for the bytes of an `Allocation`.
pub trait AllocBytes:
Clone + core::fmt::Debug + Eq + PartialEq + PartialOrd + Ord + core::hash::Hash
Contributor:
If you make this also have `Deref<Target = [u8]>`, you can remove everything but `adjust_to_align`, `from_bytes`, and `uninit`, and should have to adjust much less code, as it would pick up the slice methods that `Box<[u8]>` also exposes via `Deref`.

Author:
Ah good point! I was hoping to keep AllocBytes more general though, so I can use it to represent foreign memory in Miri, that isn't represented by a [u8]. Given that, do you think it's ok to keep these extra functions in AllocBytes?

Contributor:
What kind of memory are you thinking of? You already support get_slice_from_range, which returns a &[u8]. Most of what I'm proposing is to get rid of that method and require the implementor of AllocBytes to put the logic into the Deref::deref method instead. Or are you planning to change the return type of the get_slice methods?

Author (emarteca, Sep 14, 2022):
Ah, if we can make a custom get_slice method with the same return type then that should work! In my implementation these methods still return &[u8] -- my current representation of foreign memory is represented as a pair of machine address and length, and the get_slice methods build the slice from std::slice::from_raw_parts.

Contributor:
yea, so you could implement Deref and DerefMut for your foreign memory representation and have the deref and deref_mut methods use from_raw_parts. Then you can remove the get_slice* methods and others from the trait, as you get them for free once Deref is a super trait

Author:
Oh I see -- that's smart, thanks for the suggestion! (Will push these changes soon)

Member:
I agree the trait has a lot more methods than I was expecting so this sounds good.

{
/// The length of the bytes.
fn get_len(&self) -> usize;
/// The address of the bytes.
fn expose_addr(&self) -> u64;
Comment on lines +37 to +38
Member:
I think it would be better to make this return a raw pointer, and leave exposing and things like that to the caller.

And in fact... why is this even needed? One can use the deref to get such a pointer, right?

/// Get a slice of the bytes corresponding to a specified range.
fn get_slice_from_range(&self, range: Range<usize>) -> &[u8];
/// Mutable slice of the bytes corresponding to a specified range.
fn get_slice_from_range_mut<'a>(&'a mut self, range: Range<usize>) -> &'a mut [u8];
/// Add to the pointer of the head of the bytes, and return a mutable pointer to this location.
fn add_ptr(&mut self, to_add: usize) -> *mut u8;
/// Hash the head and tail of the bytes.
/// This is required to satisfy the `Hash` trait.
fn hash_head_tail<H: hash::Hasher>(
&self,
_byte_count: usize,
_state: &mut H,
_max_bytes_to_hash: usize,
) {
}
/// Adjust the bytes to the specified alignment -- by default, this is a no-op.
fn adjust_to_align(self, _align: Align) -> Self {
self
}
Member:
What is this about? Alignment should be set at allocation time.
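The reviewer's point — that alignment belongs at allocation time — can be sketched on stable Rust with an explicit `Layout` (this is an illustration of the principle, not the PR's actual implementation):

```rust
use std::alloc::{alloc_zeroed, dealloc, Layout};

// Hedged sketch of "set alignment at allocation time": ask the global
// allocator for zeroed memory with an explicit Layout, instead of
// re-aligning a Box<[u8]> after the fact. Callers must pass a non-zero size.
fn alloc_aligned_zeroed(size: usize, align: usize) -> (*mut u8, Layout) {
    let layout = Layout::from_size_align(size, align).expect("invalid layout");
    // SAFETY: size is non-zero, so alloc_zeroed with this layout is valid.
    let ptr = unsafe { alloc_zeroed(layout) };
    assert!(!ptr.is_null(), "allocation failed");
    (ptr, layout)
}

fn main() {
    let (ptr, layout) = alloc_aligned_zeroed(64, 32);
    // The returned pointer honors the requested 32-byte alignment.
    assert_eq!(ptr as usize % 32, 0);
    unsafe { dealloc(ptr, layout) };
}
```

With this approach, an `adjust_to_align` fix-up step would indeed be a no-op by construction.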

/// Create an `AllocBytes` from a slice of `u8`.
fn from_bytes<'a>(slice: impl Into<Cow<'a, [u8]>>, _align: Align) -> Self;
/// Create an uninitialized `AllocBytes` of the specified size and alignment;
/// call the callback error handler if there is an error in allocating the memory.
fn uninit<'tcx, F: Fn() -> InterpError<'tcx>>(
size: Size,
_align: Align,
handle_alloc_fail: F,
) -> Result<Self, InterpError<'tcx>>;
}

// Default `bytes` for `Allocation` is a `Box<[u8]>`.
impl AllocBytes for Box<[u8]> {
fn uninit<'tcx, F: Fn() -> InterpError<'tcx>>(
Member:
It's not uninit though, it returns a zeroed buffer?

Author:
Huh that's true -- I just took this code from here where it was originally in the file

Member:
The entire allocation is considered uninit based on its InitMask, yeah. But looking just at the bytes, which is the level of abstraction you have here, this is zeroed. The bytes cannot be uninit.

size: Size,
_align: Align,
handle_alloc_fail: F,
) -> Result<Self, InterpError<'tcx>> {
let bytes = Box::<[u8]>::try_new_zeroed_slice(size.bytes_usize())
.map_err(|_| handle_alloc_fail())?;
// SAFETY: the box was zero-allocated, which is a valid initial value for Box<[u8]>
let bytes = unsafe { bytes.assume_init() };
emarteca marked this conversation as resolved.
Ok(bytes)
}
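The naming discussion above is easy to verify: the buffer handed back by `uninit` is zero-initialized, and the "uninit-ness" is tracked separately in the allocation's `InitMask`. A stable stand-in for the nightly `try_new_zeroed_slice` + `assume_init` pair (an illustrative sketch, not the PR's code):

```rust
// Stable stand-in: produce a zeroed Box<[u8]> of the requested size,
// matching the observable behavior of try_new_zeroed_slice + assume_init.
fn zeroed_bytes(size: usize) -> Box<[u8]> {
    vec![0u8; size].into_boxed_slice()
}

fn main() {
    let b = zeroed_bytes(32);
    assert_eq!(b.len(), 32);
    // Every byte really is zero, not uninitialized garbage.
    assert!(b.iter().all(|&x| x == 0));
}
```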

fn from_bytes<'a>(slice: impl Into<Cow<'a, [u8]>>, _align: Align) -> Self {
Box::<[u8]>::from(slice.into())
}

/// The length of the bytes.
fn get_len(&self) -> usize {
self.len()
}

/// The real address of the bytes.
fn expose_addr(&self) -> u64 {
self.as_ptr() as u64
}

/// Slice of the bytes, for a specified range.
fn get_slice_from_range(&self, range: Range<usize>) -> &[u8] {
&self[range]
}

/// Mutable slice of the bytes, for a specified range.
fn get_slice_from_range_mut<'a>(&'a mut self, range: Range<usize>) -> &'a mut [u8] {
&mut self[range]
}

/// Pointer addition to the base address of the bytes.
fn add_ptr(&mut self, to_add: usize) -> *mut u8 {
self.as_mut_ptr().wrapping_add(to_add)
}

fn hash_head_tail<H: hash::Hasher>(
&self,
_byte_count: usize,
_state: &mut H,
_max_bytes_to_hash: usize,
) {
self[.._max_bytes_to_hash].hash(_state);
self[_byte_count - _max_bytes_to_hash..].hash(_state);
}
}
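The `hash_head_tail` strategy implemented above can be demonstrated in isolation: for large buffers, only the length plus a fixed-size prefix and suffix feed the hasher, so two buffers differing only in the middle intentionally collide. A self-contained sketch of the same scheme:

```rust
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

// Sketch of the head/tail hashing scheme for large buffers: hash the
// length plus a fixed-size prefix and suffix rather than every byte.
fn hash_head_tail(bytes: &[u8], max_bytes_to_hash: usize) -> u64 {
    let mut state = DefaultHasher::new();
    let byte_count = bytes.len();
    byte_count.hash(&mut state);
    bytes[..max_bytes_to_hash].hash(&mut state);
    bytes[byte_count - max_bytes_to_hash..].hash(&mut state);
    state.finish()
}

fn main() {
    let a = vec![0u8; 64];
    let mut b = a.clone();
    b[32] = 9; // change a byte outside the hashed head and tail
    // Only length, head, and tail feed the hash, so these collide by design.
    assert_eq!(hash_head_tail(&a, 8), hash_head_tail(&b, 8));
}
```

This is the deliberate trade-off described in the `Hash` impl below: fewer bytes hashed, at the cost of extra collisions for buffers sharing a prefix and suffix.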

/// This type represents an Allocation in the Miri/CTFE core engine.
///
/// Its public API is rather low-level, working directly with allocation offsets and a custom error
@@ -30,10 +123,10 @@ use crate::ty;
// hashed. (see the `Hash` impl below for more details), so the impl is not derived.
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, TyEncodable, TyDecodable)]
#[derive(HashStable)]
pub struct Allocation<Prov = AllocId, Extra = ()> {
pub struct Allocation<Prov = AllocId, Extra = (), Bytes = Box<[u8]>> {
/// The actual bytes of the allocation.
/// Note that the bytes of a pointer represent the offset of the pointer.
bytes: Box<[u8]>,
bytes: Bytes,
/// Maps from byte addresses to extra provenance data for each pointer.
/// Only the first byte of a pointer is inserted into the map; i.e.,
/// every entry in this map applies to `pointer_size` consecutive bytes starting
@@ -71,14 +164,13 @@ impl hash::Hash for Allocation {
fn hash<H: hash::Hasher>(&self, state: &mut H) {
// Partially hash the `bytes` buffer when it is large. To limit collisions with common
// prefixes and suffixes, we hash the length and some slices of the buffer.
let byte_count = self.bytes.len();
let byte_count = self.bytes.get_len();
if byte_count > MAX_HASHED_BUFFER_LEN {
// Hash the buffer's length.
byte_count.hash(state);

// And its head and tail.
self.bytes[..MAX_BYTES_TO_HASH].hash(state);
self.bytes[byte_count - MAX_BYTES_TO_HASH..].hash(state);
// And its head and tail, if it is a Box<[u8]>.
self.bytes.hash_head_tail(byte_count, state, MAX_BYTES_TO_HASH);
} else {
self.bytes.hash(state);
}
@@ -205,15 +297,17 @@ impl AllocRange {
}

// The constructors are all without extra; the extra gets added by a machine hook later.
impl<Prov> Allocation<Prov> {
impl<Prov, Bytes: AllocBytes> Allocation<Prov, (), Bytes> {
/// Creates an allocation initialized by the given bytes
// FIXME! ellen make this generic for bytes
pub fn from_bytes<'a>(
slice: impl Into<Cow<'a, [u8]>>,
align: Align,
mutability: Mutability,
) -> Self {
let bytes = Box::<[u8]>::from(slice.into());
let size = Size::from_bytes(bytes.len());
let bytes = Bytes::from_bytes(slice, align);
let size = Size::from_bytes(bytes.get_len());

Self {
bytes,
provenance: ProvenanceMap::new(),
@@ -233,7 +327,7 @@ impl<Prov> Allocation<Prov> {
///
/// If `panic_on_fail` is true, this will never return `Err`.
pub fn uninit<'tcx>(size: Size, align: Align, panic_on_fail: bool) -> InterpResult<'tcx, Self> {
let bytes = Box::<[u8]>::try_new_zeroed_slice(size.bytes_usize()).map_err(|_| {
let handle_alloc_fail = || -> InterpError<'tcx> {
// This results in an error that can happen non-deterministically, since the memory
// available to the compiler can change between runs. Normally queries are always
// deterministic. However, we can be non-deterministic here because all uses of const
@@ -246,9 +340,10 @@ impl<Prov> Allocation<Prov> {
tcx.sess.delay_span_bug(DUMMY_SP, "exhausted memory during interpretation")
});
InterpError::ResourceExhaustion(ResourceExhaustionInfo::MemoryExhausted)
})?;
// SAFETY: the box was zero-allocated, which is a valid initial value for Box<[u8]>
let bytes = unsafe { bytes.assume_init() };
};

let bytes = Bytes::uninit(size, align, handle_alloc_fail)?;

Ok(Allocation {
bytes,
provenance: ProvenanceMap::new(),
@@ -260,29 +355,33 @@ impl<Prov> Allocation<Prov> {
}
}

impl Allocation {
impl<Bytes: AllocBytes> Allocation<AllocId, (), Bytes> {
/// Adjust allocation from the ones in tcx to a custom Machine instance
/// with a different Provenance and Extra type.
// FIXME! ellen make this generic for Bytes
pub fn adjust_from_tcx<Prov, Extra, Err>(
self,
cx: &impl HasDataLayout,
extra: Extra,
mut adjust_ptr: impl FnMut(Pointer<AllocId>) -> Result<Pointer<Prov>, Err>,
) -> Result<Allocation<Prov, Extra>, Err> {
) -> Result<Allocation<Prov, Extra, Bytes>, Err> {
// Compute new pointer provenance, which also adjusts the bytes.
let mut bytes = self.bytes;
// Realign the pointer
let mut bytes = self.bytes.adjust_to_align(self.align);

let mut new_provenance = Vec::with_capacity(self.provenance.0.len());
let ptr_size = cx.data_layout().pointer_size.bytes_usize();
let endian = cx.data_layout().endian;
for &(offset, alloc_id) in self.provenance.iter() {
let idx = offset.bytes_usize();
let ptr_bytes = &mut bytes[idx..idx + ptr_size];
let ptr_bytes = bytes.get_slice_from_range_mut(idx..idx + ptr_size); //&mut bytes[idx..idx + ptr_size];
let bits = read_target_uint(endian, ptr_bytes).unwrap();
let (ptr_prov, ptr_offset) =
adjust_ptr(Pointer::new(alloc_id, Size::from_bytes(bits)))?.into_parts();
write_target_uint(endian, ptr_bytes, ptr_offset.bytes().into()).unwrap();
new_provenance.push((offset, ptr_prov));
}

// Create allocation.
Ok(Allocation {
bytes,
@@ -298,7 +397,7 @@ impl Allocation {
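The provenance loop in `adjust_from_tcx` above does in-place byte surgery on each stored pointer: read a little-endian pointer-sized integer out of the buffer, replace it with the adjusted offset, and remember the old value. A self-contained sketch of that pattern (assuming a 64-bit little-endian target; `read_target_uint`/`write_target_uint` handle endianness generically in the real code):

```rust
use std::convert::TryInto;

// Sketch of the byte surgery in the provenance loop: read a little-endian
// pointer-sized integer out of the buffer, overwrite it with an adjusted
// offset, and return the old value.
fn rewrite_ptr_bytes(bytes: &mut [u8], idx: usize, new_offset: u64) -> u64 {
    const PTR_SIZE: usize = 8; // assume a 64-bit little-endian target
    let ptr_bytes = &mut bytes[idx..idx + PTR_SIZE];
    let old = u64::from_le_bytes((&*ptr_bytes).try_into().unwrap());
    ptr_bytes.copy_from_slice(&new_offset.to_le_bytes());
    old
}

fn main() {
    let mut buf = vec![0u8; 16];
    buf[4..12].copy_from_slice(&0x1122u64.to_le_bytes());
    let old = rewrite_ptr_bytes(&mut buf, 4, 0x99);
    assert_eq!(old, 0x1122);
    assert_eq!(u64::from_le_bytes(buf[4..12].try_into().unwrap()), 0x99);
}
```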
/// Raw accessors. Provide access to otherwise private bytes.
impl<Prov, Extra> Allocation<Prov, Extra> {
pub fn len(&self) -> usize {
self.bytes.len()
self.bytes.get_len()
}

pub fn size(&self) -> Size {
@@ -310,7 +409,7 @@ impl<Prov, Extra> Allocation<Prov, Extra> {
/// edges) at all.
/// This must not be used for reads affecting the interpreter execution.
pub fn inspect_with_uninit_and_ptr_outside_interpreter(&self, range: Range<usize>) -> &[u8] {
&self.bytes[range]
self.bytes.get_slice_from_range(range)
}

/// Returns the mask indicating which bytes are initialized.
@@ -326,7 +425,16 @@ impl<Prov, Extra> Allocation<Prov, Extra> {

/// Byte accessors.
impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
/// Get the pointer of the `[u8]` of bytes.
pub fn expose_base_addr(&self) -> usize {
self.bytes.expose_addr().try_into().unwrap()
}

/// This is the entirely abstraction-violating way to just grab the raw bytes without
/// caring about provenance or initialization.
///
/// The last argument controls whether we error out when there are uninitialized or pointer
/// bytes. However, we *always* error when there are relocations overlapping the edges of the
/// range.
///
/// You should never call this; call `get_bytes` or `get_bytes_with_uninit_and_ptr` instead.
///
/// This function also guarantees that the resulting pointer will remain stable
@@ -372,7 +480,9 @@ impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
self.mark_init(range, true);
self.clear_provenance(cx, range)?;

Ok(&mut self.bytes[range.start.bytes_usize()..range.end().bytes_usize()])
Ok(self
.bytes
.get_slice_from_range_mut(range.start.bytes_usize()..range.end().bytes_usize()))
}

/// A raw pointer variant of `get_bytes_mut` that avoids invalidating existing aliases into this memory.
@@ -384,8 +494,8 @@ impl<Prov: Provenance, Extra> Allocation<Prov, Extra> {
self.mark_init(range, true);
self.clear_provenance(cx, range)?;

assert!(range.end().bytes_usize() <= self.bytes.len()); // need to do our own bounds-check
let begin_ptr = self.bytes.as_mut_ptr().wrapping_add(range.start.bytes_usize());
assert!(range.end().bytes_usize() <= self.bytes.get_len()); // need to do our own bounds-check
let begin_ptr = self.bytes.add_ptr(range.start.bytes_usize());
let len = range.end().bytes_usize() - range.start.bytes_usize();
Ok(ptr::slice_from_raw_parts_mut(begin_ptr, len))
}
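The raw-pointer variant above exists to avoid invalidating existing aliases: it builds a `*mut [u8]` for a subrange without ever materializing a `&mut [u8]`. A stand-alone sketch of the same idea (hypothetical function name):

```rust
use std::ptr;

// Sketch of the raw-pointer accessor: build a *mut [u8] for a subrange
// without materializing a &mut [u8] to the whole buffer contents, so other
// raw pointers into the buffer are not invalidated by a fresh unique borrow.
fn subrange_ptr(bytes: &mut Box<[u8]>, start: usize, len: usize) -> *mut [u8] {
    assert!(start + len <= bytes.len()); // do our own bounds-check
    let begin_ptr = bytes.as_mut_ptr().wrapping_add(start);
    ptr::slice_from_raw_parts_mut(begin_ptr, len)
}

fn main() {
    let mut buf: Box<[u8]> = vec![0u8; 8].into_boxed_slice();
    let p = subrange_ptr(&mut buf, 2, 4);
    // SAFETY: p points into buf, within bounds, and no other access overlaps.
    unsafe { (*p)[0] = 7 };
    assert_eq!(buf[2], 7);
}
```

This mirrors why the trait needs `add_ptr` (or, with the `Deref` redesign, a raw-pointer accessor) rather than going through a mutable slice each time.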