From 430f19a82eb95aac1d6ac3dbd9688288cfcbe602 Mon Sep 17 00:00:00 2001 From: Alexis Bourget Date: Sun, 2 Aug 2020 20:03:48 +0200 Subject: [PATCH 1/6] Document unsafety in library/core/src/slice/mod.rs --- library/core/src/slice/mod.rs | 129 +++++++++++++++++++++++++++++++--- 1 file changed, 119 insertions(+), 10 deletions(-) diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs index 93608a1ce4864..3af8a3e46c91c 100644 --- a/library/core/src/slice/mod.rs +++ b/library/core/src/slice/mod.rs @@ -1,5 +1,4 @@ // ignore-tidy-filelength -// ignore-tidy-undocumented-unsafe //! Slice management and manipulation. //! @@ -70,6 +69,8 @@ impl [T] { #[allow(unused_attributes)] #[allow_internal_unstable(const_fn_union)] pub const fn len(&self) -> usize { + // SAFETY: this is safe because `&[T]` and `FatPtr` have the same layout. + // Only `std` can make this guarantee. unsafe { crate::ptr::Repr { rust: self }.raw.len } } @@ -443,7 +444,8 @@ impl [T] { #[unstable(feature = "slice_ptr_range", issue = "65807")] #[inline] pub fn as_ptr_range(&self) -> Range<*const T> { - // The `add` here is safe, because: + let start = self.as_ptr(); + // SAFETY: The `add` here is safe, because: // // - Both pointers are part of the same object, as pointing directly // past the object also counts. @@ -460,7 +462,6 @@ impl [T] { // the end of the address space. // // See the documentation of pointer::add. - let start = self.as_ptr(); let end = unsafe { start.add(self.len()) }; start..end } @@ -484,8 +485,8 @@ impl [T] { #[unstable(feature = "slice_ptr_range", issue = "65807")] #[inline] pub fn as_mut_ptr_range(&mut self) -> Range<*mut T> { - // See as_ptr_range() above for why `add` here is safe. let start = self.as_mut_ptr(); + // SAFETY: See as_ptr_range() above for why `add` here is safe. let end = unsafe { start.add(self.len()) }; start..end } @@ -511,6 +512,10 @@ impl [T] { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn swap(&mut self, a: usize, b: usize) { + // SAFETY: `pa` and `pb` have been created from safe mutable references and refer + // to elements in the slice and therefore are guaranteed to be valid and aligned. + // Note that accessing the elements behind `a` and `b` is checked and will + // panic when out of bounds. unsafe { // Can't take two mutable loans from one vector, so instead just cast // them to their raw pointers to do the swap @@ -554,6 +559,17 @@ impl [T] { // Use the llvm.bswap intrinsic to reverse u8s in a usize let chunk = mem::size_of::(); while i + chunk - 1 < ln / 2 { + // SAFETY: An unaligned u32 can be read from `i` if `i + 1 < ln` + // (and obviously `i < ln`), because each element is 2 bytes and + // we're reading 4. + // `i + chunk - 1 < ln / 2` # while condition + // `i + 2 - 1 < ln / 2` + // `i + 1 < ln / 2` + // Since it's less than the length divided by 2, then it must be + // in bounds. + // + // Note: when updating this comment, update the others in the + // function too. unsafe { let pa: *mut T = self.get_unchecked_mut(i); let pb: *mut T = self.get_unchecked_mut(ln - i - chunk); @@ -570,6 +586,17 @@ impl [T] { // Use rotate-by-16 to reverse u16s in a u32 let chunk = mem::size_of::() / 2; while i + chunk - 1 < ln / 2 { + // SAFETY: An unaligned u32 can be read from `i` if `i + 1 < ln` + // (and obviously `i < ln`), because each element is 2 bytes and + // we're reading 4. 
+ // `i + chunk - 1 < ln / 2` # while condition + // `i + 2 - 1 < ln / 2` + // `i + 1 < ln / 2` + // Since it's less than the length divided by 2, then it must be + // in bounds. + // + // Note: when updating this comment, update the others in the + // function too. unsafe { let pa: *mut T = self.get_unchecked_mut(i); let pb: *mut T = self.get_unchecked_mut(ln - i - chunk); @@ -583,8 +610,13 @@ impl [T] { } while i < ln / 2 { - // Unsafe swap to avoid the bounds check in safe swap. + // SAFETY: `i` is inferior to half the length of the slice so + // accessing `i` and `ln - i - 1` is safe (`i` starts at 0 and + // will not go further than `ln / 2 - 1`). + // The resulting pointers `pa` and `pb` are therefore valid and + // aligned, and can be read from and written to. unsafe { + // Unsafe swap to avoid the bounds check in safe swap. let pa: *mut T = self.get_unchecked_mut(i); let pb: *mut T = self.get_unchecked_mut(ln - i - 1); ptr::swap(pa, pb); @@ -609,6 +641,9 @@ impl [T] { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn iter(&self) -> Iter<'_, T> { + // SAFETY: adding `self.len()` to the starting pointer gives a pointer + // at the end of `self`, which fulfills the expectations of `ptr.add()` + // and `NonNull::new_unchecked()`. unsafe { let ptr = self.as_ptr(); assume(!ptr.is_null()); @@ -637,6 +672,9 @@ impl [T] { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn iter_mut(&mut self) -> IterMut<'_, T> { + // SAFETY: adding `self.len()` to the starting pointer gives a pointer + // at the end of `self`, which fulfills the expectations of `ptr.add()` + // and `NonNull::new_unchecked()`. unsafe { let ptr = self.as_mut_ptr(); assume(!ptr.is_null()); @@ -1107,6 +1145,8 @@ impl [T] { let len = self.len(); let ptr = self.as_mut_ptr(); + // SAFETY: `[ptr; mid]` and `[mid; len]` are inside `self`, which + // fulfills the requirements of `from_raw_parts_mut`. unsafe { assert!(mid <= len); @@ -1655,14 +1695,14 @@ impl [T] { while size > 1 { let half = size / 2; let mid = base + half; - // mid is always in [0, size), that means mid is >= 0 and < size. + // SAFETY: // mid >= 0: by definition // mid < size: mid = size / 2 + size / 4 + size / 8 ... let cmp = f(unsafe { s.get_unchecked(mid) }); base = if cmp == Greater { base } else { mid }; size -= half; } - // base is always in [0, size) because base <= mid. + // SAFETY: base is always in [0, size) because base <= mid. let cmp = f(unsafe { s.get_unchecked(base) }); if cmp == Equal { Ok(base) } else { Err(base + (cmp == Less) as usize) } } @@ -2120,6 +2160,16 @@ impl [T] { let mut next_read: usize = 1; let mut next_write: usize = 1; + // SAFETY: the `while` condition guarantees `next_read` and `next_write` + // are less than `len`, thus are inside `self`. `prev_ptr_write` points to + // one element before `ptr_write`, but `next_write` starts at 1, so + // `prev_ptr_write` is never less than 0 and is inside the slice. + // This fulfils the requirements for dereferencing `ptr_read`, `prev_ptr_write` + // and `ptr_write`, and for using `ptr.add(next_read)`, `ptr.add(next_write - 1)` + // and `prev_ptr_write.offset(1)`. + // + // `next_write` is also incremented at most once per loop at most meaning + // no element is skipped when it may need to be swapped. unsafe { // Avoid bounds checks by using raw pointers. 
while next_read < len { @@ -2204,6 +2254,8 @@ impl [T] { assert!(mid <= self.len()); let k = self.len() - mid; + // SAFETY: `[mid - mid;mid+k]` corresponds to the entire + // `self` slice, thus is valid for reads and writes. unsafe { let p = self.as_mut_ptr(); rotate::ptr_rotate(mid, p.add(mid), k); @@ -2245,6 +2297,8 @@ impl [T] { assert!(k <= self.len()); let mid = self.len() - k; + // SAFETY: `[mid - mid;mid+k]` corresponds to the entire + // `self` slice, thus is valid for reads and writes. unsafe { let p = self.as_mut_ptr(); rotate::ptr_rotate(mid, p.add(mid), k); @@ -2407,6 +2461,9 @@ impl [T] { T: Copy, { assert_eq!(self.len(), src.len(), "destination and source slices have different lengths"); + // SAFETY: `self` is valid for `self.len()` elements by definition, and `src` was + // checked to have the same length. The slices cannot overlap because + // mutable references are exclusive. unsafe { ptr::copy_nonoverlapping(src.as_ptr(), self.as_mut_ptr(), self.len()); } @@ -2460,6 +2517,7 @@ impl [T] { assert!(src_end <= self.len(), "src is out of bounds"); let count = src_end - src_start; assert!(dest <= self.len() - count, "dest is out of bounds"); + // SAFETY: the conditions for `ptr::copy` have all been checked above. unsafe { ptr::copy(self.as_ptr().add(src_start), self.as_mut_ptr().add(dest), count); } @@ -2515,6 +2573,9 @@ impl [T] { #[stable(feature = "swap_with_slice", since = "1.27.0")] pub fn swap_with_slice(&mut self, other: &mut [T]) { assert!(self.len() == other.len(), "destination and source slices have different lengths"); + // SAFETY: `self` is valid for `self.len()` elements by definition, and `src` was + // checked to have the same length. The slices cannot overlap because + // mutable references are exclusive. unsafe { ptr::swap_nonoverlapping(self.as_mut_ptr(), other.as_mut_ptr(), self.len()); } @@ -2546,6 +2607,8 @@ impl [T] { // iterative stein’s algorithm // We should still make this `const fn` (and revert to recursive algorithm if we do) // because relying on llvm to consteval all this is… well, it makes me uncomfortable. + + // SAFETY: `a` and `b` are checked to be non-zero values. let (ctz_a, mut ctz_b) = unsafe { if a == 0 { return b; @@ -2565,6 +2628,7 @@ impl [T] { mem::swap(&mut a, &mut b); } b = b - a; + // SAFETY: `b` is checked to be non-zero. unsafe { if b == 0 { break; @@ -3126,11 +3190,13 @@ unsafe impl SliceIndex<[T]> for usize { #[inline] fn get(self, slice: &[T]) -> Option<&T> { + // SAFETY: `self` is checked to be in bounds. if self < slice.len() { unsafe { Some(&*self.get_unchecked(slice)) } } else { None } } #[inline] fn get_mut(self, slice: &mut [T]) -> Option<&mut T> { + // SAFETY: `self` is checked to be in bounds. if self < slice.len() { unsafe { Some(&mut *self.get_unchecked_mut(slice)) } } else { None } } @@ -3171,6 +3237,7 @@ unsafe impl SliceIndex<[T]> for ops::Range { if self.start > self.end || self.end > slice.len() { None } else { + // SAFETY: `self` is checked to be valid and in bounds above. unsafe { Some(&*self.get_unchecked(slice)) } } } @@ -3180,6 +3247,7 @@ unsafe impl SliceIndex<[T]> for ops::Range { if self.start > self.end || self.end > slice.len() { None } else { + // SAFETY: `self` is checked to be valid and in bounds above. unsafe { Some(&mut *self.get_unchecked_mut(slice)) } } } @@ -3208,6 +3276,7 @@ unsafe impl SliceIndex<[T]> for ops::Range { } else if self.end > slice.len() { slice_end_index_len_fail(self.end, slice.len()); } + // SAFETY: `self` is checked to be valid and in bounds above. 
unsafe { &*self.get_unchecked(slice) } } @@ -3218,6 +3287,7 @@ unsafe impl SliceIndex<[T]> for ops::Range { } else if self.end > slice.len() { slice_end_index_len_fail(self.end, slice.len()); } + // SAFETY: `self` is checked to be valid and in bounds above. unsafe { &mut *self.get_unchecked_mut(slice) } } } @@ -3290,6 +3360,7 @@ unsafe impl SliceIndex<[T]> for ops::RangeFrom { if self.start > slice.len() { slice_start_index_len_fail(self.start, slice.len()); } + // SAFETY: `self` is checked to be valid and in bounds above. unsafe { &*self.get_unchecked(slice) } } @@ -3298,6 +3369,7 @@ unsafe impl SliceIndex<[T]> for ops::RangeFrom { if self.start > slice.len() { slice_start_index_len_fail(self.start, slice.len()); } + // SAFETY: `self` is checked to be valid and in bounds above. unsafe { &mut *self.get_unchecked_mut(slice) } } } @@ -3543,6 +3615,9 @@ macro_rules! iterator { // Helper function for creating a slice from the iterator. #[inline(always)] fn make_slice(&self) -> &'a [T] { + // SAFETY: the iterator was created from a slice with pointer + // `self.ptr` and length `len!(self)`. This guarantees that all + // the prerequisites for `from_raw_parts` are fulfilled. unsafe { from_raw_parts(self.ptr.as_ptr(), len!(self)) } } @@ -3601,6 +3676,11 @@ macro_rules! iterator { #[inline] fn next(&mut self) -> Option<$elem> { // could be implemented with slices, but this avoids bounds checks + + // SAFETY: `assume` calls are safe since a slice's start pointer + // must be non-null, and slices over non-ZSTs must also have a + // non-null end pointer. The call to `next_unchecked!` is safe + // since we check if the iterator is empty first. unsafe { assume(!self.ptr.as_ptr().is_null()); if mem::size_of::() != 0 { @@ -3634,14 +3714,14 @@ macro_rules! iterator { // could be (due to wrapping). self.end = self.ptr.as_ptr(); } else { + // SAFETY: end can't be 0 if T isn't ZST because ptr isn't 0 and end >= ptr unsafe { - // End can't be 0 if T isn't ZST because ptr isn't 0 and end >= ptr self.ptr = NonNull::new_unchecked(self.end as *mut T); } } return None; } - // We are in bounds. `post_inc_start` does the right thing even for ZSTs. + // SAFETY: We are in bounds. `post_inc_start` does the right thing even for ZSTs. unsafe { self.post_inc_start(n as isize); Some(next_unchecked!(self)) @@ -3748,6 +3828,8 @@ macro_rules! iterator { let mut i = 0; while let Some(x) = self.next() { if predicate(x) { + // SAFETY: we are guaranteed to be in bounds by the loop invariant: + // when `i >= n`, `self.next()` returns `None` and the loop breaks. unsafe { assume(i < n) }; return Some(i); } @@ -3769,6 +3851,8 @@ macro_rules! iterator { while let Some(x) = self.next_back() { i -= 1; if predicate(x) { + // SAFETY: `i` must be lower than `n` since it starts at `n` + // and is only decreasing. unsafe { assume(i < n) }; return Some(i); } @@ -3784,6 +3868,11 @@ macro_rules! iterator { #[inline] fn next_back(&mut self) -> Option<$elem> { // could be implemented with slices, but this avoids bounds checks + + // SAFETY: `assume` calls are safe since a slice's start pointer must be non-null, + // and slices over non-ZSTs must also have a non-null end pointer. + // The call to `next_back_unchecked!` is safe since we check if the iterator is + // empty first. unsafe { assume(!self.ptr.as_ptr().is_null()); if mem::size_of::() != 0 { @@ -3804,7 +3893,7 @@ macro_rules! iterator { self.end = self.ptr.as_ptr(); return None; } - // We are in bounds. `pre_dec_end` does the right thing even for ZSTs. + // SAFETY: We are in bounds. 
`pre_dec_end` does the right thing even for ZSTs. unsafe { self.pre_dec_end(n as isize); Some(next_back_unchecked!(self)) @@ -3999,6 +4088,9 @@ impl<'a, T> IterMut<'a, T> { /// ``` #[stable(feature = "iter_to_slice", since = "1.4.0")] pub fn into_slice(self) -> &'a mut [T] { + // SAFETY: the iterator was created from a mutable slice with pointer + // `self.ptr` and length `len!(self)`. This guarantees that all the prerequisites + // for `from_raw_parts_mut` are fulfilled. unsafe { from_raw_parts_mut(self.ptr.as_ptr(), len!(self)) } } @@ -6288,12 +6380,20 @@ pub unsafe fn from_raw_parts_mut<'a, T>(data: *mut T, len: usize) -> &'a mut [T] /// Converts a reference to T into a slice of length 1 (without copying). #[stable(feature = "from_ref", since = "1.28.0")] pub fn from_ref<T>(s: &T) -> &[T] { + // SAFETY: a reference is guaranteed to be valid for reads. The returned + // reference cannot be mutated as it is an immutable reference. + // `mem::size_of::<T>()` cannot be larger than `isize::MAX`. + // Thus the call to `from_raw_parts` is safe. unsafe { from_raw_parts(s, 1) } } /// Converts a reference to T into a slice of length 1 (without copying). #[stable(feature = "from_ref", since = "1.28.0")] pub fn from_mut<T>(s: &mut T) -> &mut [T] { + // SAFETY: a mutable reference is guaranteed to be valid for writes. + // The reference cannot be accessed by another pointer as it is a mutable reference. + // `mem::size_of::<T>()` cannot be larger than `isize::MAX`. + // Thus the call to `from_raw_parts_mut` is safe. unsafe { from_raw_parts_mut(s, 1) } } @@ -6414,6 +6514,8 @@ where if self.as_ptr().guaranteed_eq(other.as_ptr()) { return true; } + // SAFETY: `self` and `other` are references and are thus guaranteed to be valid. + // The two slices have been checked to have the same size above. unsafe { let size = mem::size_of_val(self); memcmp(self.as_ptr() as *const u8, other.as_ptr() as *const u8, size) == 0 @@ -6516,6 +6618,9 @@ impl SliceOrd for u8 { #[inline] fn compare(left: &[Self], right: &[Self]) -> Ordering { let order = + // SAFETY: `left` and `right` are references and are thus guaranteed to be valid. + // We use the minimum of both lengths which guarantees that both regions are + // valid for reads in that interval. unsafe { memcmp(left.as_ptr(), right.as_ptr(), cmp::min(left.len(), right.len())) }; if order == 0 { left.len().cmp(&right.len()) @@ -6590,6 +6695,10 @@ impl SliceContains for u8 { impl SliceContains for i8 { fn slice_contains(&self, x: &[Self]) -> bool { let byte = *self as u8; + // SAFETY: `i8` and `u8` have the same memory layout, thus casting `x.as_ptr()` + // as `*const u8` is safe. The `x.as_ptr()` comes from a reference and is thus guaranteed + // to be valid for reads for the length of the slice `x.len()`, which cannot be larger + // than `isize::MAX`. The returned slice is never mutated.
let bytes: &[u8] = unsafe { from_raw_parts(x.as_ptr() as *const u8, x.len()) }; memchr::memchr(byte, bytes).is_some() } From 92b1975eaaae0891cb8d5ed50fb7a17f37ce0b38 Mon Sep 17 00:00:00 2001 From: Alexis Bourget Date: Mon, 3 Aug 2020 22:16:50 +0200 Subject: [PATCH 2/6] Added the missing SAFETY: comments --- library/core/src/slice/mod.rs | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs index 3af8a3e46c91c..32ffdc26d6103 100644 --- a/library/core/src/slice/mod.rs +++ b/library/core/src/slice/mod.rs @@ -1695,9 +1695,9 @@ impl [T] { while size > 1 { let half = size / 2; let mid = base + half; - // SAFETY: - // mid >= 0: by definition - // mid < size: mid = size / 2 + size / 4 + size / 8 ... + // SAFETY: the call is made safe by the following invariants: + // - `mid >= 0`: by definition + // - `mid < size`: `mid = size / 2 + size / 4 + size / 8 ...` let cmp = f(unsafe { s.get_unchecked(mid) }); base = if cmp == Greater { base } else { mid }; size -= half; @@ -2690,6 +2690,7 @@ impl [T] { // First, find at what point do we split between the first and 2nd slice. Easy with // ptr.align_offset. let ptr = self.as_ptr(); + // SAFETY: See the `align_to_mut` method for the detailed safety comment. let offset = unsafe { crate::ptr::align_offset(ptr, mem::align_of::<U>()) }; if offset > self.len() { (self, &[], &[]) } else { @@ -2749,6 +2750,13 @@ impl [T] { // First, find at what point do we split between the first and 2nd slice. Easy with // ptr.align_offset. let ptr = self.as_ptr(); + // SAFETY: Here we are ensuring we will use aligned pointers for U for the + // rest of the method. This is done by passing a pointer to &[T] with an + // alignment targeted for U. + // `crate::ptr::align_offset` is called with a correctly aligned and + // valid pointer `ptr` (it comes from a reference to `self`) and with + // a size that is a power of two (since it comes from the alignment for U), + // satisfying its safety constraints. let offset = unsafe { crate::ptr::align_offset(ptr, mem::align_of::<U>()) }; if offset > self.len() { (self, &mut [], &mut []) } else { @@ -2874,15 +2882,13 @@ impl [T] { while left != right { let mid = left + (right - left) / 2; - // SAFETY: - // When left < right, left <= mid < right. - // Therefore left always increases and right always decreases, - // and eigher of them is selected. - // In both cases left <= right is satisfied. - // Therefore if left < right in a step, - // left <= right is satisfied in the next step. - // Therefore as long as left != right, 0 <= left < right <= len is satisfied - // and if this case 0 <= mid < len is satisfied too. + // SAFETY: When `left < right`, `left <= mid < right`. + // Therefore `left` always increases and `right` always decreases, + // and either of them is selected. In both cases `left <= right` is + // satisfied. Therefore if `left < right` in a step, `left <= right` + // is satisfied in the next step. Therefore as long as `left != right`, + // `0 <= left < right <= len` is satisfied and in this case + // `0 <= mid < len` is satisfied too. let value = unsafe { self.get_unchecked(mid) }; if pred(value) { left = mid + 1; @@ -3002,7 +3008,8 @@ fn is_ascii(s: &[u8]) -> bool { // above. debug_assert!(offset_to_aligned <= len); - // word_ptr is the (properly aligned) usize ptr we use to read the middle chunk of the slice. + // SAFETY: word_ptr is the (properly aligned) usize ptr we use to read the + // middle chunk of the slice.
let mut word_ptr = unsafe { start.add(offset_to_aligned) as *const usize }; // `byte_pos` is the byte index of `word_ptr`, used for loop end checks. @@ -5660,6 +5667,8 @@ impl FusedIterator for ArrayChunks<'_, T, N> {} #[unstable(feature = "array_chunks", issue = "74985")] unsafe impl<'a, T, const N: usize> TrustedRandomAccess for ArrayChunks<'a, T, N> { unsafe fn get_unchecked(&mut self, i: usize) -> &'a [T; N] { + // SAFETY: The safety guarantees of `get_unchecked` are transferred to + // the caller. unsafe { self.iter.get_unchecked(i) } } fn may_have_side_effect() -> bool { From 3a709fe7021218b613aa7fdce7794df5a8ff0da5 Mon Sep 17 00:00:00 2001 From: Alexis Bourget Date: Sat, 8 Aug 2020 15:40:10 +0200 Subject: [PATCH 3/6] Add precisions about ZSTs and fix nits raised in review --- library/core/src/slice/mod.rs | 82 ++++++++++++++++++++++++++--------- 1 file changed, 62 insertions(+), 20 deletions(-) diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs index 32ffdc26d6103..4aba53c159b0c 100644 --- a/library/core/src/slice/mod.rs +++ b/library/core/src/slice/mod.rs @@ -512,15 +512,15 @@ impl [T] { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn swap(&mut self, a: usize, b: usize) { + // Can't take two mutable loans from one vector, so instead just cast + // them to their raw pointers to do the swap. + let pa: *mut T = &mut self[a]; + let pb: *mut T = &mut self[b]; // SAFETY: `pa` and `pb` have been created from safe mutable references and refer // to elements in the slice and therefore are guaranteed to be valid and aligned. // Note that accessing the elements behind `a` and `b` is checked and will // panic when out of bounds. unsafe { - // Can't take two mutable loans from one vector, so instead just cast - // them to their raw pointers to do the swap - let pa: *mut T = &mut self[a]; - let pb: *mut T = &mut self[b]; ptr::swap(pa, pb); } } @@ -559,15 +559,21 @@ impl [T] { // Use the llvm.bswap intrinsic to reverse u8s in a usize let chunk = mem::size_of::(); while i + chunk - 1 < ln / 2 { - // SAFETY: An unaligned u32 can be read from `i` if `i + 1 < ln` - // (and obviously `i < ln`), because each element is 2 bytes and - // we're reading 4. + // SAFETY: An unaligned usize can be read from `i` if `i + 1 < ln` + // (and obviously `i < ln`), because each element is 1 byte and + // we're reading 2. + // // `i + chunk - 1 < ln / 2` # while condition // `i + 2 - 1 < ln / 2` // `i + 1 < ln / 2` + // // Since it's less than the length divided by 2, then it must be // in bounds. // + // This also means that the condition `0 < i + chunk <= ln` is + // always respected, ensuring the `pb` pointer can be used + // safely. + // // Note: when updating this comment, update the others in the // function too. unsafe { @@ -589,12 +595,18 @@ impl [T] { // SAFETY: An unaligned u32 can be read from `i` if `i + 1 < ln` // (and obviously `i < ln`), because each element is 2 bytes and // we're reading 4. + // // `i + chunk - 1 < ln / 2` # while condition // `i + 2 - 1 < ln / 2` // `i + 1 < ln / 2` + // // Since it's less than the length divided by 2, then it must be // in bounds. // + // This also means that the condition `0 < i + chunk <= ln` is + // always respected, ensuring the `pb` pointer can be used + // safely. + // // Note: when updating this comment, update the others in the // function too. 
unsafe { @@ -641,11 +653,23 @@ impl [T] { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn iter(&self) -> Iter<'_, T> { - // SAFETY: adding `self.len()` to the starting pointer gives a pointer - // at the end of `self`, which fulfills the expectations of `ptr.add()` - // and `NonNull::new_unchecked()`. + let ptr = self.as_ptr(); + // SAFETY: There are several things here: + // + // `ptr` has been checked for nullity before being passed to `NonNull` via + // `new_unchecked`. + // + // Adding `self.len()` to the starting pointer gives a pointer + // at the end of `self`. `end` will never be dereferenced, only checked + // for direct pointer equality with `ptr` to check if the iterator is + // done. + // + // In the case of a ZST, the end pointer is just the start pointer plus + // the length, to also allows for the fast `ptr == end` check. + // + // See the `next_unchecked!` and `is_empty!` macros as well as the + // `post_inc_start` method for more informations. unsafe { - let ptr = self.as_ptr(); assume(!ptr.is_null()); let end = if mem::size_of::() == 0 { @@ -672,11 +696,23 @@ impl [T] { #[stable(feature = "rust1", since = "1.0.0")] #[inline] pub fn iter_mut(&mut self) -> IterMut<'_, T> { - // SAFETY: adding `self.len()` to the starting pointer gives a pointer - // at the end of `self`, which fulfills the expectations of `ptr.add()` - // and `NonNull::new_unchecked()`. + let ptr = self.as_mut_ptr(); + // SAFETY: There are several things here: + // + // `ptr` has been checked for nullity before being passed to `NonNull` via + // `new_unchecked`. + // + // Adding `self.len()` to the starting pointer gives a pointer + // at the end of `self`. `end` will never be dereferenced, only checked + // for direct pointer equality with `ptr` to check if the iterator is + // done. + // + // In the case of a ZST, the end pointer is just the start pointer plus + // the length, to also allows for the fast `ptr == end` check. + // + // See the `next_unchecked!` and `is_empty!` macros as well as the + // `post_inc_start` method for more informations. unsafe { - let ptr = self.as_mut_ptr(); assume(!ptr.is_null()); let end = if mem::size_of::() == 0 { @@ -2170,6 +2206,11 @@ impl [T] { // // `next_write` is also incremented at most once per loop at most meaning // no element is skipped when it may need to be swapped. + // + // `ptr_read` and `prev_ptr_write` never point to the same element. This + // is required for `&mut *ptr_read`, `&mut *prev_ptr_write` to be safe. + // The explanation is simply that `next_read >= next_write` is always true, + // thus `next_read > next_write - 1` is too. unsafe { // Avoid bounds checks by using raw pointers. while next_read < len { @@ -2253,11 +2294,11 @@ impl [T] { pub fn rotate_left(&mut self, mid: usize) { assert!(mid <= self.len()); let k = self.len() - mid; + let p = self.as_mut_ptr(); - // SAFETY: `[mid - mid;mid+k]` corresponds to the entire + // SAFETY: `[mid; mid+k]` corresponds to the entire // `self` slice, thus is valid for reads and writes. unsafe { - let p = self.as_mut_ptr(); rotate::ptr_rotate(mid, p.add(mid), k); } } @@ -2296,11 +2337,11 @@ impl [T] { pub fn rotate_right(&mut self, k: usize) { assert!(k <= self.len()); let mid = self.len() - k; + let p = self.as_mut_ptr(); - // SAFETY: `[mid - mid;mid+k]` corresponds to the entire + // SAFETY: `[mid; mid+k]` corresponds to the entire // `self` slice, thus is valid for reads and writes. 
unsafe { - let p = self.as_mut_ptr(); rotate::ptr_rotate(mid, p.add(mid), k); } } @@ -2517,7 +2558,8 @@ impl [T] { assert!(src_end <= self.len(), "src is out of bounds"); let count = src_end - src_start; assert!(dest <= self.len() - count, "dest is out of bounds"); - // SAFETY: the conditions for `ptr::copy` have all been checked above. + // SAFETY: the conditions for `ptr::copy` have all been checked above, + // as have those for `ptr::add`. unsafe { ptr::copy(self.as_ptr().add(src_start), self.as_mut_ptr().add(dest), count); } From 5a0de2f828db7c06624f55f69877197f9b26c753 Mon Sep 17 00:00:00 2001 From: Alexis Bourget Date: Sun, 9 Aug 2020 16:43:24 +0200 Subject: [PATCH 4/6] Improve safety comments for usize, fix some other unclear parts --- library/core/src/slice/mod.rs | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs index 4aba53c159b0c..f028d68baeaee 100644 --- a/library/core/src/slice/mod.rs +++ b/library/core/src/slice/mod.rs @@ -561,11 +561,15 @@ impl [T] { while i + chunk - 1 < ln / 2 { // SAFETY: An unaligned usize can be read from `i` if `i + 1 < ln` // (and obviously `i < ln`), because each element is 1 byte and - // we're reading 2. + // we're reading `chunk`. + // + // Since we checked for the `x86` and `x86_64` target before + // getting here so `chunk` is at most 8 bytes. // // `i + chunk - 1 < ln / 2` # while condition - // `i + 2 - 1 < ln / 2` - // `i + 1 < ln / 2` + // `i + 8 - 1 < ln / 2` + // `i + 7 < ln / 2` + // so obviously `i + 1 < ln / 2` // // Since it's less than the length divided by 2, then it must be // in bounds. @@ -656,8 +660,9 @@ impl [T] { let ptr = self.as_ptr(); // SAFETY: There are several things here: // - // `ptr` has been checked for nullity before being passed to `NonNull` via - // `new_unchecked`. + // `ptr` has been obtained by `self.as_ptr()` where `self` is a valid + // reference, thus it is non-null and safe to use and pass to + // `NonNull::new_unchecked`. // // Adding `self.len()` to the starting pointer gives a pointer @@ -699,8 +704,9 @@ impl [T] { let ptr = self.as_mut_ptr(); // SAFETY: There are several things here: // - // `ptr` has been checked for nullity before being passed to `NonNull` via - // `new_unchecked`. + // `ptr` has been obtained by `self.as_mut_ptr()` where `self` is a valid + // reference, thus it is non-null and safe to use and pass to + // `NonNull::new_unchecked`. // // Adding `self.len()` to the starting pointer gives a pointer @@ -2296,8 +2302,8 @@ impl [T] { let k = self.len() - mid; let p = self.as_mut_ptr(); - // SAFETY: `[mid; mid+k]` corresponds to the entire - // `self` slice, thus is valid for reads and writes. + // SAFETY: The range `[p.add(mid) - mid, p.add(mid) + k)` is trivially + // valid for reading and writing, as required by `ptr_rotate`. unsafe { rotate::ptr_rotate(mid, p.add(mid), k); } @@ -2339,8 +2345,8 @@ impl [T] { let mid = self.len() - k; let p = self.as_mut_ptr(); - // SAFETY: `[mid; mid+k]` corresponds to the entire - // `self` slice, thus is valid for reads and writes. + // SAFETY: The range `[p.add(mid) - mid, p.add(mid) + k)` is trivially + // valid for reading and writing, as required by `ptr_rotate`.
unsafe { rotate::ptr_rotate(mid, p.add(mid), k); } From 91ba92b6df4d1626c484108d4018f359f07b16ae Mon Sep 17 00:00:00 2001 From: Alexis Bourget Date: Tue, 11 Aug 2020 21:23:00 +0200 Subject: [PATCH 5/6] Change safety comment for usize with the one from LukasKalbertodt review --- library/core/src/slice/mod.rs | 40 +++++++++++++++-------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs index f028d68baeaee..8ad3c414454ae 100644 --- a/library/core/src/slice/mod.rs +++ b/library/core/src/slice/mod.rs @@ -559,27 +559,24 @@ impl [T] { // Use the llvm.bswap intrinsic to reverse u8s in a usize let chunk = mem::size_of::(); while i + chunk - 1 < ln / 2 { - // SAFETY: An unaligned usize can be read from `i` if `i + 1 < ln` - // (and obviously `i < ln`), because each element is 1 byte and - // we're reading `chunk`. + // SAFETY: // - // Since we checked for the `x86` and `x86_64` target before - // getting here so `chunk` is at most 8 bytes. - // - // `i + chunk - 1 < ln / 2` # while condition - // `i + 8 - 1 < ln / 2` - // `i + 7 < ln / 2` - // so obviously `i + 1 < ln / 2` - // - // Since it's less than the length divided by 2, then it must be - // in bounds. - // - // This also means that the condition `0 < i + chunk <= ln` is - // always respected, ensuring the `pb` pointer can be used - // safely. - // - // Note: when updating this comment, update the others in the - // function too. + // - Note that `chunk` is either 4 or 8 due to the cfg check + // above. So `chunk - 1` is positive. + // - Indexing with index `i` is fine as the loop check guarantees + // `i + chunk - 1 < ln / 2` + // <=> `i < ln / 2 - (chunk - 1) < ln / 2 < ln`. + // - Indexing with index `ln - i - chunk = ln - (i + chunk)` is fine: + // - `i + chunk > 0` is trivially true. + // - The loop check guarantees: + // `i + chunk - 1 < ln / 2` + // <=> `i + chunk ≤ ln / 2 ≤ ln`, thus subtraction does not underflow. + // - The `read_unaligned` and `write_unaligned` calls are fine: + // - `pa` points to index `i` where `i < ln / 2 - (chunk - 1)` + // (see above) and `pb` points to index `ln - i - chunk`, so + // both are at least `chunk` + // many bytes away from the end of `self`. + // - Any initialized memory is valid `usize`. unsafe { let pa: *mut T = self.get_unchecked_mut(i); let pb: *mut T = self.get_unchecked_mut(ln - i - chunk); @@ -610,9 +607,6 @@ impl [T] { // This also means that the condition `0 < i + chunk <= ln` is // always respected, ensuring the `pb` pointer can be used // safely. - // - // Note: when updating this comment, update the others in the - // function too. 
unsafe { let pa: *mut T = self.get_unchecked_mut(i); let pb: *mut T = self.get_unchecked_mut(ln - i - chunk); From a308e74e1310fb02bcc4b46c44da769dfe426812 Mon Sep 17 00:00:00 2001 From: Poliorcetics Date: Tue, 11 Aug 2020 21:37:22 +0200 Subject: [PATCH 6/6] Add some texts to make the tidy check for unsafe documentation pass --- library/core/src/slice/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/core/src/slice/mod.rs b/library/core/src/slice/mod.rs index 8ad3c414454ae..f511ca8eb7851 100644 --- a/library/core/src/slice/mod.rs +++ b/library/core/src/slice/mod.rs @@ -559,7 +559,7 @@ impl [T] { // Use the llvm.bswap intrinsic to reverse u8s in a usize let chunk = mem::size_of::(); while i + chunk - 1 < ln / 2 { - // SAFETY: + // SAFETY: There are several things to check here: // // - Note that `chunk` is either 4 or 8 due to the cfg check // above. So `chunk - 1` is positive.
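The convention this series applies, and which the tidy check mentioned in the last commit enforces once `// ignore-tidy-undocumented-unsafe` is removed in the first patch, is that every `unsafe` block carries a `// SAFETY:` comment immediately above it explaining why the preconditions of the unsafe operations it contains are met. A minimal sketch of the pattern, using an illustrative function that is not part of the patch:

    /// Returns the first element of `v`, or `None` if `v` is empty.
    fn first(v: &[u32]) -> Option<u32> {
        if v.is_empty() {
            return None;
        }
        // SAFETY: `v` is non-empty (checked just above), so index 0 is in
        // bounds and `get_unchecked(0)` yields a reference to a valid,
        // aligned `u32`.
        Some(unsafe { *v.get_unchecked(0) })
    }

The comment records the argument a reviewer would otherwise have to reconstruct, which is what the SAFETY comments added throughout `library/core/src/slice/mod.rs` do for the standard library's own unsafe blocks.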