From 262452308a3192fecc3f4d88a2690dc67e681ef5 Mon Sep 17 00:00:00 2001
From: The 8472 <git@infinite-source.de>
Date: Wed, 4 Oct 2023 16:21:02 +0200
Subject: [PATCH 1/3] add benchmark for generic slice PartialEq impl

---
 library/core/benches/slice.rs | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
diff --git a/library/core/benches/slice.rs b/library/core/benches/slice.rs
index 3bfb35e684ea1..1ec51653d92ef 100644
--- a/library/core/benches/slice.rs
+++ b/library/core/benches/slice.rs
@@ -171,3 +171,17 @@ fn fold_to_last(b: &mut Bencher) {
     let slice: &[i32] = &[0; 1024];
     b.iter(|| black_box(slice).iter().fold(None, |_, r| Some(NonNull::from(r))));
 }
+
+#[bench]
+fn slice_cmp_generic(b: &mut Bencher) {
+    // element type with a derived (non-primitive) `PartialEq`, compared as slices below
+    #[derive(PartialEq, Clone, Copy)]
+    struct Pair(u32, u32);
+
+    let (lhs, rhs) = ([Pair(128, 128); 128], [Pair(128, 128); 128]);
+
+    b.iter(|| {
+        let (lhs, rhs) = (black_box(&lhs), black_box(&rhs));
+        lhs[..] == rhs[..]
+    });
+}

From 2f78bce940fbcf4d1fb0ca14c650aeab870a1683 Mon Sep 17 00:00:00 2001
From: The 8472 <git@infinite-source.de>
Date: Wed, 4 Oct 2023 16:21:21 +0200
Subject: [PATCH 2/3] unroll slice::equal impl

---
 library/core/src/slice/cmp.rs | 43 ++++++++++++++++++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/library/core/src/slice/cmp.rs b/library/core/src/slice/cmp.rs
index 075347b80d031..3231e4313a8a3 100644
--- a/library/core/src/slice/cmp.rs
+++ b/library/core/src/slice/cmp.rs
@@ -55,15 +55,56 @@ impl<A, B> SlicePartialEq<B> for [A]
 where
     A: PartialEq<B>,
 {
+    #[inline]
     default fn equal(&self, other: &[B]) -> bool {
         if self.len() != other.len() {
             return false;
         }
 
-        self.iter().zip(other.iter()).all(|(x, y)| x == y)
+        // at least 8 items for unrolling to make sense (4 peeled + 4+ unrolled)
+        if self.len() < 8 {
+            return eq_small(self, other);
+        }
+
+        eq_unroll(self, other)
     }
 }
 
+#[inline]
+fn eq_small<A, B>(a: &[A], b: &[B]) -> bool
+where
+    A: PartialEq<B>,
+{
+    a.iter().zip(b).all(|(a, b)| a == b)
+}
+
+fn eq_unroll<A, B>(a: &[A], b: &[B]) -> bool
+where
+    A: PartialEq<B>,
+{
+    let (mut chunks_a, residual_a) = a.as_chunks::<4>();
+    let (mut chunks_b, residual_b) = b.as_chunks::<4>();
+    let peeled_a = chunks_a.take_first().unwrap();
+    let peeled_b = chunks_b.take_first().unwrap();
+
+    // peel the first chunk and do a short-circuiting comparison to bail early on mismatches
+    // in case comparisons are expensive
+    let mut result = eq_small(peeled_a, peeled_b);
+
+    // then check the residual, another chance to bail early
+    result = result && eq_small(residual_a, residual_b);
+
+    // iter.all short-circuits which means the backend can't unroll the loop due to early exits.
+    // So we unroll it manually.
+    result = result
+        && chunks_a
+            .iter()
+            .zip(chunks_b)
+            .all(|(a, b)| (a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3]));
+
+    result
+}
+
 // When each element can be compared byte-wise, we can compare all the bytes
 // from the whole size in one call to the intrinsics.
 impl<A, B> SlicePartialEq<B> for [A]

From ef4600dc88eb9dffd45cac9df5bea9007326bd08 Mon Sep 17 00:00:00 2001
From: The 8472 <git@infinite-source.de>
Date: Thu, 5 Oct 2023 14:39:17 +0200
Subject: [PATCH 3/3] handcode the loops so LLVM has to chew less IR

---
 library/core/src/slice/cmp.rs | 70 ++++++++++++++++-------------------
 1 file changed, 31 insertions(+), 39 deletions(-)

diff --git a/library/core/src/slice/cmp.rs b/library/core/src/slice/cmp.rs
index 3231e4313a8a3..8082b48552c73 100644
--- a/library/core/src/slice/cmp.rs
+++ b/library/core/src/slice/cmp.rs
@@ -61,48 +61,40 @@ where
             return false;
         }
 
-        // at least 8 items for unrolling to make sense (4 peeled + 4+ unrolled)
-        if self.len() < 8 {
-            return eq_small(self, other);
+        // ZSTs have no identity and slices don't guarantee which addresses-to-ZSTs they produce
+        // so we only need to compare them once to determine the behavior of the PartialEq impl
+        if const { mem::size_of::<A>() == 0 && mem::size_of::<B>() == 0 } {
+            // Compare the first pair through references the slices already hold. Conjuring
+            // values with `mem::zeroed` would drop the temporaries, running a user `Drop` impl.
+            return match (self.first(), other.first()) {
+                (Some(a), Some(b)) => a == b,
+                _ => true, // lengths are equal here, so both slices are empty
+            };
         }
 
-        eq_unroll(self, other)
-    }
-}
-
-#[inline]
-fn eq_small<A, B>(a: &[A], b: &[B]) -> bool
-where
-    A: PartialEq<B>,
-{
-    a.iter().zip(b).all(|(a, b)| a == b)
-}
+        const UNROLL: usize = 4;
+        let mut i = 0;
+        let mut is_eq = true;
+        while i + UNROLL < self.len() && is_eq {
+            // SAFETY: slices are of the same length and loop conditions ensure indexes are in bounds
+            unsafe {
+                is_eq = is_eq & (self.get_unchecked(i) == other.get_unchecked(i));
+                is_eq = is_eq & (self.get_unchecked(i + 1) == other.get_unchecked(i + 1));
+                is_eq = is_eq & (self.get_unchecked(i + 2) == other.get_unchecked(i + 2));
+                is_eq = is_eq & (self.get_unchecked(i + 3) == other.get_unchecked(i + 3));
+                i = i.unchecked_add(UNROLL);
+            }
+        }
+        while i < self.len() && is_eq {
+            // SAFETY: slices are of the same length and loop conditions ensure indexes are in bounds
+            unsafe {
+                is_eq = is_eq & (self.get_unchecked(i) == other.get_unchecked(i));
+                i = i.unchecked_add(1);
+            }
+        }
 
-fn eq_unroll<A, B>(a: &[A], b: &[B]) -> bool
-where
-    A: PartialEq<B>,
-{
-    let (mut chunks_a, residual_a) = a.as_chunks::<4>();
-    let (mut chunks_b, residual_b) = b.as_chunks::<4>();
-    let peeled_a = chunks_a.take_first().unwrap();
-    let peeled_b = chunks_b.take_first().unwrap();
-
-    // peel the first chunk and do a short-circuiting comparison to bail early on mismatches
-    // in case comparisons are expensive
-    let mut result = eq_small(peeled_a, peeled_b);
-
-    // then check the residual, another chance to bail early
-    result = result && eq_small(residual_a, residual_b);
-
-    // iter.all short-circuits which means the backend can't unroll the loop due to early exits.
-    // So we unroll it manually.
-    result = result
-        && chunks_a
-            .iter()
-            .zip(chunks_b)
-            .all(|(a, b)| (a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3]));
-
-    result
+        is_eq
+    }
 }
 
 // When each element can be compared byte-wise, we can compare all the bytes