Skip to content

Commit

Permalink
Auto merge of #116422 - the8472:chunked-generic-slice-eq, r=<try>
Browse files Browse the repository at this point in the history
Chunked generic slice eq

looks nice in a microbenchmark, let's see if perf agrees

```
OLD:
    slice::slice_cmp_generic 54.00ns/iter +/- 1.00ns
NEW:
    slice::slice_cmp_generic 20.00ns/iter +/- 2.00ns
```
  • Loading branch information
bors committed Oct 4, 2023
2 parents 2bbb619 + 2f78bce commit 8ea9808
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 1 deletion.
14 changes: 14 additions & 0 deletions library/core/benches/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,3 +171,17 @@ fn fold_to_last(b: &mut Bencher) {
let slice: &[i32] = &[0; 1024];
b.iter(|| black_box(slice).iter().fold(None, |_, r| Some(NonNull::from(r))));
}

/// Benchmarks `==` on two equal 128-element slices whose element type only has a derived
/// `PartialEq` implementation.
#[bench]
fn slice_cmp_generic(b: &mut Bencher) {
    #[derive(PartialEq, Clone, Copy)]
    struct Pair(u32, u32);

    let lhs_data = [Pair(128, 128); 128];
    let rhs_data = [Pair(128, 128); 128];

    b.iter(|| {
        // Pass both inputs through `black_box` so the comparison cannot be constant-folded.
        let lhs = black_box(&lhs_data);
        let rhs = black_box(&rhs_data);
        lhs[..] == rhs[..]
    });
}
43 changes: 42 additions & 1 deletion library/core/src/slice/cmp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,56 @@ impl<A, B> SlicePartialEq<B> for [A]
where
A: PartialEq<B>,
{
/// Generic element-wise slice equality.
///
/// Returns `false` immediately when the two slices differ in length; otherwise the result
/// is that of comparing the elements pairwise with `==`.
#[inline]
default fn equal(&self, other: &[B]) -> bool {
// Slices of different length can never be equal.
if self.len() != other.len() {
return false;
}

// NOTE(review): the next line looks like the pre-change body that the diff view shows as
// the single deletion of this commit; it has no terminator and is followed by the new
// code path. Confirm against the real file that it is no longer present.
self.iter().zip(other.iter()).all(|(x, y)| x == y)
// at least 8 items for unrolling to make sense (4 peeled + 4+ unrolled)
if self.len() < 8 {
// Short inputs use the plain short-circuiting comparison.
return eq_small(self, other);
}

// Lengths are equal and at least 8 here, so the chunked comparison can be used.
eq_unroll(self, other)
}
}

/// Compares `a` and `b` pairwise and stops at the first pair that is not equal.
///
/// Only the common prefix is inspected: the slices are zipped, so any elements beyond the
/// length of the shorter slice are ignored. Two empty slices compare equal.
#[inline]
fn eq_small<A, B>(a: &[A], b: &[B]) -> bool
where
    A: PartialEq<B>,
{
    for (lhs, rhs) in a.iter().zip(b.iter()) {
        // Deliberately phrased with `==` so that only `PartialEq::eq` is ever called.
        if lhs == rhs {
            continue;
        }
        return false;
    }
    true
}

/// Compares two slices element-wise, handling four elements per step after an initial
/// short-circuiting probe.
///
/// The slices are expected to have the same length; `equal` checks this before calling.
/// The comparison proceeds in three stages:
///
/// 1. the first chunk of four, compared with early exit,
/// 2. the residual elements that do not fill a whole chunk, compared with early exit,
/// 3. all remaining chunks, each compared without early exit inside the chunk.
///
/// If either slice is too short to contain one complete chunk, the function falls back to
/// `eq_small` instead of panicking.
fn eq_unroll<A, B>(a: &[A], b: &[B]) -> bool
where
    A: PartialEq<B>,
{
    let (chunks_a, residual_a) = a.as_chunks::<4>();
    let (chunks_b, residual_b) = b.as_chunks::<4>();

    // Without at least one complete chunk on each side there is nothing to peel or unroll.
    let (Some((peeled_a, rest_a)), Some((peeled_b, rest_b))) =
        (chunks_a.split_first(), chunks_b.split_first())
    else {
        return eq_small(a, b);
    };

    // peel the first chunk and do a short-circuiting comparison to bail early on mismatches
    // in case comparisons are expensive
    if !eq_small(peeled_a, peeled_b) {
        return false;
    }

    // then check the residual, another chance to bail early
    if !eq_small(residual_a, residual_b) {
        return false;
    }

    // iter.all short-circuits which means the backend can't unroll the loop due to early exits.
    // So we unroll it manually. The non-short-circuiting `&` keeps the four comparisons of a
    // chunk free of branches; `all` still stops at the first chunk that differs.
    rest_a
        .iter()
        .zip(rest_b)
        .all(|(x, y)| (x[0] == y[0]) & (x[1] == y[1]) & (x[2] == y[2]) & (x[3] == y[3]))
}

// When each element can be compared byte-wise, we can compare all the bytes
// from the whole size in one call to the intrinsics.
impl<A, B> SlicePartialEq<B> for [A]
Expand Down

0 comments on commit 8ea9808

Please sign in to comment.