Skip to content

Commit

Permalink
compiler-rt: remove manual unrolling code from memcpy
Browse files Browse the repository at this point in the history
  • Loading branch information
dweiller committed Jan 17, 2025
1 parent 48b28db commit c3a3c30
Showing 1 changed file with 7 additions and 26 deletions.
33 changes: 7 additions & 26 deletions lib/compiler_rt/memcpy.zig
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,11 @@ fn memcpySmall(noalias dest: ?[*]u8, noalias src: ?[*]const u8, len: usize) call
/// Copies `len` bytes from `src` to `dest` and returns `dest`.
///
/// `dest` and `src` are unwrapped with `.?`, so both must be non-null.
/// NOTE(review): `copySmallLength` is defined outside this view; it presumably
/// handles every length below `small_limit` and reports whether it did so.
fn memcpyFast(noalias dest: ?[*]u8, noalias src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
    @setRuntimeSafety(builtin.is_test);

    // `copyForwards` asserts `len >= 2 * @sizeOf(Element)`, so shorter copies
    // have to be dealt with before reaching it.
    const small_limit = 2 * @sizeOf(Element);

    if (copySmallLength(small_limit, dest.?, src.?, len)) return dest;

    copyForwards(dest.?, src.?, len);

    return dest;
}
Expand Down Expand Up @@ -107,22 +105,20 @@ inline fn copy16ToSmallLimit(
}

inline fn copyForwards(
comptime unroll_count: comptime_int,
noalias dest: [*]u8,
noalias src: [*]const u8,
len: usize,
) void {
@setRuntimeSafety(builtin.is_test);
assert(len >= 2 * @sizeOf(Element));
assert(len >= unroll_count * @sizeOf(Element));

dest[0..@sizeOf(Element)].* = src[0..@sizeOf(Element)].*;
const alignment_offset = @alignOf(Element) - @intFromPtr(src) % @alignOf(Element);
const n = len - alignment_offset;
const d = dest + alignment_offset;
const s = src + alignment_offset;

copyBlocksAlignedSource(@ptrCast(d), @alignCast(@ptrCast(s)), n, unroll_count);
copyBlocksAlignedSource(@ptrCast(d), @alignCast(@ptrCast(s)), n);

// copy last `@sizeOf(Element)` bytes unconditionally, since block copy
// methods only copy a multiple of `@sizeOf(Element)` bytes.
Expand All @@ -134,41 +130,26 @@ inline fn copyBlocksAlignedSource(
noalias dest: [*]align(1) Element,
noalias src: [*]const Element,
max_bytes: usize,
comptime unroll_count: comptime_int,
) void {
copyBlocks(dest, src, max_bytes, unroll_count);
copyBlocks(dest, src, max_bytes);
}

/// Copies whole elements of type `T` from `src` to `dest`, where `T` is the
/// child type of both `src` and `dest`.
///
/// Exactly `max_bytes / @sizeOf(T)` elements (rounded down) are copied, i.e.
/// the largest multiple of `@sizeOf(T)` bytes that does not exceed
/// `max_bytes`. Bytes past that point are not touched.
inline fn copyBlocks(
    noalias dest: anytype,
    noalias src: anytype,
    max_bytes: usize,
) void {
    @setRuntimeSafety(builtin.is_test);

    const T = @typeInfo(@TypeOf(dest)).pointer.child;
    // Both pointers must refer to the same element type.
    comptime assert(T == @typeInfo(@TypeOf(src)).pointer.child);

    // Integer division drops any partial trailing element.
    const loop_count = max_bytes / @sizeOf(T);

    for (dest[0..loop_count], src[0..loop_count]) |*d, s| {
        d.* = s;
    }
}
Expand Down

0 comments on commit c3a3c30

Please sign in to comment.