From f8c0a882994d019d3d39763d5c8ff8f4eb3a95c3 Mon Sep 17 00:00:00 2001
From: bjorn3 <17426603+bjorn3@users.noreply.github.com>
Date: Wed, 10 Aug 2022 19:34:51 +0200
Subject: [PATCH] Fix sret for AArch64 (#4634)

* Fix sret for AArch64

AArch64 requires the struct return address argument to be passed in the x8
register (the AAPCS64 indirect result location register). This register is
never used for regular arguments.

* Add extra sret tests for x86_64
---
 cranelift/codegen/src/isa/aarch64/abi.rs      | 19 +++++++
 .../filetests/filetests/isa/aarch64/call.clif | 51 +++++++++++++++++++
 .../filetests/isa/x64/struct-ret.clif         | 42 +++++++++++++++
 3 files changed, 112 insertions(+)

diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs
index f9bfc1e740e7..e5e6961a83ed 100644
--- a/cranelift/codegen/src/isa/aarch64/abi.rs
+++ b/cranelift/codegen/src/isa/aarch64/abi.rs
@@ -143,6 +143,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
             let (rcs, reg_types) = Inst::rc_for_type(param.value_type)?;
 
             if let ir::ArgumentPurpose::StructArgument(size) = param.purpose {
+                assert_eq!(args_or_rets, ArgsOrRets::Args);
                 let offset = next_stack as i64;
                 let size = size as u64;
                 assert!(size % 8 == 0, "StructArgument size is not properly aligned");
@@ -156,6 +157,24 @@ impl ABIMachineSpec for AArch64MachineDeps {
                 continue;
             }
 
+            if let ir::ArgumentPurpose::StructReturn = param.purpose {
+                // FIXME: add `assert_eq!(args_or_rets, ArgsOrRets::Args);` once
+                // `ensure_struct_return_ptr_is_returned` is gone.
+                assert!(
+                    param.value_type == types::I64,
+                    "StructReturn must be a pointer sized integer"
+                );
+                ret.push(ABIArg::Slots {
+                    slots: smallvec![ABIArgSlot::Reg {
+                        reg: xreg(8).to_real_reg().unwrap(),
+                        ty: types::I64,
+                        extension: param.extension,
+                    },],
+                    purpose: ir::ArgumentPurpose::StructReturn,
+                });
+                continue;
+            }
+
             // Handle multi register params
             //
             // See AArch64 ABI (https://github.com/ARM-software/abi-aa/blob/2021Q1/aapcs64/aapcs64.rst#642parameter-passing-rules), (Section 6.4.2 Stage C).
diff --git a/cranelift/filetests/filetests/isa/aarch64/call.clif b/cranelift/filetests/filetests/isa/aarch64/call.clif
index 97a262b2320a..b1c6d211aa08 100644
--- a/cranelift/filetests/filetests/isa/aarch64/call.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/call.clif
@@ -452,3 +452,54 @@ block0:
 ;   str w7, [x11]
 ;   ret
 
+function %f17(i64 sret) {
+block0(v0: i64):
+    v1 = iconst.i64 42
+    store v1, v0
+    return
+}
+
+; block0:
+;   mov x5, x8
+;   movz x4, #42
+;   str x4, [x8]
+;   ret
+
+function %f18(i64) -> i64 {
+    fn0 = %g(i64 sret) -> i64
+
+block0(v0: i64):
+    v1 = call fn0(v0)
+    return v1
+}
+
+;   stp fp, lr, [sp, #-16]!
+;   mov fp, sp
+; block0:
+;   mov x8, x0
+;   ldr x5, 8 ; b 12 ; data TestCase { length: 1, ascii: [103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0
+;   blr x5
+;   mov x0, x8
+;   ldp fp, lr, [sp], #16
+;   ret
+
+function %f19(i64 sret) {
+    fn0 = %g(i64 sret)
+
+block0(v0: i64):
+    call fn0(v0)
+    return
+}
+
+;   stp fp, lr, [sp, #-16]!
+;   mov fp, sp
+;   str x24, [sp, #-16]!
+; block0:
+;   mov x24, x8
+;   ldr x5, 8 ; b 12 ; data TestCase { length: 1, ascii: [103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0
+;   blr x5
+;   mov x8, x24
+;   ldr x24, [sp], #16
+;   ldp fp, lr, [sp], #16
+;   ret
+
diff --git a/cranelift/filetests/filetests/isa/x64/struct-ret.clif b/cranelift/filetests/filetests/isa/x64/struct-ret.clif
index a13136356908..4046ba89fc7a 100644
--- a/cranelift/filetests/filetests/isa/x64/struct-ret.clif
+++ b/cranelift/filetests/filetests/isa/x64/struct-ret.clif
@@ -18,3 +18,45 @@ block0(v0: i64):
 ;   popq    %rbp
 ;   ret
 
+
+function %f1(i64, i64) -> i64 {
+    fn0 = %f2(i64 sret) -> i64
+
+block0(v0: i64, v1: i64):
+    v2 = call fn0(v1)
+    return v2
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+; block0:
+;   movq    %rsi, %rdi
+;   load_ext_name %f2+0, %r9
+;   call    *%r9
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+
+function %f3(i64 sret) {
+    fn0 = %f4(i64 sret)
+
+block0(v0: i64):
+    call fn0(v0)
+    return
+}
+
+;   pushq   %rbp
+;   movq    %rsp, %rbp
+;   subq    %rsp, $16, %rsp
+;   movq    %r15, 0(%rsp)
+; block0:
+;   movq    %rdi, %r15
+;   load_ext_name %f4+0, %r8
+;   call    *%r8
+;   movq    %r15, %rax
+;   movq    0(%rsp), %r15
+;   addq    %rsp, $16, %rsp
+;   movq    %rbp, %rsp
+;   popq    %rbp
+;   ret
+