Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

aarch64: Implement lowering rotl/rotr for i128 values #3004

Merged
merged 1 commit into from
Jun 21, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions cranelift/codegen/src/isa/aarch64/lower_inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,61 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let ty = ty.unwrap();
let ty_bits_size = ty_bits(ty) as u8;

// 128-bit rotate lowering. A rotate is built from two opposite 128-bit
// shifts of the same source whose results are OR-ed together:
//   rotl(x, amt) = (x << amt) | (x >> (128 - amt))
//   rotr(x, amt) = (x >> amt) | (x << (128 - amt))
// TODO: We can do much better codegen if we have a constant amt
if ty == I128 {
let dst = get_output_reg(ctx, outputs[0]);
let src = put_input_in_regs(ctx, inputs[0]);
// Only the first register of the amount operand is used.
// NOTE(review): presumably this is the low 64 bits of the amount - confirm.
let amt_src = put_input_in_regs(ctx, inputs[1]).regs()[0];

// `tmp` receives the result of the first shift; `inv_amt` holds 128 - amt.
let tmp = ctx.alloc_tmp(I128);
let inv_amt = ctx.alloc_tmp(I64).only_reg().unwrap();

// inv_amt = 128, then inv_amt = inv_amt - amt_src.
lower_constant_u64(ctx, inv_amt, 128);
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::Sub64,
rd: inv_amt,
rn: inv_amt.to_reg(),
rm: amt_src,
});

// NOTE(review): when amt is 0, inv_amt is 128; this looks like it relies on
// emit_shl_i128 / emit_shr_i128 treating an amount of 128 like 0 - confirm
// against those helpers.
if is_rotl {
// rotl
// (shl.i128 tmp, amt)
// (ushr.i128 dst, 128-amt)

emit_shl_i128(ctx, src, tmp, amt_src);
emit_shr_i128(
ctx,
src,
dst,
inv_amt.to_reg(),
/* is_signed = */ false,
);
} else {
// rotr
// (ushr.i128 tmp, amt)
// (shl.i128 dst, 128-amt)

emit_shr_i128(ctx, src, tmp, amt_src, /* is_signed = */ false);
emit_shl_i128(ctx, src, dst, inv_amt.to_reg());
}

// Combine the two partial results: dst |= tmp, one 64-bit register at a time.
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::Orr64,
rd: dst.regs()[0],
rn: dst.regs()[0].to_reg(),
rm: tmp.regs()[0].to_reg(),
});
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::Orr64,
rd: dst.regs()[1],
rn: dst.regs()[1].to_reg(),
rm: tmp.regs()[1].to_reg(),
});

// The i128 case is fully lowered here; skip the narrower-type path below.
return Ok(());
}

let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let rn = put_input_in_reg(
ctx,
Expand Down
69 changes: 69 additions & 0 deletions cranelift/filetests/filetests/isa/aarch64/shift-rotate.clif
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,39 @@ target aarch64
;; ROR, variable
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Rotate the 128-bit value v0 right by the variable amount v1.
; The directives that follow this function pin the expected aarch64 code.
function %i128_rotr(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = rotr.i128 v0, v1
return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x3, #128
; nextln: sub x5, x3, x2
; nextln: orn w4, wzr, w2
; nextln: lsl x6, x1, #1
; nextln: lsr x3, x0, x2
; nextln: lsl x6, x6, x4
; nextln: lsr x4, x1, x2
; nextln: ands xzr, x2, #64
; nextln: orr x2, x3, x6
; nextln: csel x3, xzr, x4, ne
; nextln: csel x4, x4, x2, ne
; nextln: orn w2, wzr, w5
; nextln: lsr x6, x0, #1
; nextln: lsl x1, x1, x5
; nextln: lsr x2, x6, x2
; nextln: lsl x0, x0, x5
; nextln: ands xzr, x5, #64
; nextln: orr x1, x1, x2
; nextln: csel x1, x0, x1, ne
; nextln: csel x0, xzr, x0, ne
; nextln: orr x0, x0, x4
; nextln: orr x1, x1, x3
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f0(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = rotr.i64 v0, v1
Expand Down Expand Up @@ -70,6 +103,42 @@ block0(v0: i8, v1: i8):
;; ROL, variable
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Rotate the 128-bit value v0 left by the variable amount v1.
; The directives that follow this function pin the expected aarch64 code.
function %i128_rotl(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = rotl.i128 v0, v1
return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x3, #128
; nextln: sub x5, x3, x2
; nextln: orn w4, wzr, w2
; nextln: lsr x6, x0, #1
; nextln: lsl x3, x1, x2
; nextln: lsr x6, x6, x4
; nextln: lsl x4, x0, x2
; nextln: ands xzr, x2, #64
; nextln: orr x2, x3, x6
; nextln: csel x3, x4, x2, ne
; nextln: csel x4, xzr, x4, ne
; nextln: orn w2, wzr, w5
; nextln: lsl x6, x1, #1
; nextln: lsr x0, x0, x5
; nextln: lsl x2, x6, x2
; nextln: lsr x1, x1, x5
; nextln: ands xzr, x5, #64
; nextln: orr x2, x0, x2
; nextln: csel x0, xzr, x1, ne
; nextln: csel x1, x1, x2, ne
; nextln: orr x1, x1, x4
; nextln: orr x0, x0, x3
; nextln: mov x2, x0
; nextln: mov x0, x1
; nextln: mov x1, x2
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f4(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = rotl.i64 v0, v1
Expand Down
2 changes: 1 addition & 1 deletion cranelift/filetests/filetests/runtests/i128-rotate.clif
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
test run
; target aarch64 TODO: Not yet implemented on aarch64
target aarch64
; target s390x TODO: Not yet implemented on s390x
target x86_64 machinst

Expand Down