Skip to content

Commit

Permalink
Merge pull request #3004 from afonso360/aarch64-i128-rotates
Browse files Browse the repository at this point in the history
aarch64: Implement lowering rotl/rotr for i128 values
  • Loading branch information
cfallin authored Jun 21, 2021
2 parents a24f094 + f7f5244 commit 4246e69
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 1 deletion.
55 changes: 55 additions & 0 deletions cranelift/codegen/src/isa/aarch64/lower_inst.rs
Original file line number Diff line number Diff line change
Expand Up @@ -878,6 +878,61 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let ty = ty.unwrap();
let ty_bits_size = ty_bits(ty) as u8;

// 128-bit rotate, lowered as two 128-bit shifts whose results are OR-ed:
//   rotl(x, amt) = (x << amt)   | (x >>u (128 - amt))
//   rotr(x, amt) = (x >>u amt)  | (x <<  (128 - amt))
// The first shift is written to `tmp`, the second to `dst`, and `tmp` is then
// OR-ed into `dst` one register at a time (regs()[0], then regs()[1]).
//
// TODO: We can do much better codegen if we have a constant amt
if ty == I128 {
let dst = get_output_reg(ctx, outputs[0]);
let src = put_input_in_regs(ctx, inputs[0]);
// Only the first register of the amount operand is read.
// NOTE(review): presumably this is the low 64 bits of the amount; confirm
// the register ordering used for multi-register values.
let amt_src = put_input_in_regs(ctx, inputs[1]).regs()[0];

// Scratch space: `tmp` holds the first partial shift, `inv_amt` holds 128 - amt.
let tmp = ctx.alloc_tmp(I128);
let inv_amt = ctx.alloc_tmp(I64).only_reg().unwrap();

// inv_amt = 128 - amt
// NOTE(review): for amt == 0 this evaluates to 128, so the result is only
// right if emit_shl_i128 / emit_shr_i128 treat a shift amount of 128 like a
// shift by 0. Confirm against those helpers (they are outside this view).
lower_constant_u64(ctx, inv_amt, 128);
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::Sub64,
rd: inv_amt,
rn: inv_amt.to_reg(),
rm: amt_src,
});

if is_rotl {
// rotl:
//   tmp = src << amt            (shl.i128 tmp, amt)
//   dst = src >>u (128 - amt)   (ushr.i128 dst, 128-amt)

emit_shl_i128(ctx, src, tmp, amt_src);
emit_shr_i128(
ctx,
src,
dst,
inv_amt.to_reg(),
/* is_signed = */ false,
);
} else {
// rotr:
//   tmp = src >>u amt           (ushr.i128 tmp, amt)
//   dst = src << (128 - amt)    (shl.i128 dst, 128-amt)

emit_shr_i128(ctx, src, tmp, amt_src, /* is_signed = */ false);
emit_shl_i128(ctx, src, dst, inv_amt.to_reg());
}

// dst |= tmp, first register of the pair.
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::Orr64,
rd: dst.regs()[0],
rn: dst.regs()[0].to_reg(),
rm: tmp.regs()[0].to_reg(),
});
// dst |= tmp, second register of the pair.
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::Orr64,
rd: dst.regs()[1],
rn: dst.regs()[1].to_reg(),
rm: tmp.regs()[1].to_reg(),
});

// The i128 case is fully lowered here; skip the narrower-type path below.
return Ok(());
}

let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let rn = put_input_in_reg(
ctx,
Expand Down
69 changes: 69 additions & 0 deletions cranelift/filetests/filetests/isa/aarch64/shift-rotate.clif
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,39 @@ target aarch64
;; ROR, variable
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Rotate right of a 128-bit value by a variable amount. The amount operand is
; also typed i128. The expected aarch64 lowering follows the function.
function %i128_rotr(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = rotr.i128 v0, v1
return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x3, #128
; nextln: sub x5, x3, x2
; nextln: orn w4, wzr, w2
; nextln: lsl x6, x1, #1
; nextln: lsr x3, x0, x2
; nextln: lsl x6, x6, x4
; nextln: lsr x4, x1, x2
; nextln: ands xzr, x2, #64
; nextln: orr x2, x3, x6
; nextln: csel x3, xzr, x4, ne
; nextln: csel x4, x4, x2, ne
; nextln: orn w2, wzr, w5
; nextln: lsr x6, x0, #1
; nextln: lsl x1, x1, x5
; nextln: lsr x2, x6, x2
; nextln: lsl x0, x0, x5
; nextln: ands xzr, x5, #64
; nextln: orr x1, x1, x2
; nextln: csel x1, x0, x1, ne
; nextln: csel x0, xzr, x0, ne
; nextln: orr x0, x0, x4
; nextln: orr x1, x1, x3
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f0(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = rotr.i64 v0, v1
Expand Down Expand Up @@ -70,6 +103,42 @@ block0(v0: i8, v1: i8):
;; ROL, variable
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Rotate left of a 128-bit value by a variable amount. The amount operand is
; also typed i128. The expected aarch64 lowering follows the function.
function %i128_rotl(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = rotl.i128 v0, v1
return v2
}

; check: stp fp, lr, [sp, #-16]!
; nextln: mov fp, sp
; nextln: movz x3, #128
; nextln: sub x5, x3, x2
; nextln: orn w4, wzr, w2
; nextln: lsr x6, x0, #1
; nextln: lsl x3, x1, x2
; nextln: lsr x6, x6, x4
; nextln: lsl x4, x0, x2
; nextln: ands xzr, x2, #64
; nextln: orr x2, x3, x6
; nextln: csel x3, x4, x2, ne
; nextln: csel x4, xzr, x4, ne
; nextln: orn w2, wzr, w5
; nextln: lsl x6, x1, #1
; nextln: lsr x0, x0, x5
; nextln: lsl x2, x6, x2
; nextln: lsr x1, x1, x5
; nextln: ands xzr, x5, #64
; nextln: orr x2, x0, x2
; nextln: csel x0, xzr, x1, ne
; nextln: csel x1, x1, x2, ne
; nextln: orr x1, x1, x4
; nextln: orr x0, x0, x3
; nextln: mov x2, x0
; nextln: mov x0, x1
; nextln: mov x1, x2
; nextln: ldp fp, lr, [sp], #16
; nextln: ret

function %f4(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = rotl.i64 v0, v1
Expand Down
2 changes: 1 addition & 1 deletion cranelift/filetests/filetests/runtests/i128-rotate.clif
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
test run
; target aarch64 TODO: Not yet implemented on aarch64
target aarch64
; target s390x TODO: Not yet implemented on s390x
target x86_64 machinst

Expand Down

0 comments on commit 4246e69

Please sign in to comment.