Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

riscv64: Implement SIMD floating point conversion instructions #6924

Merged
merged 4 commits into from
Aug 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 1 addition & 11 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -254,17 +254,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
return true;
}

let known_failure = [
"canonicalize_nan",
"cvt_from_uint",
"issue_3327_bnot_lowering",
"simd_conversions",
"simd_i32x4_trunc_sat_f32x4",
"simd_i32x4_trunc_sat_f64x2",
"simd_load",
"simd_splat",
]
.contains(&testname);
let known_failure = ["issue_3327_bnot_lowering"].contains(&testname);

known_failure
}
Expand Down
31 changes: 25 additions & 6 deletions cranelift/codegen/src/isa/riscv64/inst/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -757,7 +757,9 @@ impl VecAluOpRR {
| VecAluOpRR::VfcvtrtzxufV
| VecAluOpRR::VfcvtrtzxfV
| VecAluOpRR::VfcvtfxuV
| VecAluOpRR::VfcvtfxV => 0b010010,
| VecAluOpRR::VfcvtfxV
| VecAluOpRR::VfwcvtffV
| VecAluOpRR::VfncvtffW => 0b010010,
}
}

Expand All @@ -779,7 +781,9 @@ impl VecAluOpRR {
| VecAluOpRR::VfcvtrtzxufV
| VecAluOpRR::VfcvtrtzxfV
| VecAluOpRR::VfcvtfxuV
| VecAluOpRR::VfcvtfxV => VecOpCategory::OPFVV,
| VecAluOpRR::VfcvtfxV
| VecAluOpRR::VfwcvtffV
| VecAluOpRR::VfncvtffW => VecOpCategory::OPFVV,
VecAluOpRR::VmvVV => VecOpCategory::OPIVV,
VecAluOpRR::VmvVX => VecOpCategory::OPIVX,
}
Expand All @@ -806,12 +810,17 @@ impl VecAluOpRR {
VecAluOpRR::VzextVF2 => 0b00110,
VecAluOpRR::VsextVF2 => 0b00111,
// VFUNARY0
// single-width converts
VecAluOpRR::VfcvtxufV => 0b00000,
VecAluOpRR::VfcvtxfV => 0b00001,
VecAluOpRR::VfcvtrtzxufV => 0b00110,
VecAluOpRR::VfcvtrtzxfV => 0b00111,
VecAluOpRR::VfcvtfxuV => 0b00010,
VecAluOpRR::VfcvtfxV => 0b00011,
// widening converts
VecAluOpRR::VfwcvtffV => 0b01100,
// narrowing converts
VecAluOpRR::VfncvtffW => 0b10100,
// These don't have an explicit encoding table, but Section 11.16 Vector Integer Move Instruction states:
// > The first operand specifier (vs2) must contain v0, and any other vector register number in vs2 is reserved.
VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0,
Expand All @@ -837,7 +846,9 @@ impl VecAluOpRR {
| VecAluOpRR::VfcvtrtzxufV
| VecAluOpRR::VfcvtrtzxfV
| VecAluOpRR::VfcvtfxuV
| VecAluOpRR::VfcvtfxV => true,
| VecAluOpRR::VfcvtfxV
| VecAluOpRR::VfwcvtffV
| VecAluOpRR::VfncvtffW => true,
VecAluOpRR::VmvSX
| VecAluOpRR::VfmvSF
| VecAluOpRR::VmvVV
Expand Down Expand Up @@ -865,7 +876,9 @@ impl VecAluOpRR {
| VecAluOpRR::VfcvtrtzxufV
| VecAluOpRR::VfcvtrtzxfV
| VecAluOpRR::VfcvtfxuV
| VecAluOpRR::VfcvtfxV => RegClass::Vector,
| VecAluOpRR::VfcvtfxV
| VecAluOpRR::VfwcvtffV
| VecAluOpRR::VfncvtffW => RegClass::Vector,
VecAluOpRR::VmvXS => RegClass::Int,
VecAluOpRR::VfmvFS => RegClass::Float,
}
Expand All @@ -888,7 +901,9 @@ impl VecAluOpRR {
| VecAluOpRR::VfcvtrtzxufV
| VecAluOpRR::VfcvtrtzxfV
| VecAluOpRR::VfcvtfxuV
| VecAluOpRR::VfcvtfxV => RegClass::Vector,
| VecAluOpRR::VfcvtfxV
| VecAluOpRR::VfwcvtffV
| VecAluOpRR::VfncvtffW => RegClass::Vector,
VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => RegClass::Float,
VecAluOpRR::VmvSX | VecAluOpRR::VmvVX => RegClass::Int,
}
Expand All @@ -902,7 +917,9 @@ impl VecAluOpRR {
| VecAluOpRR::VzextVF8
| VecAluOpRR::VsextVF2
| VecAluOpRR::VsextVF4
| VecAluOpRR::VsextVF8 => true,
| VecAluOpRR::VsextVF8
| VecAluOpRR::VfwcvtffV
| VecAluOpRR::VfncvtffW => true,
_ => false,
}
}
Expand Down Expand Up @@ -931,6 +948,8 @@ impl fmt::Display for VecAluOpRR {
VecAluOpRR::VfcvtrtzxfV => "vfcvt.rtz.x.f.v",
VecAluOpRR::VfcvtfxuV => "vfcvt.f.xu.v",
VecAluOpRR::VfcvtfxV => "vfcvt.f.x.v",
VecAluOpRR::VfwcvtffV => "vfwcvt.f.f.v",
VecAluOpRR::VfncvtffW => "vfncvt.f.f.w",
})
}
}
Expand Down
14 changes: 14 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst_vector.isle
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,8 @@
(VfcvtrtzxfV)
(VfcvtfxuV)
(VfcvtfxV)
(VfwcvtffV)
(VfncvtffW)
))

;; Returns the canonical destination type for a VecAluOpRRImm5.
Expand Down Expand Up @@ -1060,6 +1062,18 @@
(rule (rv_vfcvt_f_x_v vs mask vstate)
(vec_alu_rr (VecAluOpRR.VfcvtfxV) vs mask vstate))

;; Helper for emitting the `vfwcvt.f.f.v` instruction.
;; Convert single-width float to double-width float.
(decl rv_vfwcvt_f_f_v (VReg VecOpMasking VState) VReg)
(rule (rv_vfwcvt_f_f_v vs mask vstate)
(vec_alu_rr (VecAluOpRR.VfwcvtffV) vs mask vstate))

;; Helper for emitting the `vfncvt.f.f.w` instruction.
;; Convert double-width float to single-width float.
(decl rv_vfncvt_f_f_w (VReg VecOpMasking VState) VReg)
(rule (rv_vfncvt_f_f_w vs mask vstate)
(vec_alu_rr (VecAluOpRR.VfncvtffW) vs mask vstate))

;; Helper for emitting the `vslidedown.vx` instruction.
;; `vslidedown` moves all elements in the vector down by n elements.
;; The topmost elements are up to the tail policy.
Expand Down
53 changes: 41 additions & 12 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1201,10 +1201,26 @@
(rule (lower (fpromote x))
(rv_fcvtds x))

;;;;; Rules for `fvpromote_low`;;;;;;;;;;;;

(rule (lower (has_type (ty_vec_fits_in_register ty) (fvpromote_low x)))
(if-let half_ty (ty_half_width ty))
(rv_vfwcvt_f_f_v x (unmasked) (vstate_mf2 half_ty)))

;;;;; Rules for `fdemote`;;;;;;;;;;;;;;;;;;
(rule (lower (fdemote x))
(rv_fcvtsd x))

;;;;; Rules for `fvdemote`;;;;;;;;;;;;;;;;;

;; `vfncvt...` leaves the upper bits of the register undefined so
;; we need to zero them out.
(rule (lower (has_type (ty_vec_fits_in_register ty @ $F32X4) (fvdemote x)))
(if-let zero (i8_to_imm5 0))
(let ((narrow VReg (rv_vfncvt_f_f_w x (unmasked) (vstate_mf2 ty)))
(mask VReg (gen_vec_mask 0xC)))
(rv_vmerge_vim narrow zero mask ty)))


;;;;; Rules for float arithmetic

Expand Down Expand Up @@ -1613,39 +1629,52 @@
(gen_fcvt_int $false v $false from to))

;;;;; Rules for `fcvt_to_sint`;;;;;;;;;
(rule
(lower (has_type to (fcvt_to_sint v @ (value_type from))))
(rule 0 (lower (has_type to (fcvt_to_sint v @ (value_type (ty_scalar_float from)))))
(gen_fcvt_int $false v $true from to))

;;;;; Rules for `fcvt_to_sint_sat`;;;;;;;;;
(rule
(lower (has_type to (fcvt_to_sint_sat v @ (value_type from))))
(rule 0 (lower (has_type to (fcvt_to_sint_sat v @ (value_type (ty_scalar_float from)))))
(gen_fcvt_int $true v $true from to))

(rule 1 (lower (has_type (ty_vec_fits_in_register _) (fcvt_to_sint_sat v @ (value_type from_ty))))
(if-let zero (i8_to_imm5 0))
(let ((is_nan VReg (rv_vmfne_vv v v (unmasked) from_ty))
(cvt VReg (rv_vfcvt_rtz_x_f_v v (unmasked) from_ty)))
(rv_vmerge_vim cvt zero is_nan from_ty)))

;;;;; Rules for `fcvt_to_uint_sat`;;;;;;;;;
(rule
(lower (has_type to (fcvt_to_uint_sat v @ (value_type from))))
(rule 0 (lower (has_type to (fcvt_to_uint_sat v @ (value_type (ty_scalar_float from)))))
(gen_fcvt_int $true v $false from to))

(rule 1 (lower (has_type (ty_vec_fits_in_register _) (fcvt_to_uint_sat v @ (value_type from_ty))))
(if-let zero (i8_to_imm5 0))
(let ((is_nan VReg (rv_vmfne_vv v v (unmasked) from_ty))
(cvt VReg (rv_vfcvt_rtz_xu_f_v v (unmasked) from_ty)))
(rv_vmerge_vim cvt zero is_nan from_ty)))

;;;;; Rules for `fcvt_from_sint`;;;;;;;;;
(rule
(lower (has_type to (fcvt_from_sint v @ (value_type from_ty))))
(rule 0 (lower (has_type (ty_scalar_float to) (fcvt_from_sint v @ (value_type from_ty))))
(let ((float_op FpuOPRR (int_convert_2_float_op from_ty $true to))
(value XReg (normalize_fcvt_from_int v from_ty (ExtendOp.Signed))))
(fpu_rr float_op to value)))

(rule 1 (lower (has_type (ty_vec_fits_in_register _) (fcvt_from_sint v @ (value_type from_ty))))
(rv_vfcvt_f_x_v v (unmasked) from_ty))

;;;;; Rules for `fcvt_from_uint`;;;;;;;;;
(rule
(lower (has_type to (fcvt_from_uint v @ (value_type from_ty))))
(rule 0 (lower (has_type (ty_scalar_float to) (fcvt_from_uint v @ (value_type from_ty))))
(let ((float_op FpuOPRR (int_convert_2_float_op from_ty $false to))
(value XReg (normalize_fcvt_from_int v from_ty (ExtendOp.Zero))))
(fpu_rr float_op to value)))

(rule 1 (lower (has_type (ty_vec_fits_in_register _) (fcvt_from_uint v @ (value_type from_ty))))
(rv_vfcvt_f_xu_v v (unmasked) from_ty))

;;;;; Rules for `symbol_value`;;;;;;;;;
(rule
(lower (symbol_value (symbol_value_data name _ offset)))
(load_ext_name name offset)
)
(load_ext_name name offset))

;;;;; Rules for `bitcast`;;;;;;;;;
(rule
(lower (has_type out_ty (bitcast _ v @ (value_type in_ty))))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v

function %fcvt_from_sint(i32x4) -> f32x4 {
block0(v0: i32x4):
v1 = fcvt_from_sint.f32x4 v0
return v1
}

; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vfcvt.f.x.v v4,v1 #avl=4, #vtype=(e32, m1, ta, ma)
; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0x57, 0x92, 0x11, 0x4a
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x27, 0x02, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v

function %fcvt_from_uint(i32x4) -> f32x4 {
block0(v0: i32x4):
v1 = fcvt_from_uint.f32x4 v0
return v1
}

; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vfcvt.f.xu.v v4,v1 #avl=4, #vtype=(e32, m1, ta, ma)
; vse8.v v4,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0x57, 0x12, 0x11, 0x4a
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x27, 0x02, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v

function %fcvt_to_sint_sat(f32x4) -> i32x4 {
block0(v0:f32x4):
v1 = fcvt_to_sint_sat.i32x4 v0
return v1
}

; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vmfne.vv v0,v1,v1 #avl=4, #vtype=(e32, m1, ta, ma)
; vfcvt.rtz.x.f.v v6,v1 #avl=4, #vtype=(e32, m1, ta, ma)
; vmerge.vim v8,v6,0,v0.t #avl=4, #vtype=(e32, m1, ta, ma)
; vse8.v v8,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; .byte 0x57, 0x70, 0x02, 0xcd
; .byte 0x57, 0x90, 0x10, 0x72
; .byte 0x57, 0x93, 0x13, 0x4a
; .byte 0x57, 0x34, 0x60, 0x5c
; .byte 0x57, 0x70, 0x08, 0xcc
; .byte 0x27, 0x04, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret

Loading