Implements conversion of the low signed integer lanes to float for x64 SIMD

bytecodealliance · Mar 26, 2021 · 4a48904 · 4a48904
1 parent 81c4403
commit 4a48904
Show file tree

Hide file tree

Showing 8 changed files with 45 additions and 4 deletions.
diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -4325,6 +4325,26 @@ pub(crate) fn define(
         .operands_out(vec![a]),
     );
 
+    ig.push(
+        Inst::new(
+            "fcvt_low_from_sint",
+            r#"
+        Converts packed signed doubleword integers to packed double precision floating point.
+
+        Considering only the low half of the register, each lane in `x` is interpreted as a
+        signed doubleword integer that is then converted to a double precision float. This
+        instruction differs from fcvt_from_sint in that it converts half the number of lanes
+        which are converted to occupy twice the number of bits. No rounding should be needed
+        for the resulting float.
+
+        The result type will have half the number of vector lanes as the input.
+        "#,
+            &formats.unary,
+        )
+        .operands_in(vec![x])
+        .operands_out(vec![a]),
+    );
+
     let WideInt = &TypeVar::new(
         "WideInt",
         "An integer type with lanes from `i16` upwards",

diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -3013,6 +3013,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         }
 
         Opcode::TlsValue => unimplemented!("tls_value"),
+        Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
     }
 
     Ok(())

diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs
@@ -480,6 +480,7 @@ pub enum SseOpcode {
     Cmpss,
     Cmpsd,
     Cvtdq2ps,
+    Cvtdq2pd,
     Cvtsd2ss,
     Cvtsd2si,
     Cvtsi2ss,
@@ -673,6 +674,7 @@ impl SseOpcode {
             | SseOpcode::Cmpsd
             | SseOpcode::Comisd
             | SseOpcode::Cvtdq2ps
+            | SseOpcode::Cvtdq2pd
             | SseOpcode::Cvtsd2ss
             | SseOpcode::Cvtsd2si
             | SseOpcode::Cvtsi2sd
@@ -828,6 +830,7 @@ impl fmt::Debug for SseOpcode {
             SseOpcode::Comiss => "comiss",
             SseOpcode::Comisd => "comisd",
             SseOpcode::Cvtdq2ps => "cvtdq2ps",
+            SseOpcode::Cvtdq2pd => "cvtdq2pd",
             SseOpcode::Cvtsd2ss => "cvtsd2ss",
             SseOpcode::Cvtsd2si => "cvtsd2si",
             SseOpcode::Cvtsi2ss => "cvtsi2ss",

diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -1768,6 +1768,7 @@ pub(crate) fn emit(
             let rex = RexFlags::clear_w();
 
             let (prefix, opcode, num_opcodes) = match op {
+                SseOpcode::Cvtdq2pd => (LegacyPrefixes::_F3, 0x0FE6, 2),
                 SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A, 2),
                 SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A, 2),
                 SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28, 2),

diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs
@@ -3859,6 +3859,12 @@ fn test_x64_emit() {
         "pabsd   %xmm10, %xmm11",
     ));
 
+    insns.push((
+        Inst::xmm_unary_rm_r(SseOpcode::Cvtdq2pd, RegMem::reg(xmm2), w_xmm8),
+        "F3440FE6C2",
+        "cvtdq2pd %xmm2, %xmm8",
+    ));
+
     // Xmm to int conversions, and conversely.
 
     insns.push((

diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs
@@ -3915,7 +3915,15 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 ctx.emit(Inst::xmm_rm_r(opcode, RegMem::from(dst), dst));
             }
         }
-
+        Opcode::FcvtLowFromSint => {
+            let src = RegMem::reg(put_input_in_reg(ctx, inputs[0]));
+            let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            ctx.emit(Inst::xmm_unary_rm_r(
+                SseOpcode::Cvtdq2pd,
+                RegMem::from(src),
+                dst,
+            ));
+        }
         Opcode::FcvtFromUint => {
             let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
             let ty = ty.unwrap();

diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs
@@ -563,6 +563,7 @@ where
         Opcode::FcvtToSintSat => unimplemented!("FcvtToSintSat"),
         Opcode::FcvtFromUint => unimplemented!("FcvtFromUint"),
         Opcode::FcvtFromSint => unimplemented!("FcvtFromSint"),
+        Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
         Opcode::Isplit => unimplemented!("Isplit"),
         Opcode::Iconcat => unimplemented!("Iconcat"),
         Opcode::AtomicRmw => unimplemented!("AtomicRmw"),

diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs
@@ -1775,6 +1775,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
             let a = pop1_with_bitcast(state, I32X4, builder);
             state.push1(builder.ins().fcvt_from_uint(F32X4, a))
         }
+        Operator::F64x2ConvertLowI32x4S => {
+            let a = pop1_with_bitcast(state, I32X4, builder);
+            state.push1(builder.ins().fcvt_low_from_sint(F64X2, a));
+        }
         Operator::I32x4TruncSatF32x4S => {
             let a = pop1_with_bitcast(state, F32X4, builder);
             state.push1(builder.ins().fcvt_to_sint_sat(I32X4, a))
@@ -1851,12 +1855,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
             let arg = pop1_with_bitcast(state, type_of(op), builder);
             state.push1(builder.ins().nearest(arg));
         }
-
         Operator::I32x4DotI16x8S => {
             let (a, b) = pop2_with_bitcast(state, I16X8, builder);
             state.push1(builder.ins().widening_pairwise_dot_product_s(a, b));
         }
-
         Operator::I64x2ExtendLowI32x4S
         | Operator::I64x2ExtendHighI32x4S
         | Operator::I64x2ExtendLowI32x4U
@@ -1880,7 +1882,6 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
         | Operator::I32x4ExtAddPairwiseI16x8U
         | Operator::F32x4DemoteF64x2Zero
         | Operator::F64x2PromoteLowF32x4
-        | Operator::F64x2ConvertLowI32x4S
         | Operator::F64x2ConvertLowI32x4U
         | Operator::I32x4TruncSatF64x2SZero
         | Operator::I32x4TruncSatF64x2UZero
-Original file line number
+Diff line change
@@ Expand Up / @@ -3013,6 +3013,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( @@
             }
             Opcode::TlsValue => unimplemented!("tls_value"),
+            Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
         }
         Ok(())
@@ Expand Down @@