diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index 21ff5ad3442b..d3b10f8b8c67 100755 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -2057,6 +2057,125 @@ pub(crate) fn define( ]), ); + { + let of_out = Operand::new("of", i8).with_doc("Overflow flag"); + ig.push( + Inst::new( + "uadd_overflow", + r#" + Add integers unsigned with overflow out. + ``of`` is set when the addition overflowed. + ```text + a &= x + y \pmod 2^B \\ + of &= x+y >= 2^B + ``` + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![Operand::new("x", iB), Operand::new("y", iB)]) + .operands_out(vec![Operand::new("a", iB), of_out.clone()]), + ); + + ig.push( + Inst::new( + "sadd_overflow", + r#" + Add integers signed with overflow out. + ``of`` is set when the addition over- or underflowed. + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![Operand::new("x", iB), Operand::new("y", iB)]) + .operands_out(vec![Operand::new("a", iB), of_out.clone()]), + ); + + ig.push( + Inst::new( + "usub_overflow", + r#" + Subtract integers unsigned with overflow out. + ``of`` is set when the subtraction underflowed. + ```text + a &= x - y \pmod 2^B \\ + of &= x - y < 0 + ``` + Polymorphic over all scalar integer types, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![Operand::new("x", iB), Operand::new("y", iB)]) + .operands_out(vec![Operand::new("a", iB), of_out.clone()]), + ); + + ig.push( + Inst::new( + "ssub_overflow", + r#" + Subtract integers signed with overflow out. + ``of`` is set when the subtraction over- or underflowed. + Polymorphic over all scalar integer types, but does not support vector + types. 
+ "#, + &formats.binary, + ) + .operands_in(vec![Operand::new("x", iB), Operand::new("y", iB)]) + .operands_out(vec![Operand::new("a", iB), of_out.clone()]), + ); + + { + let NarrowScalar = &TypeVar::new( + "NarrowScalar", + "A scalar integer type up to 64 bits", + TypeSetBuilder::new().ints(8..64).build(), + ); + + ig.push( + Inst::new( + "umul_overflow", + r#" + Multiply integers unsigned with overflow out. + ``of`` is set when the multiplication overflowed. + ```text + a &= x * y \pmod 2^B \\ + of &= x * y > 2^B + ``` + Polymorphic over all scalar integer types except i128, but does not support vector + types. + "#, + &formats.binary, + ) + .operands_in(vec![ + Operand::new("x", NarrowScalar), + Operand::new("y", NarrowScalar), + ]) + .operands_out(vec![Operand::new("a", NarrowScalar), of_out.clone()]), + ); + + ig.push( + Inst::new( + "smul_overflow", + r#" + Multiply integers signed with overflow out. + ``of`` is set when the multiplication over- or underflowed. + Polymorphic over all scalar integer types except i128, but does not support vector + types. 
+ "#, + &formats.binary, + ) + .operands_in(vec![ + Operand::new("x", NarrowScalar), + Operand::new("y", NarrowScalar), + ]) + .operands_out(vec![Operand::new("a", NarrowScalar), of_out.clone()]), + ); + } + } + let i32_64 = &TypeVar::new( "i32_64", "A 32 or 64-bit scalar integer type", diff --git a/cranelift/codegen/src/data_value.rs b/cranelift/codegen/src/data_value.rs index 6cba82c750b2..f19e302d0b0d 100644 --- a/cranelift/codegen/src/data_value.rs +++ b/cranelift/codegen/src/data_value.rs @@ -258,6 +258,18 @@ impl DataValue { (DataValue::F32(a), DataValue::F32(b)) => a.bits() == b.bits(), (DataValue::F64(a), DataValue::F64(b)) => a.bits() == b.bits(), + // when testing for bitwise equality, the sign information does not matter + (DataValue::I8(a), DataValue::U8(b)) => *a as u8 == *b, + (DataValue::U8(a), DataValue::I8(b)) => *a == *b as u8, + (DataValue::I16(a), DataValue::U16(b)) => *a as u16 == *b, + (DataValue::U16(a), DataValue::I16(b)) => *a == *b as u16, + (DataValue::I32(a), DataValue::U32(b)) => *a as u32 == *b, + (DataValue::U32(a), DataValue::I32(b)) => *a == *b as u32, + (DataValue::I64(a), DataValue::U64(b)) => *a as u64 == *b, + (DataValue::U64(a), DataValue::I64(b)) => *a == *b as u64, + (DataValue::I128(a), DataValue::U128(b)) => *a as u128 == *b, + (DataValue::U128(a), DataValue::I128(b)) => *a == *b as u128, + // We don't need to worry about F32x4 / F64x2 Since we compare V128 which is already the // raw bytes anyway (a, b) => a == b, diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index 1e09a5ed96d3..b02af972dc6a 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -1004,6 +1004,10 @@ (MAdd) ;; Multiply-sub (MSub) + ;; Unsigned-Multiply-add + (UMAddL) + ;; Signed-Multiply-add + (SMAddL) )) (type MoveWideOp @@ -1727,6 +1731,9 @@ (decl pure partial lshl_from_u64 (Type u64) ShiftOpAndAmt) (extern constructor lshl_from_u64 
lshl_from_u64) +(decl pure partial ashr_from_u64 (Type u64) ShiftOpAndAmt) +(extern constructor ashr_from_u64 ashr_from_u64) + (decl integral_ty (Type) Type) (extern extractor integral_ty integral_ty) @@ -1966,6 +1973,15 @@ (MInst.AluRRRShift (ALUOp.SubS) size (writable_zero_reg) src1 src2 shift))) +;; Helper for emitting `cmp` instructions, setting flags, with an arithmetic right-shifted +;; second operand register. +(decl cmp_rr_shift_asr (OperandSize Reg Reg u64) ProducesFlags) +(rule (cmp_rr_shift_asr size src1 src2 shift_amount) + (if-let shift (ashr_from_u64 $I64 shift_amount)) + (ProducesFlags.ProducesFlagsSideEffect + (MInst.AluRRRShift (ALUOp.SubS) size (writable_zero_reg) + src1 src2 shift))) + ;; Helper for emitting `MInst.AluRRRExtend` instructions. (decl alu_rrr_extend (ALUOp Type Reg Reg ExtendOp) Reg) (rule (alu_rrr_extend op ty src1 src2 extend) @@ -1988,6 +2004,22 @@ (_ Unit (emit (MInst.AluRRRR op (operand_size ty) dst src1 src2 src3)))) dst)) +;; Helper for emitting paired `MInst.AluRRR` instructions +(decl alu_rrr_with_flags_paired (Type Reg Reg ALUOp) ProducesFlags) +(rule (alu_rrr_with_flags_paired ty src1 src2 alu_op) + (let ((dst WritableReg (temp_writable_reg $I64))) + (ProducesFlags.ProducesFlagsReturnsResultWithConsumer + (MInst.AluRRR alu_op (operand_size ty) dst src1 src2) + dst))) + +;; Should only be used for AdcS and SbcS +(decl alu_rrr_with_flags_chained (Type Reg Reg ALUOp) ConsumesAndProducesFlags) +(rule (alu_rrr_with_flags_chained ty src1 src2 alu_op) + (let ((dst WritableReg (temp_writable_reg $I64))) + (ConsumesAndProducesFlags.ReturnsReg + (MInst.AluRRR alu_op (operand_size ty) dst src1 src2) + dst))) + ;; Helper for emitting `MInst.BitRR` instructions. (decl bit_rr (BitOp Type Reg) Reg) (rule (bit_rr op ty src) @@ -2335,7 +2367,7 @@ ;; immediately by the `MInst.CCmp` instruction. 
(decl ccmp (OperandSize Reg Reg NZCV Cond ProducesFlags) ProducesFlags) (rule (ccmp size rn rm nzcv cond inst_input) - (produces_flags_append inst_input (MInst.CCmp size rn rm nzcv cond))) + (produces_flags_concat inst_input (ProducesFlags.ProducesFlagsSideEffect (MInst.CCmp size rn rm nzcv cond)))) ;; Helper for generating `MInst.CCmpImm` instructions. (decl ccmp_imm (OperandSize Reg UImm5 NZCV Cond) ConsumesFlags) @@ -2411,6 +2443,14 @@ (decl msub (Type Reg Reg Reg) Reg) (rule (msub ty x y z) (alu_rrrr (ALUOp3.MSub) ty x y z)) +;; Helpers for generating `umaddl` instructions +(decl umaddl (Reg Reg Reg) Reg) +(rule (umaddl x y z) (alu_rrrr (ALUOp3.UMAddL) $I32 x y z)) + +;; Helpers for generating `smaddl` instructions +(decl smaddl (Reg Reg Reg) Reg) +(rule (smaddl x y z) (alu_rrrr (ALUOp3.SMAddL) $I32 x y z)) + ;; Helper for generating `uqadd` instructions. (decl uqadd (Reg Reg VectorSize) Reg) (rule (uqadd x y size) (vec_rrr (VecALUOp.Uqadd) x y size)) @@ -2620,6 +2660,9 @@ (decl orr_imm (Type Reg ImmLogic) Reg) (rule (orr_imm ty x y) (alu_rr_imm_logic (ALUOp.Orr) ty x y)) +(decl orr_shift (Type Reg Reg ShiftOpAndAmt) Reg) +(rule (orr_shift ty x y shift) (alu_rrr_shift (ALUOp.Orr) ty x y shift)) + (decl orr_vec (Reg Reg VectorSize) Reg) (rule (orr_vec x y size) (vec_rrr (VecALUOp.Orr) x y size)) @@ -3659,12 +3702,12 @@ (rm Reg (put_in_reg y))) (vec_cmp rn rm in_ty cond))) -;; Determines the appropriate extend op given the value type and whether it is signed. -(decl lower_extend_op (Type bool) ExtendOp) -(rule (lower_extend_op $I8 $true) (ExtendOp.SXTB)) -(rule (lower_extend_op $I16 $true) (ExtendOp.SXTH)) -(rule (lower_extend_op $I8 $false) (ExtendOp.UXTB)) -(rule (lower_extend_op $I16 $false) (ExtendOp.UXTH)) +;; Determines the appropriate extend op given the value type and the given ArgumentExtension. 
+(decl lower_extend_op (Type ArgumentExtension) ExtendOp) +(rule (lower_extend_op $I8 (ArgumentExtension.Sext)) (ExtendOp.SXTB)) +(rule (lower_extend_op $I16 (ArgumentExtension.Sext)) (ExtendOp.SXTH)) +(rule (lower_extend_op $I8 (ArgumentExtension.Uext)) (ExtendOp.UXTB)) +(rule (lower_extend_op $I16 (ArgumentExtension.Uext)) (ExtendOp.UXTH)) ;; Integers <= 64-bits. (rule -2 (lower_icmp_into_reg cond rn rm in_ty out_ty) @@ -3675,13 +3718,13 @@ (rule 1 (lower_icmp cond rn rm (fits_in_16 ty)) (if (signed_cond_code cond)) (let ((rn Reg (put_in_reg_sext32 rn))) - (flags_and_cc (cmp_extend (operand_size ty) rn rm (lower_extend_op ty $true)) cond))) + (flags_and_cc (cmp_extend (operand_size ty) rn rm (lower_extend_op ty (ArgumentExtension.Sext))) cond))) (rule -1 (lower_icmp cond rn (imm12_from_value rm) (fits_in_16 ty)) (let ((rn Reg (put_in_reg_zext32 rn))) (flags_and_cc (cmp_imm (operand_size ty) rn rm) cond))) (rule -2 (lower_icmp cond rn rm (fits_in_16 ty)) (let ((rn Reg (put_in_reg_zext32 rn))) - (flags_and_cc (cmp_extend (operand_size ty) rn rm (lower_extend_op ty $false)) cond))) + (flags_and_cc (cmp_extend (operand_size ty) rn rm (lower_extend_op ty (ArgumentExtension.Uext))) cond))) (rule -3 (lower_icmp cond rn (u64_from_iconst c) ty) (if (ty_int_ref_scalar_64 ty)) (lower_icmp_const cond rn c ty)) diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index da0cb5b63a7b..ed0be97bc239 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -789,6 +789,14 @@ impl MachInstEmit for Inst { let (top11, bit15) = match alu_op { ALUOp3::MAdd => (0b0_00_11011_000, 0), ALUOp3::MSub => (0b0_00_11011_000, 1), + ALUOp3::UMAddL => { + debug_assert!(size == OperandSize::Size32); + (0b1_00_11011_1_01, 0) + } + ALUOp3::SMAddL => { + debug_assert!(size == OperandSize::Size32); + (0b1_00_11011_0_01, 0) + } }; let top11 = top11 | size.sf_bit() << 10; 
sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd)); diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index 3e57cd976800..6ad69777243c 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -1086,6 +1086,30 @@ fn test_aarch64_binemit() { "4190039B", "msub x1, x2, x3, x4", )); + insns.push(( + Inst::AluRRRR { + alu_op: ALUOp3::UMAddL, + size: OperandSize::Size32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + ra: xreg(4), + }, + "4110A39B", + "umaddl x1, w2, w3, x4", + )); + insns.push(( + Inst::AluRRRR { + alu_op: ALUOp3::SMAddL, + size: OperandSize::Size32, + rd: writable_xreg(1), + rn: xreg(2), + rm: xreg(3), + ra: xreg(4), + }, + "4110239B", + "smaddl x1, w2, w3, x4", + )); insns.push(( Inst::AluRRR { alu_op: ALUOp::SMulH, diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index bc0403f65aba..24e4780f967b 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -1191,14 +1191,16 @@ impl Inst { rm, ra, } => { - let op = match alu_op { - ALUOp3::MAdd => "madd", - ALUOp3::MSub => "msub", + let (op, da_size) = match alu_op { + ALUOp3::MAdd => ("madd", size), + ALUOp3::MSub => ("msub", size), + ALUOp3::UMAddL => ("umaddl", OperandSize::Size64), + ALUOp3::SMAddL => ("smaddl", OperandSize::Size64), }; - let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let rd = pretty_print_ireg(rd.to_reg(), da_size, allocs); let rn = pretty_print_ireg(rn, size, allocs); let rm = pretty_print_ireg(rm, size, allocs); - let ra = pretty_print_ireg(ra, size, allocs); + let ra = pretty_print_ireg(ra, da_size, allocs); format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra) } diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index 8f36c79c2f0a..12c9a2a83590 100644 --- 
a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -2580,7 +2580,7 @@ ;; For values smaller than a register, we do a normal `add` with both arguments ;; sign extended. We then check if the output sign bit has flipped. (rule 0 (lower (has_type (fits_in_16 ty) (iadd_cout a b))) - (let ((extend ExtendOp (lower_extend_op ty $true)) + (let ((extend ExtendOp (lower_extend_op ty (ArgumentExtension.Sext))) ;; Instead of emitting two `sxt{b,h}` we do one as an instruction and ;; the other as an extend operation in the `add` instruction. @@ -2617,6 +2617,250 @@ (rule (lower (has_type (fits_in_64 ty) (uadd_overflow_trap a b tc))) (trap_if_overflow (add_with_flags_paired ty a b) tc)) +;;;; Helpers for `*_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; put a narrow value into a register and sign-/zero-extend depending on the ArgumentExtension +(decl put_in_reg_ext32 (Value ArgumentExtension) Reg) +(rule (put_in_reg_ext32 val (ArgumentExtension.Sext)) + (put_in_reg_sext32 val)) +(rule (put_in_reg_ext32 val (ArgumentExtension.Uext)) + (put_in_reg_zext32 val)) + +;; For narrow values emit a normal op with both arguments zero/sign extended. +;; Then check if the output is the same as itself zero/sign extended from the narrower width. +(decl overflow_op_small (Type Value Value ArgumentExtension ALUOp) InstOutput) +(rule (overflow_op_small ty a b arg_ext alu_op) + (let ((extend ExtendOp (lower_extend_op ty arg_ext)) + + ;; Instead of emitting two `{u,s}xt{b,h}` we do one as an instruction and + ;; the other as an extend operation in the alu_op. 
+ ;; + ;; uxtb a_ext, a + ;; alu_op out, a_ext, b, {u,s}xtb + ;; cmp out, out, {u,s}xtb + ;; cset out_of, ne + (a_ext Reg (put_in_reg_ext32 a arg_ext)) + (out Reg (alu_rrr_extend alu_op ty a_ext b extend)) + (out_of Reg (with_flags_reg + (cmp_extend (OperandSize.Size32) out out extend) + (cset (Cond.Ne))))) + (output_pair + (value_reg out) + (value_reg out_of)))) + +;; For register sized op's just emit a op+cset, without further masking. +;; +;; op out, a, b +;; cset out_of, cond +;; +;; conds expected: +;; Hs: Carry set, unsigned overflow; Vs: Signed Over-/Underflow; +;; Lo: Carry clear, meaning no unsigned overflow. +;; (this is because subtraction is implemented as an add with the two's complement value on aarch64, meaning there is a sub-overflow if the add does not overflow) +(decl overflow_op_normal (Type Value Value ALUOp Cond) InstOutput) +(rule (overflow_op_normal ty a b alu_op cond) + (let ((out ValueRegs + (with_flags + (alu_rrr_with_flags_paired ty a b alu_op) + (cset_paired cond)))) + (output_pair + (value_regs_get out 0) + (value_regs_get out 1)))) + +;; For 128bit integers emit, for example, add+adcs+cset +(decl overflow_op_128 (Value Value ALUOp ALUOp Cond) InstOutput) +(rule (overflow_op_128 x y alu_op1 alu_op2 cond) + (let + ;; Get the high/low registers for `x`. + ((x_regs ValueRegs x) + (x_lo Reg (value_regs_get x_regs 0)) + (x_hi Reg (value_regs_get x_regs 1)) + + ;; Get the high/low registers for `y`. 
+ (y_regs ValueRegs y) + (y_lo Reg (value_regs_get y_regs 0)) + (y_hi Reg (value_regs_get y_regs 1))) + ;; cannot use the with_flags helper here but it should be fine right now + (let + ((lo_inst ProducesFlags (alu_rrr_with_flags_paired $I64 x_lo y_lo alu_op1)) + (hi_inst ConsumesAndProducesFlags (alu_rrr_with_flags_chained $I64 x_hi y_hi alu_op2)) + (of_inst ConsumesFlags (cset_paired cond)) + + (result MultiReg (with_flags_chained lo_inst hi_inst of_inst))) + (multi_reg_to_pair_and_single result))) +) + +;;;; Rules for `uadd_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; For values smaller than a register, we do a normal `add` with both arguments +;; zero extended. We then check if the output is the same as itself zero extended. +(rule 1 (lower (has_type (fits_in_16 ty) (uadd_overflow a b))) + (overflow_op_small ty a b (ArgumentExtension.Uext) (ALUOp.Add))) + +;; For register sized add's we just emit a adds+cset, without further masking. +(rule 2 (lower (has_type (ty_32_or_64 ty) (uadd_overflow a b))) + (overflow_op_normal ty a b (ALUOp.AddS) (Cond.Hs))) + +;; For 128bit integers we emit add+adcs+cset +(rule 0 (lower (has_type $I128 (uadd_overflow x y))) + (overflow_op_128 x y (ALUOp.AddS) (ALUOp.AdcS) (Cond.Hs))) + +;;;; Rules for `sadd_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; sxt{b,h} a_ext, a +;; add out, a_ext, b, sxt{b,h} +;; cmp out, out, sxt{b,h} +;; cset of, ne +(rule 1 (lower (has_type (fits_in_16 ty) (sadd_overflow a b))) + (overflow_op_small ty a b (ArgumentExtension.Sext) (ALUOp.Add))) + +;; adds a, b +;; cset of, vs +(rule 2 (lower (has_type (ty_32_or_64 ty) (sadd_overflow a b))) + (overflow_op_normal ty a b (ALUOp.AddS) (Cond.Vs))) + +;; adds x_lo, y_lo +;; addcs x_hi, y_hi +;; cset of, vs +(rule 0 (lower (has_type $I128 (sadd_overflow x y))) + (overflow_op_128 x y (ALUOp.AddS) (ALUOp.AdcS) (Cond.Vs))) + +;;;; Rules for `usub_overflow` 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; uxt{b,h} a_ext, a +;; sub out, a_ext, b, ext{b,h} +;; cmp out, out, uxt{b,h} +;; cset of, ne +(rule 1 (lower (has_type (fits_in_16 ty) (usub_overflow a b))) + (overflow_op_small ty a b (ArgumentExtension.Uext) (ALUOp.Sub))) + +;; subs a, b +;; cset of, lo +(rule 2 (lower (has_type (ty_32_or_64 ty) (usub_overflow a b))) + (overflow_op_normal ty a b (ALUOp.SubS) (Cond.Lo))) + +;; subs x_lo, y_lo +;; sbcs x_hi, y_hi +;; cset of, lo +(rule 0 (lower (has_type $I128 (usub_overflow x y))) + (overflow_op_128 x y (ALUOp.SubS) (ALUOp.SbcS) (Cond.Lo))) + +;;;; Rules for `ssub_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; sxt{b,h} a_ext, a +;; sub out, a_ext, b, sxt{b,h} +;; cmp out, out, sxt{b,h} +;; cset of, ne +(rule 1 (lower (has_type (fits_in_16 ty) (ssub_overflow a b))) + (overflow_op_small ty a b (ArgumentExtension.Sext) (ALUOp.Sub))) + +;; subs a, b +;; cset of, vs +(rule 2 (lower (has_type (ty_32_or_64 ty) (ssub_overflow a b))) + (overflow_op_normal ty a b (ALUOp.SubS) (Cond.Vs))) + +;; subs x_lo, y_lo +;; sbcs x_hi, y_hi +;; cset of, vs +(rule 0 (lower (has_type $I128 (ssub_overflow x y))) + (overflow_op_128 x y (ALUOp.SubS) (ALUOp.SbcS) (Cond.Vs))) + +;;;; Rules for `umul_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; uxt{b,h} a_ext, a +;; uxt{b,h} b_ext, b +;; mul out, a_ext, b_ext +;; cmp out, out, uxt{b,h} +;; cset of, ne +(rule 1 (lower (has_type (fits_in_16 ty) (umul_overflow a b))) + (let ((extend ExtendOp (lower_extend_op ty (ArgumentExtension.Uext))) + + (a_uext Reg (put_in_reg_zext32 a)) + (b_uext Reg (put_in_reg_zext32 b)) + (out Reg (madd ty a_uext b_uext (zero_reg))) + (out_of Reg (with_flags_reg + (cmp_extend (OperandSize.Size32) out out extend) + (cset (Cond.Ne))))) + (output_pair + (value_reg out) + (value_reg out_of)))) + +;; umull out, a, b +;; cmp out, out, uxtw +;; cset of, ne +(rule 2 (lower (has_type $I32 (umul_overflow a b))) + 
(let ( + (out Reg (umaddl a b (zero_reg))) + (out_of Reg (with_flags_reg + (cmp_extend (OperandSize.Size64) out out (ExtendOp.UXTW)) + (cset (Cond.Ne))))) + (output_pair + (value_reg out) + (value_reg out_of)))) + +;; mul out, a, b +;; umulh tmp, a, b +;; cmp tmp, #0 +;; cset of, ne +(rule 2 (lower (has_type $I64 (umul_overflow a b))) + (let ( + (out Reg (madd $I64 a b (zero_reg))) + (tmp Reg (umulh $I64 a b)) + (out_of Reg (with_flags_reg + (cmp64_imm tmp (u8_into_imm12 0)) + (cset (Cond.Ne))))) + (output_pair + (value_reg out) + (value_reg out_of)))) + +;;;; Rules for `smul_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; sxt{b,h} a_ext, a +;; sxt{b,h} b_ext, b +;; mul out, a_ext, b_ext +;; cmp out, out, sxt{b,h} +;; cset of, ne +(rule 1 (lower (has_type (fits_in_16 ty) (smul_overflow a b))) + (let ((extend ExtendOp (lower_extend_op ty (ArgumentExtension.Sext))) + + (a_sext Reg (put_in_reg_sext32 a)) + (b_sext Reg (put_in_reg_sext32 b)) + (out Reg (madd ty a_sext b_sext (zero_reg))) + (out_of Reg (with_flags_reg + (cmp_extend (OperandSize.Size32) out out extend) + (cset (Cond.Ne))))) + (output_pair + (value_reg out) + (value_reg out_of)))) + +;; smull out, a, b +;; cmp out, out, sxtw +;; cset of, ne +(rule 2 (lower (has_type $I32 (smul_overflow a b))) + (let ( + (out Reg (smaddl a b (zero_reg))) + (out_of Reg (with_flags_reg + (cmp_extend (OperandSize.Size64) out out (ExtendOp.SXTW)) + (cset (Cond.Ne))))) + (output_pair + (value_reg out) + (value_reg out_of)))) + +;; mul out, a, b +;; smulh tmp, a, b +;; cmp tmp, out, ASR #63 +;; cset of, ne +(rule 2 (lower (has_type $I64 (smul_overflow a b))) + (let ( + (out Reg (madd $I64 a b (zero_reg))) + (tmp Reg (smulh $I64 a b)) + (out_of Reg (with_flags_reg + (cmp_rr_shift_asr (OperandSize.Size64) tmp out 63) + (cset (Cond.Ne))))) + (output_pair + (value_reg out) + (value_reg out_of)))) + ;;; Rules for `tls_value` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type 
(tls_model (TlsModel.ElfGd)) (tls_value (symbol_value_data name _ _)))) diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs index c5d661d24ab4..8310ff28d0bd 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs @@ -155,6 +155,17 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { } } + fn ashr_from_u64(&mut self, ty: Type, n: u64) -> Option { + let shiftimm = ShiftOpShiftImm::maybe_from_shift(n)?; + let shiftee_bits = ty_bits(ty); + if shiftee_bits <= std::u8::MAX as usize { + let shiftimm = shiftimm.mask(shiftee_bits as u8); + Some(ShiftOpAndAmt::new(ShiftOp::ASR, shiftimm)) + } else { + None + } + } + fn integral_ty(&mut self, ty: Type) -> Option { match ty { I8 | I16 | I32 | I64 | R64 => Some(ty), diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index 514d958c6361..2501d02aca02 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -2303,9 +2303,6 @@ (test Reg (rv_srli sum (imm12_const (ty_bits ty))))) (value_regs sum test))) -(decl inst_output_get (InstOutput u8) ValueRegs) -(extern constructor inst_output_get inst_output_get) - (decl label_to_br_target (MachLabel) BranchTarget) (extern constructor label_to_br_target label_to_br_target) diff --git a/cranelift/codegen/src/isa/riscv64/lower/isle.rs b/cranelift/codegen/src/isa/riscv64/lower/isle.rs index 1c088b4beab9..5a3195ee2164 100644 --- a/cranelift/codegen/src/isa/riscv64/lower/isle.rs +++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs @@ -303,10 +303,6 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> { self.backend.isa_flags.has_zbs() } - fn inst_output_get(&mut self, x: InstOutput, index: u8) -> ValueRegs { - x[index as usize] - } - fn move_f_to_x(&mut self, r: Reg, ty: Type) -> Reg { let result = self.temp_writable_reg(I64); 
self.emit(&gen_move(result, I64, r, ty)); diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index b11efad40226..bde21d48dbb7 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -14,14 +14,14 @@ ;; Integer instructions. ;; Integer arithmetic/bit-twiddling. - (AluRmiR (size OperandSize) ;; 4 or 8 + (AluRmiR (size OperandSize) ;; 1, 2, 4 or 8 (op AluRmiROpcode) (src1 Gpr) (src2 GprMemImm) (dst WritableGpr)) ;; Integer arithmetic read-modify-write on memory. - (AluRM (size OperandSize) ;; 4 or 8 + (AluRM (size OperandSize) ;; 1, 2, 4 or 8 (op AluRmiROpcode) (src1_dst SyntheticAmode) (src2 Gpr)) @@ -86,6 +86,12 @@ (dst_lo WritableGpr) (dst_hi WritableGpr)) + ;; x64 'mul' instruction but it only outputs the low half + (UMulLo (size OperandSize) + (src1 Gpr) + (src2 GprMem) + (dst WritableGpr)) + ;; A synthetic instruction sequence used as part of the lowering of the ;; `srem` instruction which returns 0 if the divisor is -1 and ;; otherwise executes an `idiv` instruction. @@ -2118,6 +2124,29 @@ dst) dst))) +(decl x64_alurmi_with_flags_paired (AluRmiROpcode Type Gpr GprMemImm) ProducesFlags) +(rule (x64_alurmi_with_flags_paired opc (fits_in_64 ty) src1 src2) + (let ((dst WritableGpr (temp_writable_gpr))) + (ProducesFlags.ProducesFlagsReturnsResultWithConsumer + (MInst.AluRmiR (raw_operand_size_of_type ty) + opc + src1 + src2 + dst) + dst))) + +;; Should only be used for Adc and Sbb +(decl x64_alurmi_with_flags_chained (AluRmiROpcode Type Gpr GprMemImm) ConsumesAndProducesFlags) +(rule (x64_alurmi_with_flags_chained opc (fits_in_64 ty) src1 src2) + (let ((dst WritableGpr (temp_writable_gpr))) + (ConsumesAndProducesFlags.ReturnsReg + (MInst.AluRmiR (raw_operand_size_of_type ty) + opc + src1 + src2 + dst) + dst))) + ;; Helper for creating `adc` instructions. 
(decl x64_adc_paired (Type Gpr GprMemImm) ConsumesFlags) (rule (x64_adc_paired ty src1 src2) @@ -2170,6 +2199,24 @@ src1 src2)) +;; Helper for creating `umullo` instructions. +(decl x64_umullo (Type Gpr GprMem) Gpr) +(rule (x64_umullo ty src1 src2) + (let ((dst WritableGpr (temp_writable_gpr)) + (size OperandSize (raw_operand_size_of_type ty)) + (_ Unit (emit (MInst.UMulLo size src1 src2 dst)))) + dst)) + +(decl x64_umullo_with_flags_paired (Type Gpr GprMem) ProducesFlags) +(rule (x64_umullo_with_flags_paired ty src1 src2) + (let ((dst WritableGpr (temp_writable_gpr))) + (ProducesFlags.ProducesFlagsReturnsResultWithConsumer + (MInst.UMulLo (raw_operand_size_of_type ty) + src1 + src2 + dst) + dst))) + ;; Helper for emitting `and` instructions. (decl x64_and (Type Gpr GprMemImm) Gpr) (rule (x64_and ty src1 src2) diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index c2be0409e73f..53c4b6808b41 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -154,35 +154,69 @@ pub(crate) fn emit( debug_assert_eq!(src1, reg_g); let src2 = src2.clone().to_reg_mem_imm().with_allocs(allocs); - let rex = RexFlags::from(*size); + let prefix = if *size == OperandSize::Size16 { + LegacyPrefixes::_66 + } else { + LegacyPrefixes::None + }; + + let mut rex = RexFlags::from(*size); if *op == AluRmiROpcode::Mul { // We kinda freeloaded Mul into RMI_R_Op, but it doesn't fit the usual pattern, so // we have to special-case it. 
- match src2 { - RegMemImm::Reg { reg: reg_e } => { - emit_std_reg_reg(sink, LegacyPrefixes::None, 0x0FAF, 2, reg_g, reg_e, rex); - } + if *size == OperandSize::Size8 { + match src2 { + RegMemImm::Reg { reg: reg_e } => { + debug_assert!(reg_e.is_real()); + rex.always_emit_if_8bit_needed(reg_e); + let enc_e = int_reg_enc(reg_e); + emit_std_enc_enc(sink, LegacyPrefixes::None, 0xF6, 1, 5, enc_e, rex); + } - RegMemImm::Mem { addr } => { - let amode = addr.finalize(state, sink); - emit_std_reg_mem( - sink, - LegacyPrefixes::None, - 0x0FAF, - 2, - reg_g, - &amode, - rex, - 0, - ); + RegMemImm::Mem { addr } => { + let amode = addr.finalize(state, sink); + emit_std_enc_mem( + sink, + LegacyPrefixes::None, + 0xF6, + 1, + 5, + &amode, + rex, + 0, + ); + } + + RegMemImm::Imm { .. } => { + panic!("Cannot emit 8bit imul with 8bit immediate"); + } } + } else { + match src2 { + RegMemImm::Reg { reg: reg_e } => { + emit_std_reg_reg(sink, prefix, 0x0FAF, 2, reg_g, reg_e, rex); + } - RegMemImm::Imm { simm32 } => { - let use_imm8 = low8_will_sign_extend_to_32(simm32); - let opcode = if use_imm8 { 0x6B } else { 0x69 }; - // Yes, really, reg_g twice. - emit_std_reg_reg(sink, LegacyPrefixes::None, opcode, 1, reg_g, reg_g, rex); - emit_simm(sink, if use_imm8 { 1 } else { 4 }, simm32); + RegMemImm::Mem { addr } => { + let amode = addr.finalize(state, sink); + emit_std_reg_mem(sink, prefix, 0x0FAF, 2, reg_g, &amode, rex, 0); + } + + RegMemImm::Imm { simm32 } => { + let imm_size = if low8_will_sign_extend_to_32(simm32) { + 1 + } else { + if *size == OperandSize::Size16 { + 2 + } else { + 4 + } + }; + let opcode = if imm_size == 1 { 0x6B } else { 0x69 }; + // Yes, really, reg_g twice. 
+ emit_std_reg_reg(sink, prefix, opcode, 1, reg_g, reg_g, rex); + emit_simm(sink, imm_size, simm32); + } } } } else { @@ -197,52 +231,63 @@ pub(crate) fn emit( AluRmiROpcode::Mul => panic!("unreachable"), }; + let (opcode_r, opcode_m) = if *size == OperandSize::Size8 { + (opcode_r - 1, opcode_m - 1) + } else { + (opcode_r, opcode_m) + }; + + if *size == OperandSize::Size8 { + debug_assert!(reg_g.is_real()); + rex.always_emit_if_8bit_needed(reg_g); + } + match src2 { RegMemImm::Reg { reg: reg_e } => { + if *size == OperandSize::Size8 { + debug_assert!(reg_e.is_real()); + rex.always_emit_if_8bit_needed(reg_e); + } + // GCC/llvm use the swapped operand encoding (viz., the R/RM vs RM/R // duality). Do this too, so as to be able to compare generated machine // code easily. - emit_std_reg_reg( - sink, - LegacyPrefixes::None, - opcode_r, - 1, - reg_e, - reg_g, - rex, - ); + emit_std_reg_reg(sink, prefix, opcode_r, 1, reg_e, reg_g, rex); } RegMemImm::Mem { addr } => { let amode = addr.finalize(state, sink); // Here we revert to the "normal" G-E ordering. - emit_std_reg_mem( - sink, - LegacyPrefixes::None, - opcode_m, - 1, - reg_g, - &amode, - rex, - 0, - ); + emit_std_reg_mem(sink, prefix, opcode_m, 1, reg_g, &amode, rex, 0); } RegMemImm::Imm { simm32 } => { - let use_imm8 = low8_will_sign_extend_to_32(simm32); - let opcode = if use_imm8 { 0x83 } else { 0x81 }; + let imm_size = if *size == OperandSize::Size8 { + 1 + } else { + if low8_will_sign_extend_to_32(simm32) { + 1 + } else { + if *size == OperandSize::Size16 { + 2 + } else { + 4 + } + } + }; + + let opcode = if *size == OperandSize::Size8 { + 0x80 + } else if low8_will_sign_extend_to_32(simm32) { + 0x83 + } else { + 0x81 + }; + // And also here we use the "normal" G-E ordering. 
let enc_g = int_reg_enc(reg_g); - emit_std_enc_enc( - sink, - LegacyPrefixes::None, - opcode, - 1, - subopcode_i, - enc_g, - rex, - ); - emit_simm(sink, if use_imm8 { 1 } else { 4 }, simm32); + emit_std_enc_enc(sink, prefix, opcode, 1, subopcode_i, enc_g, rex); + emit_simm(sink, imm_size, simm32); } } } @@ -274,7 +319,6 @@ pub(crate) fn emit( let src2 = allocs.next(src2.to_reg()); let src1_dst = src1_dst.finalize(state, sink).with_allocs(allocs); - assert!(*size == OperandSize::Size32 || *size == OperandSize::Size64); let opcode = match op { AluRmiROpcode::Add => 0x01, AluRmiROpcode::Sub => 0x29, @@ -283,17 +327,26 @@ pub(crate) fn emit( AluRmiROpcode::Xor => 0x31, _ => panic!("Unsupported read-modify-write ALU opcode"), }; + + let prefix = if *size == OperandSize::Size16 { + LegacyPrefixes::_66 + } else { + LegacyPrefixes::None + }; + let opcode = if *size == OperandSize::Size8 { + opcode - 1 + } else { + opcode + }; + + let mut rex = RexFlags::from(*size); + if *size == OperandSize::Size8 { + debug_assert!(src2.is_real()); + rex.always_emit_if_8bit_needed(src2); + } + let enc_g = int_reg_enc(src2); - emit_std_enc_mem( - sink, - LegacyPrefixes::None, - opcode, - 1, - enc_g, - &src1_dst, - RexFlags::from(*size), - 0, - ); + emit_std_enc_mem(sink, prefix, opcode, 1, enc_g, &src1_dst, rex, 0); } Inst::AluRmRVex { @@ -521,6 +574,45 @@ pub(crate) fn emit( } } + Inst::UMulLo { + size, + src1, + src2, + dst, + } => { + let src1 = allocs.next(src1.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + debug_assert_eq!(src1, regs::rax()); + debug_assert_eq!(dst, regs::rax()); + + let mut rex = RexFlags::from(*size); + let prefix = match size { + OperandSize::Size16 => LegacyPrefixes::_66, + _ => LegacyPrefixes::None, + }; + + let opcode = if *size == OperandSize::Size8 { + 0xF6 + } else { + 0xF7 + }; + + match src2.clone().to_reg_mem() { + RegMem::Reg { reg } => { + let reg = allocs.next(reg); + if *size == OperandSize::Size8 { + rex.always_emit_if_8bit_needed(reg); 
+ } + let reg_e = int_reg_enc(reg); + emit_std_enc_enc(sink, prefix, opcode, 1, 4, reg_e, rex); + } + RegMem::Mem { addr: src } => { + let amode = src.finalize(state, sink).with_allocs(allocs); + emit_std_enc_mem(sink, prefix, opcode, 1, 4, &amode, rex, 0); + } + } + } + Inst::SignExtendData { size, src, dst } => { let src = allocs.next(src.to_reg()); let dst = allocs.next(dst.to_reg().to_reg()); diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index ef6721b8b1d1..f8dc42e21d03 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -78,6 +78,15 @@ impl Inst { } } + fn umul_lo(size: OperandSize, operand: RegMem) -> Inst { + Inst::UMulLo { + size, + src1: Gpr::new(regs::rax()).unwrap(), + src2: GprMem::new(operand).unwrap(), + dst: WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), + } + } + fn xmm_rm_r_evex(op: Avx512Opcode, src1: RegMem, src2: Reg, dst: Writable) -> Self { src1.assert_regclass_is(RegClass::Float); debug_assert!(src2.class() == RegClass::Float); @@ -1535,6 +1544,415 @@ fn test_x64_emit() { "imull %esi, $76543210, %esi", )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size16, + AluRmiROpcode::Add, + RegMemImm::reg(rax), + w_rdx, + ), + "6601C2", + "addw %dx, %ax, %dx", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size16, + AluRmiROpcode::Add, + RegMemImm::imm(10), + w_rdx, + ), + "6683C20A", + "addw %dx, $10, %dx", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size16, + AluRmiROpcode::Add, + RegMemImm::imm(-512i32 as u32), + w_rdx, + ), + "6681C200FE", + "addw %dx, $-512, %dx", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size16, + AluRmiROpcode::Sub, + RegMemImm::reg(rax), + w_r12, + ), + "664129C4", + "subw %r12w, %ax, %r12w", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size16, + AluRmiROpcode::Xor, + RegMemImm::reg(r10), + w_rcx, + ), + "664431D1", + "xorw %cx, %r10w, %cx", 
+ )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size16, + AluRmiROpcode::And, + RegMemImm::reg(r10), + w_r14, + ), + "664521D6", + "andw %r14w, %r10w, %r14w", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size16, + AluRmiROpcode::And, + RegMemImm::imm(10), + w_r14, + ), + "664183E60A", + "andw %r14w, $10, %r14w", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size16, + AluRmiROpcode::And, + RegMemImm::imm(-512i32 as u32), + w_r14, + ), + "664181E600FE", + "andw %r14w, $-512, %r14w", + )); + + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size16, + AluRmiROpcode::Mul, + RegMemImm::imm(10), + w_rax, + ), + "666BC00A", + "imulw %ax, $10, %ax", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size16, + AluRmiROpcode::Mul, + RegMemImm::imm(-512i32 as u32), + w_rax, + ), + "6669C000FE", + "imulw %ax, $-512, %ax", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size16, + AluRmiROpcode::Mul, + RegMemImm::imm(10), + w_r11, + ), + "66456BDB0A", + "imulw %r11w, $10, %r11w", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size16, + AluRmiROpcode::Mul, + RegMemImm::imm(-512i32 as u32), + w_r11, + ), + "664569DB00FE", + "imulw %r11w, $-512, %r11w", + )); + + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size16, + AluRmiROpcode::Mul, + RegMemImm::reg(rdx), + w_rax, + ), + "660FAFC2", + "imulw %ax, %dx, %ax", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size16, + AluRmiROpcode::Mul, + RegMemImm::reg(r12), + w_rax, + ), + "66410FAFC4", + "imulw %ax, %r12w, %ax", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size16, + AluRmiROpcode::Mul, + RegMemImm::reg(rdx), + w_r11, + ), + "66440FAFDA", + "imulw %r11w, %dx, %r11w", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size16, + AluRmiROpcode::Mul, + RegMemImm::reg(r12), + w_r11, + ), + "66450FAFDC", + "imulw %r11w, %r12w, %r11w", + )); + + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::Add, + RegMemImm::imm(10), + w_rax, + ), + "80C00A", // 
there is theoretically 040A as a valid encoding also + "addb %al, $10, %al", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::Add, + RegMemImm::reg(rcx), + w_rax, + ), + "00C8", + "addb %al, %cl, %al", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::Add, + RegMemImm::reg(rsi), + w_rax, + ), + "4000F0", + "addb %al, %sil, %al", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::Add, + RegMemImm::reg(r11), + w_rax, + ), + "4400D8", + "addb %al, %r11b, %al", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::Add, + RegMemImm::reg(r15), + w_rax, + ), + "4400F8", + "addb %al, %r15b, %al", + )); + + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::Sub, + RegMemImm::imm(10), + _w_rbp, + ), + "4080ED0A", + "subb %bpl, $10, %bpl", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::Sub, + RegMemImm::reg(rcx), + _w_rbp, + ), + "4028CD", + "subb %bpl, %cl, %bpl", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::Sub, + RegMemImm::reg(rsi), + _w_rbp, + ), + "4028F5", + "subb %bpl, %sil, %bpl", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::Sub, + RegMemImm::reg(r11), + _w_rbp, + ), + "4428DD", + "subb %bpl, %r11b, %bpl", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::Sub, + RegMemImm::reg(r15), + _w_rbp, + ), + "4428FD", + "subb %bpl, %r15b, %bpl", + )); + + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::Xor, + RegMemImm::imm(10), + _w_r10, + ), + "4180F20A", + "xorb %r10b, $10, %r10b", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::Xor, + RegMemImm::reg(rcx), + _w_r10, + ), + "4130CA", + "xorb %r10b, %cl, %r10b", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::Xor, + RegMemImm::reg(rsi), + _w_r10, + ), + "4130F2", + "xorb %r10b, %sil, 
%r10b", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::Xor, + RegMemImm::reg(r11), + _w_r10, + ), + "4530DA", + "xorb %r10b, %r11b, %r10b", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::Xor, + RegMemImm::reg(r15), + _w_r10, + ), + "4530FA", + "xorb %r10b, %r15b, %r10b", + )); + + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::And, + RegMemImm::imm(10), + w_r15, + ), + "4180E70A", + "andb %r15b, $10, %r15b", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::And, + RegMemImm::reg(rcx), + w_r15, + ), + "4120CF", + "andb %r15b, %cl, %r15b", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::And, + RegMemImm::reg(rsi), + w_r15, + ), + "4120F7", + "andb %r15b, %sil, %r15b", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::And, + RegMemImm::reg(r11), + w_r15, + ), + "4520DF", + "andb %r15b, %r11b, %r15b", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::And, + RegMemImm::reg(r15), + w_r15, + ), + "4520FF", + "andb %r15b, %r15b, %r15b", + )); + + // the 8bit imul has rax as fixed dst + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::Mul, + RegMemImm::reg(rcx), + w_rax, + ), + "F6E9", + "imulb %al, %cl, %al", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::Mul, + RegMemImm::reg(rbp), + w_rax, + ), + "40F6ED", + "imulb %al, %bpl, %al", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::Mul, + RegMemImm::reg(r10), + w_rax, + ), + "41F6EA", + "imulb %al, %r10b, %al", + )); + insns.push(( + Inst::alu_rmi_r( + OperandSize::Size8, + AluRmiROpcode::Mul, + RegMemImm::reg(r15), + w_rax, + ), + "41F6EF", + "imulb %al, %r15b, %al", + )); + // ======================================================== // AluRM @@ -1654,6 +2072,68 @@ fn test_x64_emit() { "xorq %rax, 0(%rbp)", )); + insns.push(( + 
Inst::AluRM { + size: OperandSize::Size16, + op: AluRmiROpcode::Add, + src1_dst: Amode::imm_reg(0, rbp).into(), + src2: Gpr::new(rax).unwrap(), + }, + "66014500", + "addw %ax, 0(%rbp)", + )); + insns.push(( + Inst::AluRM { + size: OperandSize::Size16, + op: AluRmiROpcode::Sub, + src1_dst: Amode::imm_reg(0, rbp).into(), + src2: Gpr::new(r12).unwrap(), + }, + "6644296500", + "subw %r12w, 0(%rbp)", + )); + + insns.push(( + Inst::AluRM { + size: OperandSize::Size8, + op: AluRmiROpcode::Add, + src1_dst: Amode::imm_reg(0, rbp).into(), + src2: Gpr::new(rax).unwrap(), + }, + "004500", + "addb %al, 0(%rbp)", + )); + insns.push(( + Inst::AluRM { + size: OperandSize::Size8, + op: AluRmiROpcode::Sub, + src1_dst: Amode::imm_reg(0, rbp).into(), + src2: Gpr::new(rbp).unwrap(), + }, + "40286D00", + "subb %bpl, 0(%rbp)", + )); + insns.push(( + Inst::AluRM { + size: OperandSize::Size8, + op: AluRmiROpcode::Xor, + src1_dst: Amode::imm_reg(0, rbp).into(), + src2: Gpr::new(r10).unwrap(), + }, + "44305500", + "xorb %r10b, 0(%rbp)", + )); + insns.push(( + Inst::AluRM { + size: OperandSize::Size8, + op: AluRmiROpcode::And, + src1_dst: Amode::imm_reg(0, rbp).into(), + src2: Gpr::new(r15).unwrap(), + }, + "44207D00", + "andb %r15b, 0(%rbp)", + )); + // ======================================================== // UnaryRmR @@ -1864,6 +2344,59 @@ fn test_x64_emit() { "mul %rax, %rdi, %rax, %rdx", )); + // ======================================================== + // UMulLo + insns.push(( + Inst::umul_lo(OperandSize::Size64, RegMem::reg(regs::rdx())), + "48F7E2", + "mulq %rax, %rdx, %rax", + )); + insns.push(( + Inst::umul_lo(OperandSize::Size64, RegMem::reg(regs::r12())), + "49F7E4", + "mulq %rax, %r12, %rax", + )); + insns.push(( + Inst::umul_lo(OperandSize::Size32, RegMem::reg(regs::rdx())), + "F7E2", + "mull %eax, %edx, %eax", + )); + insns.push(( + Inst::umul_lo(OperandSize::Size32, RegMem::reg(regs::r12())), + "41F7E4", + "mull %eax, %r12d, %eax", + )); + insns.push(( + 
Inst::umul_lo(OperandSize::Size16, RegMem::reg(regs::rdx())), + "66F7E2", + "mulw %ax, %dx, %ax", + )); + insns.push(( + Inst::umul_lo(OperandSize::Size16, RegMem::reg(regs::r12())), + "6641F7E4", + "mulw %ax, %r12w, %ax", + )); + insns.push(( + Inst::umul_lo(OperandSize::Size8, RegMem::reg(regs::rdx())), + "F6E2", + "mulb %al, %dl, %al", + )); + insns.push(( + Inst::umul_lo(OperandSize::Size8, RegMem::reg(regs::rdi())), + "40F6E7", + "mulb %al, %dil, %al", + )); + insns.push(( + Inst::umul_lo(OperandSize::Size8, RegMem::reg(regs::r9())), + "41F6E1", + "mulb %al, %r9b, %al", + )); + insns.push(( + Inst::umul_lo(OperandSize::Size8, RegMem::reg(regs::r12())), + "41F6E4", + "mulb %al, %r12b, %al", + )); + // ======================================================== // Imm_R // diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 984cbbe8fbcc..1d82212b0380 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -101,6 +101,7 @@ impl Inst { | Inst::MovsxRmR { .. } | Inst::MovzxRmR { .. } | Inst::MulHi { .. } + | Inst::UMulLo { .. } | Inst::Neg { .. } | Inst::Not { .. } | Inst::Nop { .. 
} @@ -180,7 +181,6 @@ impl Inst { src: RegMemImm, dst: Writable, ) -> Self { - debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); src.assert_regclass_is(RegClass::Int); debug_assert!(dst.to_reg().class() == RegClass::Int); Self::AluRmiR { @@ -657,6 +657,7 @@ impl PrettyPrint for Inst { .to_string() } + #[allow(dead_code)] fn suffix_lqb(size: OperandSize) -> String { match size { OperandSize::Size32 => "l", @@ -691,7 +692,7 @@ impl PrettyPrint for Inst { let src2 = src2.pretty_print(size_bytes, allocs); format!( "{} {}, {}, {}", - ljustify2(op.to_string(), suffix_lqb(*size)), + ljustify2(op.to_string(), suffix_bwlq(*size)), src1, src2, dst @@ -716,7 +717,7 @@ impl PrettyPrint for Inst { let src1_dst = src1_dst.pretty_print(size_bytes, allocs); format!( "{} {}, {}", - ljustify2(op.to_string(), suffix_lqb(*size)), + ljustify2(op.to_string(), suffix_bwlq(*size)), src2, src1_dst, ) @@ -849,6 +850,24 @@ impl PrettyPrint for Inst { ) } + Inst::UMulLo { + size, + src1, + src2, + dst, + } => { + let src1 = pretty_print_reg(src1.to_reg(), size.to_bytes(), allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs); + let src2 = src2.pretty_print(size.to_bytes(), allocs); + format!( + "{} {}, {}, {}", + ljustify2("mul".to_string(), suffix_bwlq(*size)), + src1, + src2, + dst, + ) + } + Inst::CheckedSRemSeq { size, divisor, @@ -1854,11 +1873,23 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol // method above. match inst { Inst::AluRmiR { - src1, src2, dst, .. + size, + op, + src1, + src2, + dst, + .. 
} => { - collector.reg_use(src1.to_reg()); - collector.reg_reuse_def(dst.to_writable_reg(), 0); - src2.get_operands(collector); + if *size == OperandSize::Size8 && *op == AluRmiROpcode::Mul { + // 8-bit imul has RAX as a fixed input/output + collector.reg_fixed_use(src1.to_reg(), regs::rax()); + collector.reg_fixed_def(dst.to_writable_reg(), regs::rax()); + src2.get_operands(collector); + } else { + collector.reg_use(src1.to_reg()); + collector.reg_reuse_def(dst.to_writable_reg(), 0); + src2.get_operands(collector); + } } Inst::AluConstOp { dst, .. } => collector.reg_def(dst.to_writable_reg()), Inst::AluRM { src1_dst, src2, .. } => { @@ -1925,6 +1956,20 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol collector.reg_fixed_def(dst_hi.to_writable_reg(), regs::rdx()); src2.get_operands(collector); } + Inst::UMulLo { + size, + src1, + src2, + dst, + .. + } => { + collector.reg_fixed_use(src1.to_reg(), regs::rax()); + collector.reg_fixed_def(dst.to_writable_reg(), regs::rax()); + if *size != OperandSize::Size8 { + collector.reg_clobbers(PRegSet::empty().with(regs::gpr_preg(regs::ENC_RDX))); + } + src2.get_operands(collector); + } Inst::SignExtendData { size, src, dst } => { match size { OperandSize::Size8 => { diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 3e50417d0f0e..fab850035de0 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -111,6 +111,87 @@ (output_pair (value_regs_get results 0) (value_regs_get results 1)))) +;;;; Helpers for `*_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl construct_overflow_op (CC ProducesFlags) InstOutput) +(rule (construct_overflow_op cc inst) + (let ((results ValueRegs (with_flags inst + (x64_setcc_paired cc)))) + (output_pair (value_regs_get results 0) + (value_regs_get results 1)))) + +(decl construct_overflow_op_alu (Type CC AluRmiROpcode Gpr GprMemImm) InstOutput) +(rule 
(construct_overflow_op_alu ty cc alu_op src1 src2) + (construct_overflow_op cc (x64_alurmi_with_flags_paired alu_op ty src1 src2))) + +;; This essentially creates +;; alu_ x_lo, y_lo +;; alu_ x_hi, y_hi +;; set r8 +(decl construct_overflow_op_alu_128 (CC AluRmiROpcode AluRmiROpcode Value Value) InstOutput) +(rule (construct_overflow_op_alu_128 cc op1 op2 x y) + ;; Get the high/low registers for `x`. + (let ((x_regs ValueRegs x) + (x_lo Gpr (value_regs_get_gpr x_regs 0)) + (x_hi Gpr (value_regs_get_gpr x_regs 1))) + ;; Get the high/low registers for `y`. + (let ((y_regs ValueRegs y) + (y_lo Gpr (value_regs_get_gpr y_regs 0)) + (y_hi Gpr (value_regs_get_gpr y_regs 1))) + (let ((lo_inst ProducesFlags (x64_alurmi_with_flags_paired op1 $I64 x_lo y_lo)) + (hi_inst ConsumesAndProducesFlags (x64_alurmi_with_flags_chained op2 $I64 x_hi y_hi)) + (of_inst ConsumesFlags (x64_setcc_paired cc)) + + (result MultiReg (with_flags_chained lo_inst hi_inst of_inst))) + (multi_reg_to_pair_and_single result))))) + +;;;; Rules for `uadd_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 1 (lower (uadd_overflow x y @ (value_type (fits_in_64 ty)))) + (construct_overflow_op_alu ty (CC.B) (AluRmiROpcode.Add) x y)) + +;; i128 gets lowered into adc and add +(rule 0 (lower (uadd_overflow x y @ (value_type $I128))) + (construct_overflow_op_alu_128 (CC.B) (AluRmiROpcode.Add) (AluRmiROpcode.Adc) x y)) + +;;;; Rules for `sadd_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 1 (lower (sadd_overflow x y @ (value_type (fits_in_64 ty)))) + (construct_overflow_op_alu ty (CC.O) (AluRmiROpcode.Add) x y)) + +(rule 0 (lower (sadd_overflow x y @ (value_type $I128))) + (construct_overflow_op_alu_128 (CC.O) (AluRmiROpcode.Add) (AluRmiROpcode.Adc) x y)) + +;;;; Rules for `usub_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 1 (lower (usub_overflow x y @ (value_type (fits_in_64 ty)))) + (construct_overflow_op_alu ty (CC.B) (AluRmiROpcode.Sub) 
x y)) + +(rule 0 (lower (usub_overflow x y @ (value_type $I128))) + (construct_overflow_op_alu_128 (CC.B) (AluRmiROpcode.Sub) (AluRmiROpcode.Sbb) x y)) + +;;;; Rules for `ssub_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 1 (lower (ssub_overflow x y @ (value_type (fits_in_64 ty)))) + (construct_overflow_op_alu ty (CC.O) (AluRmiROpcode.Sub) x y)) + +(rule 0 (lower (ssub_overflow x y @ (value_type $I128))) + (construct_overflow_op_alu_128 (CC.O) (AluRmiROpcode.Sub) (AluRmiROpcode.Sbb) x y)) + +;;;; Rules for `umul_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 2 (lower (umul_overflow x y @ (value_type (fits_in_64 ty)))) + (construct_overflow_op (CC.O) (x64_umullo_with_flags_paired ty x y))) + +;;;; Rules for `smul_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule 2 (lower (smul_overflow x y @ (value_type (ty_int_ref_16_to_64 ty)))) + (construct_overflow_op_alu ty (CC.O) (AluRmiROpcode.Mul) x y)) + +;; there is no 8bit imul with an immediate operand so we need to put it in a register or memory +(rule 1 (lower (smul_overflow x y @ (value_type $I8))) + (construct_overflow_op (CC.O) (x64_alurmi_with_flags_paired (AluRmiROpcode.Mul) $I8 x (reg_mem_to_reg_mem_imm (put_in_reg_mem y))))) + ;;;; Rules for `sadd_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type (multi_lane 8 16) diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index 3de34e436664..d64973ce5b42 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -355,6 +355,14 @@ macro_rules! 
isle_common_prelude_methods { } } + #[inline] + fn ty_int_ref_16_to_64(&mut self, ty: Type) -> Option { + match ty { + I16 | I32 | I64 | R64 => Some(ty), + _ => None, + } + } + #[inline] fn ty_int(&mut self, ty: Type) -> Option { ty.is_int().then(|| ty) diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 5a3018318f64..ad703ed8fe87 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -361,6 +361,10 @@ (decl ty_int_ref_64 (Type) Type) (extern extractor ty_int_ref_64 ty_int_ref_64) +;; An extractor that matches int or reference types bigger than 16 bits but at most 64 bits. +(decl ty_int_ref_16_to_64 (Type) Type) +(extern extractor ty_int_ref_16_to_64 ty_int_ref_16_to_64) + ;; An extractor that only matches integers. (decl ty_int (Type) Type) (extern extractor ty_int ty_int) diff --git a/cranelift/codegen/src/prelude_lower.isle b/cranelift/codegen/src/prelude_lower.isle index eab34154e065..c5ee9f508c7f 100644 --- a/cranelift/codegen/src/prelude_lower.isle +++ b/cranelift/codegen/src/prelude_lower.isle @@ -17,6 +17,16 @@ ;; (Mutable) builder to incrementally construct an `InstOutput`. 
(type InstOutputBuilder extern (enum)) +;; Type to hold multiple Regs +(type MultiReg + (enum + (Empty) + (One (a Reg)) + (Two (a Reg) (b Reg)) + (Three (a Reg) (b Reg) (c Reg)) + (Four (a Reg) (b Reg) (c Reg) (d Reg)) + )) + ;;;; Registers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (type Reg (primitive Reg)) @@ -144,6 +154,22 @@ (decl preg_to_reg (PReg) Reg) (extern constructor preg_to_reg preg_to_reg) +;; Convert a MultiReg with three registers into an InstOutput containing +;; one ValueRegs containing the first two regs and one containing the third reg +(decl multi_reg_to_pair_and_single (MultiReg) InstOutput) +(rule (multi_reg_to_pair_and_single (MultiReg.Three a b c)) + (output_pair (value_regs a b) c)) + +;; Convert a MultiReg with two registers into an InstOutput containing one ValueRegs with both regs +(decl multi_reg_to_pair (MultiReg) InstOutput) +(rule (multi_reg_to_pair (MultiReg.Two a b)) + (value_regs a b)) + +;; Convert a MultiReg with one register into an InstOutput containing one ValueRegs with the register +(decl multi_reg_to_single (MultiReg) InstOutput) +(rule (multi_reg_to_single (MultiReg.One a)) + (value_reg a)) + ;;;; Common Mach Types ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (type MachLabel (primitive MachLabel)) @@ -335,10 +361,15 @@ (ProducesFlagsReturnsResultWithConsumer (inst MInst) (result Reg)))) ;; Chain another producer to a `ProducesFlags`. 
-(decl produces_flags_append (ProducesFlags MInst) ProducesFlags) -(rule (produces_flags_append (ProducesFlags.ProducesFlagsSideEffect inst1) inst2) +(decl produces_flags_concat (ProducesFlags ProducesFlags) ProducesFlags) +(rule (produces_flags_concat (ProducesFlags.ProducesFlagsSideEffect inst1) (ProducesFlags.ProducesFlagsSideEffect inst2)) (ProducesFlags.ProducesFlagsTwiceSideEffect inst1 inst2)) +;; Newtype wrapper around `MInst` for instructions that consume and produce flags +(type ConsumesAndProducesFlags (enum + (SideEffect (inst MInst)) + (ReturnsReg (inst MInst) (result Reg)))) + ;; Newtype wrapper around `MInst` for instructions that consume flags. ;; ;; Variant determines how result is given when combined with a @@ -528,6 +559,250 @@ (ConsumesFlags.ConsumesFlagsSideEffect c)) (SideEffectNoResult.Inst3 p1 p2 c)) +;; Combine flag-producing and -consuming instruction that allows more than two results to be returned +(decl with_flags_chained (ProducesFlags ConsumesAndProducesFlags ConsumesFlags) MultiReg) + +;; ProducesFlags.SideEffect + ConsumesAndProducesFlags.SideEffect with all possible ConsumeFlags options +(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) + (ConsumesAndProducesFlags.SideEffect middle_inst) + (ConsumesFlags.ConsumesFlagsSideEffect consume_inst)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst))) + (MultiReg.Empty))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) + (ConsumesAndProducesFlags.SideEffect middle_inst) + (ConsumesFlags.ConsumesFlagsSideEffect2 consume_inst1 consume_inst2)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2))) + (MultiReg.Empty))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) + (ConsumesAndProducesFlags.SideEffect middle_inst) + (ConsumesFlags.ConsumesFlagsReturnsReg consume_inst reg)) + (let ((_ Unit 
(emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst))) + (MultiReg.One reg))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) + (ConsumesAndProducesFlags.SideEffect middle_inst) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consume_inst1 consume_inst2 consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2))) + (MultiReg.Two (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) + (ConsumesAndProducesFlags.SideEffect middle_inst) + (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consume_inst1 consume_inst2 consume_inst3 consume_inst4 consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2)) + (_ Unit (emit consume_inst3)) + (_ Unit (emit consume_inst4))) + (MultiReg.Two (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) + + +;; ProducesFlags.ReturnsReg + ConsumesAndProducesFlags.SideEffect with all possible ConsumeFlags options +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) + (ConsumesAndProducesFlags.SideEffect middle_inst) + (ConsumesFlags.ConsumesFlagsSideEffect consume_inst)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst))) + (MultiReg.One prod_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) + (ConsumesAndProducesFlags.SideEffect middle_inst) + (ConsumesFlags.ConsumesFlagsSideEffect2 consume_inst1 consume_inst2)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2))) + (MultiReg.One prod_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) + 
(ConsumesAndProducesFlags.SideEffect middle_inst) + (ConsumesFlags.ConsumesFlagsReturnsReg consume_inst consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst))) + (MultiReg.Two prod_result consume_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) + (ConsumesAndProducesFlags.SideEffect middle_inst) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consume_inst1 consume_inst2 consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2))) + (MultiReg.Three prod_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) + (ConsumesAndProducesFlags.SideEffect middle_inst) + (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consume_inst1 consume_inst2 consume_inst3 consume_inst4 consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2)) + (_ Unit (emit consume_inst3)) + (_ Unit (emit consume_inst4))) + (MultiReg.Three prod_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) + + +;; ProducesFlags.SideEffect + ConsumesAndProducesFlags.ReturnsReg with all possible ConsumeFlags options +(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsSideEffect consume_inst)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst))) + (MultiReg.One middle_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsSideEffect2 consume_inst1 consume_inst2)) + (let ((_ Unit (emit prod_inst)) + (_ Unit 
(emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2))) + (MultiReg.One middle_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsReturnsReg consume_inst consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst))) + (MultiReg.Two middle_result consume_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consume_inst1 consume_inst2 consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2))) + (MultiReg.Three middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsSideEffect prod_inst) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consume_inst1 consume_inst2 consume_inst3 consume_inst4 consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2)) + (_ Unit (emit consume_inst3)) + (_ Unit (emit consume_inst4))) + (MultiReg.Three middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) + + +;; ProducesFlags.ReturnsReg + ConsumesAndProducesFlags.ReturnsReg with all possible ConsumeFlags options +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsSideEffect consume_inst)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst))) + (MultiReg.Two prod_result middle_result))) + +(rule 
(with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsSideEffect2 consume_inst1 consume_inst2)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2))) + (MultiReg.Two prod_result middle_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsReturnsReg consume_inst consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst))) + (MultiReg.Three prod_result middle_result consume_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consume_inst1 consume_inst2 consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2))) + (MultiReg.Four prod_result middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsReg prod_inst prod_result) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consume_inst1 consume_inst2 consume_inst3 consume_inst4 consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2)) + (_ Unit (emit consume_inst3)) + (_ Unit (emit consume_inst4))) + (MultiReg.Four prod_result middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) + +;; ProducesFlags.ReturnsResultWithConsumer + ConsumesAndProducesFlags.ReturnsReg with all possible ConsumeFlags options +(rule 
(with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsSideEffect consume_inst)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst))) + (MultiReg.Two prod_result middle_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsSideEffect2 consume_inst1 consume_inst2)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2))) + (MultiReg.Two prod_result middle_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsReturnsReg consume_inst consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst))) + (MultiReg.Three prod_result middle_result consume_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsReturnsResultWithProducer consume_inst consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst))) + (MultiReg.Three prod_result middle_result consume_result))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsTwiceReturnsValueRegs consume_inst1 consume_inst2 consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2))) + 
(MultiReg.Four prod_result middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) + +(rule (with_flags_chained (ProducesFlags.ProducesFlagsReturnsResultWithConsumer prod_inst prod_result) + (ConsumesAndProducesFlags.ReturnsReg middle_inst middle_result) + (ConsumesFlags.ConsumesFlagsFourTimesReturnsValueRegs consume_inst1 consume_inst2 consume_inst3 consume_inst4 consume_result)) + (let ((_ Unit (emit prod_inst)) + (_ Unit (emit middle_inst)) + (_ Unit (emit consume_inst1)) + (_ Unit (emit consume_inst2)) + (_ Unit (emit consume_inst3)) + (_ Unit (emit consume_inst4))) + (MultiReg.Four prod_result middle_result (value_regs_get consume_result 0) (value_regs_get consume_result 1)))) + ;;;; Helpers for accessing compilation flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; This definition should be kept up to date with the values defined in diff --git a/cranelift/filetests/filetests/runtests/sadd_overflow.clif b/cranelift/filetests/filetests/runtests/sadd_overflow.clif new file mode 100644 index 000000000000..256932ca67ac --- /dev/null +++ b/cranelift/filetests/filetests/runtests/sadd_overflow.clif @@ -0,0 +1,85 @@ +test interpret +test run +set enable_llvm_abi_extensions=true +target x86_64 +target aarch64 + +function %saddof_i128(i128, i128) -> i128, i8 { +block0(v0: i128,v1: i128): + v2, v3 = sadd_overflow v0, v1 + return v2, v3 +} +; run: %saddof_i128(0, 0) == [0, 0] +; run: %saddof_i128(1, 0) == [1, 0] +; run: %saddof_i128(1, 1) == [2, 0] +; run: %saddof_i128(1, -1) == [0, 0] +; run: %saddof_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000, 0x00000000_00000000_FFFFFFFF_FFFFFFFF) == [-1, 0] +; run: %saddof_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF, 1) == [0x1_00000000_00000000, 0] +; run: %saddof_i128(-1, 1) == [0, 0] +; run: %saddof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == [-2, 1] +; run: %saddof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == [-1, 0] +; run: 
%saddof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000001) == [0, 0] +; run: %saddof_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == [0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1] +; run: %saddof_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000001) == [0x80000000_00000000_00000000_00000000, 0] +; run: %saddof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == [0x80000000_00000000_00000000_00000000, 1] +; run: %saddof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFE, 1) == [0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0] +; run: %saddof_i128(0x01234567_89ABCDEF_01234567_89ABCDEF, 0xFEDCBA98_76543210_FEDCBA98_76543210) == [-1, 0] +; run: %saddof_i128(0x06060606_06060606_A00A00A0_0A00A00A, 0x30303030_30303030_0BB0BB0B_B0BB0BB0) == [0x36363636_36363636_ABBABBAB_BABBABBA, 0] +; run: %saddof_i128(0xC0FFEEEE_C0FFEEEE_C0FFEEEE_C0FFEEEE, 0x1DCB1111_1DCB1111_1DCB1111_1DCB1111) == [0xDECAFFFF_DECAFFFF_DECAFFFF_DECAFFFF, 0] + +function %saddof_i64(i64, i64) -> i64, i8 { +block0(v0: i64,v1: i64): + v2, v3 = sadd_overflow v0, v1 + return v2, v3 +} +; run: %saddof_i64(0, 0) == [0, 0] +; run: %saddof_i64(0, 1) == [1, 0] +; run: %saddof_i64(-1, 0) == [-1, 0] +; run: %saddof_i64(-1, 1) == [0, 0] +; run: %saddof_i64(0x7FFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == [-2, 1] +; run: %saddof_i64(0x7FFFFFFF_FFFFFFFF, 0x80000000_00000000) == [-1, 0] +; run: %saddof_i64(0x7FFFFFFF_FFFFFFFF, 0x80000000_00000001) == [0, 0] +; run: %saddof_i64(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == [-1, 0] +; run: %saddof_i64(0x01234567_89ABCDEF, 0xFEDCBA98_76543220) == [0xF, 0] +; run: %saddof_i64(0xA00A00A0_0A00A00A, 0x0BB0BB0B_B0BB0BB0) == [0xABBABBAB_BABBABBA, 0] +; run: %saddof_i64(0xC0FFEEEE_C0FFEEEE, 0x1DCB1111_1DCB1111) == [0xDECAFFFF_DECAFFFF, 0] + +function %saddof_i8(i8, i8) -> i8, i8 { +block0(v0: i8, v1: i8): + v2, v3 = sadd_overflow v0, v1 + return v2, v3 +} +; run: %saddof_i8(0, 1) == [1, 0] +; run: 
%saddof_i8(100, 27) == [127, 0] +; run: %saddof_i8(100, -20) == [80, 0] +; run: %saddof_i8(100, 28) == [-128, 1] +; run: %saddof_i8(-128, -128) == [0, 1] +; run: %saddof_i8(-128, -1) == [0x7F, 1] +; run: %saddof_i8(-127, -1) == [-128, 0] +; run: %saddof_i8(127, 1) == [0x80, 1] + +function %saddof_i16(i16, i16) -> i16, i8 { +block0(v0: i16, v1: i16): + v2, v3 = sadd_overflow v0, v1 + return v2, v3 +} +; run: %saddof_i16(0, 1) == [1, 0] +; run: %saddof_i16(100, 27) == [127, 0] +; run: %saddof_i16(100, 28) == [128, 0] +; run: %saddof_i16(32000, 767) == [32767, 0] +; run: %saddof_i16(32000, 768) == [-32768, 1] +; run: %saddof_i16(-32767, -1) == [-32768, 0] +; run: %saddof_i16(-32768, -1) == [32767, 1] + +function %saddof_i32(i32, i32) -> i32, i8 { +block0(v0: i32, v1: i32): + v2, v3 = sadd_overflow v0, v1 + return v2, v3 +} +; run: %saddof_i32(0, 1) == [1, 0] +; run: %saddof_i32(100, 27) == [127, 0] +; run: %saddof_i32(100, 28) == [128, 0] +; run: %saddof_i32(0x7FFF_FFFE, 1) == [0x7FFF_FFFF, 0] +; run: %saddof_i32(0x7FFF_FFFF, 1) == [0x8000_0000, 1] +; run: %saddof_i32(0x8000_0000, 0xFFFF_FFFF) == [0x7FFF_FFFF, 1] +; run: %saddof_i32(0x8000_0001, 0xFFFF_FFFF) == [0x8000_0000, 0] \ No newline at end of file diff --git a/cranelift/filetests/filetests/runtests/smul_overflow.clif b/cranelift/filetests/filetests/runtests/smul_overflow.clif new file mode 100644 index 000000000000..d61cae79b409 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/smul_overflow.clif @@ -0,0 +1,76 @@ +test interpret +test run +target x86_64 +target aarch64 + +function %smulof_i64(i64, i64) -> i64, i8 { +block0(v0: i64, v1: i64): + v2, v3 = smul_overflow v0, v1 + return v2, v3 +} + +; run: %smulof_i64(0, 1) == [0, 0] +; run: %smulof_i64(1, 1) == [1, 0] +; run: %smulof_i64(0xFFFFFFFF_FFFFFFFF, 2) == [0xFFFFFFFF_FFFFFFFE, 0] +; run: %smulof_i64(0x7FFFFFFF_FFFFFFFF, 2) == [0xFFFFFFFF_FFFFFFFE, 1] +; run: %smulof_i64(1, -1) == [-1, 0] +; run: %smulof_i64(2, 2) == [4, 0] +; run: %smulof_i64(2, 
-2) == [-4, 0] +; run: %smulof_i64(0x7FFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == [1, 1] +; run: %smulof_i64(0x80000000_00000000, 0x7FFFFFFF_FFFFFFFF) == [0x80000000_00000000, 1] +; run: %smulof_i64(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == [0x2236D88F_E5618CF0, 1] +; run: %smulof_i64(0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF) == [0xDB6B1E48_19BA1112, 1] + +function %smulof_i32(i32, i32) -> i32, i8 { +block0(v0: i32, v1: i32): + v2, v3 = smul_overflow v0, v1 + return v2, v3 +} + +; run: %smulof_i32(0, 1) == [0, 0] +; run: %smulof_i32(1, 1) == [1, 0] +; run: %smulof_i32(0xFFFFFFFF, 2) == [0xFFFFFFFE, 0] +; run: %smulof_i32(0x7FFFFFFF, 2) == [0xFFFFFFFE, 1] +; run: %smulof_i32(1, -1) == [-1, 0] +; run: %smulof_i32(2, 2) == [4, 0] +; run: %smulof_i32(2, -2) == [-4, 0] +; run: %smulof_i32(0x7FFFFFFF, 0x7FFFFFFF) == [1, 1] +; run: %smulof_i32(0x80000000, 0x7FFFFFFF) == [0x80000000, 1] +; run: %smulof_i32(0x01234567, 0xFEDCBA98) == [0x23E20B28, 1] +; run: %smulof_i32(0xC0FFEEEE, 0xDECAFFFF) == [0x19BA1112, 1] + +function %smulof_i16(i16, i16) -> i16, i8 { +block0(v0: i16, v1: i16): + v2, v3 = smul_overflow v0, v1 + return v2, v3 +} + +; run: %smulof_i16(0, 1) == [0, 0] +; run: %smulof_i16(1, 1) == [1, 0] +; run: %smulof_i16(0xFFFF, 2) == [0xFFFE, 0] +; run: %smulof_i16(0x7FFF, 2) == [0xFFFE, 1] +; run: %smulof_i16(1, -1) == [-1, 0] +; run: %smulof_i16(2, 2) == [4, 0] +; run: %smulof_i16(2, -2) == [-4, 0] +; run: %smulof_i16(0x7FFF, 0x7FFF) == [1, 1] +; run: %smulof_i16(0x8000, 0x7FFF) == [0x8000, 1] +; run: %smulof_i16(0x0123, 0xFEDC) == [0xB414, 1] +; run: %smulof_i16(0xC0FF, 0xDECA) == [0x6B36, 1] + +function %smulof_i8(i8, i8) -> i8, i8 { +block0(v0: i8, v1: i8): + v2, v3 = smul_overflow v0, v1 + return v2, v3 +} + +; run: %smulof_i8(0, 1) == [0, 0] +; run: %smulof_i8(1, 1) == [1, 0] +; run: %smulof_i8(0xFF, 2) == [0xFE, 0] +; run: %smulof_i8(0x7F, 2) == [0xFE, 1] +; run: %smulof_i8(1, -1) == [-1, 0] +; run: %smulof_i8(2, 2) == [4, 0] +; run: %smulof_i8(2, -2) == [-4, 
0] +; run: %smulof_i8(0x7F, 0x7F) == [1, 1] +; run: %smulof_i8(0x80, 0x7F) == [0x80, 1] +; run: %smulof_i8(0x01, 0xFE) == [0xFE, 0] +; run: %smulof_i8(0xC0, 0xDE) == [0x80, 1] \ No newline at end of file diff --git a/cranelift/filetests/filetests/runtests/ssub_overflow.clif b/cranelift/filetests/filetests/runtests/ssub_overflow.clif new file mode 100644 index 000000000000..4ca2a9a04c02 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/ssub_overflow.clif @@ -0,0 +1,91 @@ +test interpret +test run +set enable_llvm_abi_extensions=true +target x86_64 +target aarch64 + +function %ssubof_i128(i128, i128) -> i128, i8 { +block0(v0: i128,v1: i128): + v2, v3 = ssub_overflow v0, v1 + return v2, v3 +} +; run: %ssubof_i128(0, 0) == [0, 0] +; run: %ssubof_i128(0, 1) == [-1, 0] +; run: %ssubof_i128(-1, 0) == [-1, 0] +; run: %ssubof_i128(-1, 1) == [-2, 0] +; run: %ssubof_i128(-1, -2) == [1, 0] +; run: %ssubof_i128(0x00000000_00000001_00000000_00000000, 1) == [0xFFFFFFFF_FFFFFFFF, 0] +; run: %ssubof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == [0, 0] +; run: %ssubof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == [-1, 1] +; run: %ssubof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000001) == [-2, 1] +; run: %ssubof_i128(0, 0x80000000_00000000_00000000_00000000) == [0x80000000_00000000_00000000_00000000, 1] +; run: %ssubof_i128(0x80000000_00000000_00000000_00000000, 1) == [0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1] +; run: %ssubof_i128(0x80000000_00000000_00000000_00000001, 1) == [0x80000000_00000000_00000000_00000000, 0] + +function %ssubof_i64(i64, i64) -> i64, i8 { +block0(v0: i64,v1: i64): + v2, v3 = ssub_overflow v0, v1 + return v2, v3 +} +; run: %ssubof_i64(0, 0) == [0, 0] +; run: %ssubof_i64(0, 1) == [-1, 0] +; run: %ssubof_i64(-1, 0) == [-1, 0] +; run: %ssubof_i64(-1, 1) == [-2, 0] +; run: %ssubof_i64(-1, -2) == [1, 0] +; run: %ssubof_i64(0x7FFFFFFF_FFFFFFFF, 
0x7FFFFFFF_FFFFFFFF) == [0, 0] +; run: %ssubof_i64(0x7FFFFFFF_FFFFFFFF, 0x80000000_00000000) == [-1, 1] +; run: %ssubof_i64(0x7FFFFFFF_FFFFFFFF, 0x80000000_00000001) == [-2, 1] +; run: %ssubof_i64(0, 0x80000000_00000000) == [0x80000000_00000000, 1] +; run: %ssubof_i64(0x80000000_00000000, 1) == [0x7FFFFFFF_FFFFFFFF, 1] +; run: %ssubof_i64(0x80000000_00000001, 1) == [0x80000000_00000000, 0] +; run: %ssubof_i64(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == [0x0246_8ACF_1357_9BDF, 0] +; run: %ssubof_i64(0xFEDCBA98_76543220, 0x01234567_89ABCDEF) == [0xFDB9_7530_ECA8_6431, 0] + +function %ssubof_i8(i8, i8) -> i8, i8 { +block0(v0: i8, v1: i8): + v2, v3 = ssub_overflow v0, v1 + return v2, v3 +} +; run: %ssubof_i8(0, 1) == [-1, 0] +; run: %ssubof_i8(100, 20) == [80, 0] +; run: %ssubof_i8(100, -20) == [120, 0] +; run: %ssubof_i8(0x80, 0x80) == [0, 0] +; run: %ssubof_i8(0x7F, 0x80) == [0xFF, 1] +; run: %ssubof_i8(0, 0x80) == [0x80, 1] +; run: %ssubof_i8(0x81, 1) == [0x80, 0] +; run: %ssubof_i8(0x80, 0x01) == [0x7F, 1] +; run: %ssubof_i8(0x7F, 0xFF) == [0x80, 1] +; run: %ssubof_i8(0x7E, 0xFF) == [0x7F, 0] +; run: %ssubof_i8(0xFF, 0x7F) == [0x80, 0] + +function %ssubof_i16(i16, i16) -> i16, i8 { +block0(v0: i16, v1: i16): + v2, v3 = ssub_overflow v0, v1 + return v2, v3 +} +; run: %ssubof_i16(0, 1) == [-1, 0] +; run: %ssubof_i16(100, 20) == [80, 0] +; run: %ssubof_i16(0xFFFF, 0xFFFF) == [0, 0] +; run: %ssubof_i16(0xFFFE, 0xFFFF) == [-1, 0] +; run: %ssubof_i16(0xFFFE, 0xFE) == [0xFF00, 0] +; run: %ssubof_i16(0, 0x8000) == [0x8000, 1] +; run: %ssubof_i16(0x8000, 0x0001) == [0x7FFF, 1] +; run: %ssubof_i16(0x8000, 0xFFFF) == [0x8001, 0] +; run: %ssubof_i16(0x7FFF, 0xFFFF) == [0x8000, 1] +; run: %ssubof_i16(0x7FFE, 0xFFFF) == [0x7FFF, 0] + +function %ssubof_i32(i32, i32) -> i32, i8 { +block0(v0: i32, v1: i32): + v2, v3 = ssub_overflow v0, v1 + return v2, v3 +} +; run: %ssubof_i32(0, 1) == [-1, 0] +; run: %ssubof_i32(100, 20) == [80, 0] +; run: %ssubof_i32(0xFFFF_FFFF, 0xFFFF_FFFF) == 
[0, 0] +; run: %ssubof_i32(0, 0x8000_0000) == [0x8000_0000, 1] +; run: %ssubof_i32(0x8000_0000, 0x0000_0001) == [0x7FFF_FFFF, 1] +; run: %ssubof_i32(0x8000_0000, 0xFFFF_FFFF) == [0x8000_0001, 0] +; run: %ssubof_i32(0xFFFF_FFFE, 0xFFFF_FFFF) == [-1, 0] +; run: %ssubof_i32(0xFFFF_FFFE, 0xFE) == [0xFFFF_FF00, 0] +; run: %ssubof_i32(0x7FFF_FFFF, 0xFFFF_FFFF) == [0x8000_0000, 1] +; run: %ssubof_i32(0x7FFF_FFFE, 0xFFFF_FFFF) == [0x7FFF_FFFF, 0] \ No newline at end of file diff --git a/cranelift/filetests/filetests/runtests/uadd_overflow.clif b/cranelift/filetests/filetests/runtests/uadd_overflow.clif new file mode 100644 index 000000000000..a847f8fb1f91 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/uadd_overflow.clif @@ -0,0 +1,77 @@ +test interpret +test run +set enable_llvm_abi_extensions=true +target x86_64 +target aarch64 + +function %uaddof_i128(i128, i128) -> i128, i8 { +block0(v0: i128,v1: i128): + v2, v3 = uadd_overflow v0, v1 + return v2, v3 +} +; run: %uaddof_i128(0, 0) == [0, 0] +; run: %uaddof_i128(1, 0) == [1, 0] +; run: %uaddof_i128(1, 1) == [2, 0] +; run: %uaddof_i128(1, -1) == [0, 1] +; run: %uaddof_i128(0xFFFFFFFF_FFFFFFFF_00000000_00000000, 0x00000000_00000000_FFFFFFFF_FFFFFFFF) == [-1, 0] +; run: %uaddof_i128(0x00000000_00000000_FFFFFFFF_FFFFFFFF, 1) == [0x1_00000000_00000000, 0] +; run: %uaddof_i128(-1, 1) == [0, 1] +; run: %uaddof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == [-2, 0] +; run: %uaddof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == [-1, 0] +; run: %uaddof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000001) == [0, 1] +; run: %uaddof_i128(0x01234567_89ABCDEF_01234567_89ABCDEF, 0xFEDCBA98_76543210_FEDCBA98_76543210) == [-1, 0] +; run: %uaddof_i128(0x06060606_06060606_A00A00A0_0A00A00A, 0x30303030_30303030_0BB0BB0B_B0BB0BB0) == [0x36363636_36363636_ABBABBAB_BABBABBA, 0] +; run: 
%uaddof_i128(0xC0FFEEEE_C0FFEEEE_C0FFEEEE_C0FFEEEE, 0x1DCB1111_1DCB1111_1DCB1111_1DCB1111) == [0xDECAFFFF_DECAFFFF_DECAFFFF_DECAFFFF, 0] + +function %uaddof_i64(i64, i64) -> i64, i8 { +block0(v0: i64,v1: i64): + v2, v3 = uadd_overflow v0, v1 + return v2, v3 +} +; run: %uaddof_i64(0, 0) == [0, 0] +; run: %uaddof_i64(0, 1) == [1, 0] +; run: %uaddof_i64(-1, 0) == [-1, 0] +; run: %uaddof_i64(-1, 1) == [0, 1] +; run: %uaddof_i64(0x7FFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == [-2, 0] +; run: %uaddof_i64(0x7FFFFFFF_FFFFFFFF, 0x80000000_00000000) == [-1, 0] +; run: %uaddof_i64(0x7FFFFFFF_FFFFFFFF, 0x80000000_00000001) == [0, 1] +; run: %uaddof_i64(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == [-1, 0] +; run: %uaddof_i64(0x01234567_89ABCDEF, 0xFEDCBA98_76543220) == [0xF, 1] +; run: %uaddof_i64(0xA00A00A0_0A00A00A, 0x0BB0BB0B_B0BB0BB0) == [0xABBABBAB_BABBABBA, 0] +; run: %uaddof_i64(0xC0FFEEEE_C0FFEEEE, 0x1DCB1111_1DCB1111) == [0xDECAFFFF_DECAFFFF, 0] + +function %uaddof_i8(i8, i8) -> i8, i8 { +block0(v0: i8, v1: i8): + v2, v3 = uadd_overflow v0, v1 + return v2, v3 +} +; run: %uaddof_i8(0, 1) == [1, 0] +; run: %uaddof_i8(100, 27) == [127, 0] +; run: %uaddof_i8(100, -20) == [80, 1] +; run: %uaddof_i8(100, 28) == [-128, 0] +; run: %uaddof_i8(-128, -128) == [0, 1] +; run: %uaddof_i8(127, 1) == [0x80, 0] + +function %uaddof_i16(i16, i16) -> i16, i8 { +block0(v0: i16, v1: i16): + v2, v3 = uadd_overflow v0, v1 + return v2, v3 +} +; run: %uaddof_i16(0, 1) == [1, 0] +; run: %uaddof_i16(100, 27) == [127, 0] +; run: %uaddof_i16(100, 28) == [128, 0] +; run: %uaddof_i16(32000, 767) == [32767, 0] +; run: %uaddof_i16(32000, 768) == [-32768, 0] +; run: %uaddof_i16(65000, 535) == [65535, 0] +; run: %uaddof_i16(65000, 536) == [0, 1] + +function %uaddof_i32(i32, i32) -> i32, i8 { +block0(v0: i32, v1: i32): + v2, v3 = uadd_overflow v0, v1 + return v2, v3 +} +; run: %uaddof_i32(0, 1) == [1, 0] +; run: %uaddof_i32(100, 27) == [127, 0] +; run: %uaddof_i32(100, 28) == [128, 0] +; run: 
%uaddof_i32(3000000000, 1294967295) == [-1, 0] +; run: %uaddof_i32(3000000000, 1294967296) == [0, 1] \ No newline at end of file diff --git a/cranelift/filetests/filetests/runtests/umul_overflow.clif b/cranelift/filetests/filetests/runtests/umul_overflow.clif new file mode 100644 index 000000000000..a76feb267d07 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/umul_overflow.clif @@ -0,0 +1,68 @@ +test interpret +test run +target x86_64 +target aarch64 + +function %umulof_i64(i64, i64) -> i64, i8 { +block0(v0: i64, v1: i64): + v2, v3 = umul_overflow v0, v1 + return v2, v3 +} + +; run: %umulof_i64(0, 1) == [0, 0] +; run: %umulof_i64(1, 1) == [1, 0] +; run: %umulof_i64(0xFFFFFFFF_FFFFFFFF, 2) == [0xFFFFFFFF_FFFFFFFE, 1] +; run: %umulof_i64(1, -1) == [-1, 0] +; run: %umulof_i64(2, 2) == [4, 0] +; run: %umulof_i64(0x7FFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == [1, 1] +; run: %umulof_i64(0x80000000_00000000, 0x7FFFFFFF_FFFFFFFF) == [0x80000000_00000000, 1] +; run: %umulof_i64(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == [0x2236D88F_E5618CF0, 1] +; run: %umulof_i64(0xC0FFEEEE_C0FFEEEE, 0xDECAFFFF_DECAFFFF) == [0xDB6B1E48_19BA1112, 1] + +function %umulof_i32(i32, i32) -> i32, i8 { +block0(v0: i32, v1: i32): + v2, v3 = umul_overflow v0, v1 + return v2, v3 +} + +; run: %umulof_i32(0, 1) == [0, 0] +; run: %umulof_i32(1, 1) == [1, 0] +; run: %umulof_i32(0xFFFFFFFF, 2) == [0xFFFFFFFE, 1] +; run: %umulof_i32(1, -1) == [-1, 0] +; run: %umulof_i32(2, 2) == [4, 0] +; run: %umulof_i32(0x7FFFFFFF, 0x7FFFFFFF) == [1, 1] +; run: %umulof_i32(0x80000000, 0x7FFFFFFF) == [0x80000000, 1] +; run: %umulof_i32(0x01234567, 0xFEDCBA98) == [0x23E20B28, 1] +; run: %umulof_i32(0xC0FFEEEE, 0xDECAFFFF) == [0x19BA1112, 1] + +function %umulof_i16(i16, i16) -> i16, i8 { +block0(v0: i16, v1: i16): + v2, v3 = umul_overflow v0, v1 + return v2, v3 +} + +; run: %umulof_i16(0, 1) == [0, 0] +; run: %umulof_i16(1, 1) == [1, 0] +; run: %umulof_i16(0xFFFF, 2) == [0xFFFE, 1] +; run: %umulof_i16(1, -1) == 
[-1, 0] +; run: %umulof_i16(2, 2) == [4, 0] +; run: %umulof_i16(0x7FFF, 0x7FFF) == [1, 1] +; run: %umulof_i16(0x8000, 0x7FFF) == [0x8000, 1] +; run: %umulof_i16(0x0123, 0xFEDC) == [0xB414, 1] +; run: %umulof_i16(0xC0FF, 0xDECA) == [0x6B36, 1] + +function %umulof_i8(i8, i8) -> i8, i8 { +block0(v0: i8, v1: i8): + v2, v3 = umul_overflow v0, v1 + return v2, v3 +} + +; run: %umulof_i8(0, 1) == [0, 0] +; run: %umulof_i8(1, 1) == [1, 0] +; run: %umulof_i8(0xFF, 2) == [0xFE, 1] +; run: %umulof_i8(1, -1) == [-1, 0] +; run: %umulof_i8(2, 2) == [4, 0] +; run: %umulof_i8(0x7F, 0x7F) == [1, 1] +; run: %umulof_i8(0x80, 0x7F) == [0x80, 1] +; run: %umulof_i8(0x01, 0xFE) == [0xFE, 0] +; run: %umulof_i8(0xC0, 0xDE) == [0x80, 1] \ No newline at end of file diff --git a/cranelift/filetests/filetests/runtests/usub_overflow.clif b/cranelift/filetests/filetests/runtests/usub_overflow.clif new file mode 100644 index 000000000000..25489d240376 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/usub_overflow.clif @@ -0,0 +1,74 @@ +test interpret +test run +set enable_llvm_abi_extensions=true +target x86_64 +target aarch64 + +function %usubof_i128(i128, i128) -> i128, i8 { +block0(v0: i128,v1: i128): + v2, v3 = usub_overflow v0, v1 + return v2, v3 +} +; run: %usubof_i128(0, 0) == [0, 0] +; run: %usubof_i128(0, 1) == [-1, 1] +; run: %usubof_i128(-1, 0) == [-1, 0] +; run: %usubof_i128(-1, 1) == [-2, 0] +; run: %usubof_i128(-1, -2) == [1, 0] +; run: %usubof_i128(0x00000000_00000001_00000000_00000000, 1) == [0xFFFFFFFF_FFFFFFFF, 0] +; run: %usubof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == [0, 0] +; run: %usubof_i128(0x80000000_00000000_00000000_00000000, 1) == [0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0] +; run: %usubof_i128(0x80000000_00000000_00000000_00000001, 1) == [0x80000000_00000000_00000000_00000000, 0] +; run: %usubof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == [-1, 1] +; run: 
%usubof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000001) == [-2, 1] + +function %usubof_i64(i64, i64) -> i64, i8 { +block0(v0: i64,v1: i64): + v2, v3 = usub_overflow v0, v1 + return v2, v3 +} +; run: %usubof_i64(0, 0) == [0, 0] +; run: %usubof_i64(0, 1) == [-1, 1] +; run: %usubof_i64(-1, 0) == [-1, 0] +; run: %usubof_i64(-1, 1) == [-2, 0] +; run: %usubof_i64(-1, -2) == [1, 0] +; run: %usubof_i64(0x7FFFFFFF_FFFFFFFF, 0x7FFFFFFF_FFFFFFFF) == [0, 0] +; run: %usubof_i64(0x7FFFFFFF_FFFFFFFF, 0x80000000_00000000) == [-1, 1] +; run: %usubof_i64(0x7FFFFFFF_FFFFFFFF, 0x80000000_00000001) == [-2, 1] +; run: %usubof_i64(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == [0x0246_8ACF_1357_9BDF, 1] +; run: %usubof_i64(0xFEDCBA98_76543220, 0x01234567_89ABCDEF) == [0xFDB9_7530_ECA8_6431, 0] + +function %usubof_i8(i8, i8) -> i8, i8 { +block0(v0: i8, v1: i8): + v2, v3 = usub_overflow v0, v1 + return v2, v3 +} +; run: %usubof_i8(0, 1) == [-1, 1] +; run: %usubof_i8(100, 20) == [80, 0] +; run: %usubof_i8(100, -20) == [120, 1] +; run: %usubof_i8(127, -128) == [-1, 1] +; run: %usubof_i8(0x80, 0x80) == [0, 0] +; run: %usubof_i8(0xFF, 0xFF) == [0, 0] +; run: %usubof_i8(0xFE, 0xFF) == [0xFF, 1] +; run: %usubof_i8(0x80, 1) == [0x7F, 0] + +function %usubof_i16(i16, i16) -> i16, i8 { +block0(v0: i16, v1: i16): + v2, v3 = usub_overflow v0, v1 + return v2, v3 +} +; run: %usubof_i16(0, 1) == [-1, 1] +; run: %usubof_i16(100, 20) == [80, 0] +; run: %usubof_i16(0xFFFF, 0xFFFF) == [0, 0] +; run: %usubof_i16(0xFFFE, 0xFFFF) == [-1, 1] +; run: %usubof_i16(0xFFFE, 0xFE) == [0xFF00, 0] + +function %usubof_i32(i32, i32) -> i32, i8 { +block0(v0: i32, v1: i32): + v2, v3 = usub_overflow v0, v1 + return v2, v3 +} +; run: %usubof_i32(0, 1) == [-1, 1] +; run: %usubof_i32(100, 20) == [80, 0] +; run: %usubof_i32(0xFFFF_FFFF, 0xFFFF_FFFF) == [0, 0] +; run: %usubof_i32(0xFFFF_FFFE, 0xFFFF_FFFF) == [-1, 1] +; run: %usubof_i32(0xFFFF_FFFE, 0xFE) == [0xFFFF_FF00, 0] \ No newline at end of 
file diff --git a/cranelift/fuzzgen/src/function_generator.rs b/cranelift/fuzzgen/src/function_generator.rs index a6e28b73b82f..940db95b90cf 100644 --- a/cranelift/fuzzgen/src/function_generator.rs +++ b/cranelift/fuzzgen/src/function_generator.rs @@ -466,6 +466,7 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) - args, rets, (Opcode::IaddCout, &([I8, I8] | [I16, I16] | [I128, I128])), + (Opcode::UmulOverflow | Opcode::SmulOverflow, &[I128, I128]), (Opcode::Imul, &[I8X16, I8X16]), // https://github.com/bytecodealliance/wasmtime/issues/5468 (Opcode::Smulhi | Opcode::Umulhi, &[I8, I8]), @@ -583,6 +584,7 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) - args, rets, (Opcode::IaddCout, &[I128, I128]), + (Opcode::UmulOverflow | Opcode::SmulOverflow, &[I128, I128]), // https://github.com/bytecodealliance/wasmtime/issues/4864 (Opcode::Udiv | Opcode::Sdiv, &[I128, I128]), // https://github.com/bytecodealliance/wasmtime/issues/5472 @@ -638,6 +640,9 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) - args, rets, (Opcode::IaddCout), + (Opcode::UaddOverflow | Opcode::SaddOverflow), + (Opcode::UsubOverflow | Opcode::SsubOverflow), + (Opcode::UmulOverflow | Opcode::SmulOverflow), ( Opcode::Udiv | Opcode::Sdiv | Opcode::Urem | Opcode::Srem, &[I128, I128] @@ -682,6 +687,9 @@ fn valid_for_target(triple: &Triple, op: Opcode, args: &[Type], rets: &[Type]) - rets, // TODO (Opcode::IaddCout), + (Opcode::UaddOverflow | Opcode::SaddOverflow), + (Opcode::UsubOverflow | Opcode::SsubOverflow), + (Opcode::UmulOverflow | Opcode::SmulOverflow), // TODO ( Opcode::Udiv | Opcode::Sdiv | Opcode::Urem | Opcode::Srem, diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index aa5d3d99fe8e..129a5cd2285b 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -752,6 +752,48 @@ where Opcode::UremImm => 
binary_unsigned_can_trap(DataValueExt::rem, arg(0), imm_as_ctrl_ty()?)?, Opcode::SremImm => binary_can_trap(DataValueExt::rem, arg(0), imm_as_ctrl_ty()?)?, Opcode::IrsubImm => binary(DataValueExt::sub, imm_as_ctrl_ty()?, arg(0))?, + Opcode::UaddOverflow => { + let lhs = arg(0).convert(ValueConversionKind::ToUnsigned)?; + let rhs = arg(1).convert(ValueConversionKind::ToUnsigned)?; + let (mut sum, carry) = lhs.overflowing_add(rhs)?; + sum = sum.convert(ValueConversionKind::ToSigned)?; + assign_multiple(&[sum, DataValueExt::bool(carry, false, types::I8)?]) + } + Opcode::SaddOverflow => { + // operands are converted to signed, so the result needs no conversion back + let lhs = arg(0).convert(ValueConversionKind::ToSigned)?; + let rhs = arg(1).convert(ValueConversionKind::ToSigned)?; + let (sum, carry) = lhs.overflowing_add(rhs)?; + assign_multiple(&[sum, DataValueExt::bool(carry, false, types::I8)?]) + } + Opcode::UsubOverflow => { + let lhs = arg(0).convert(ValueConversionKind::ToUnsigned)?; + let rhs = arg(1).convert(ValueConversionKind::ToUnsigned)?; + let (mut sum, carry) = lhs.overflowing_sub(rhs)?; + sum = sum.convert(ValueConversionKind::ToSigned)?; + assign_multiple(&[sum, DataValueExt::bool(carry, false, types::I8)?]) + } + Opcode::SsubOverflow => { + // operands are converted to signed, so the result needs no conversion back + let lhs = arg(0).convert(ValueConversionKind::ToSigned)?; + let rhs = arg(1).convert(ValueConversionKind::ToSigned)?; + let (sum, carry) = lhs.overflowing_sub(rhs)?; + assign_multiple(&[sum, DataValueExt::bool(carry, false, types::I8)?]) + } + Opcode::UmulOverflow => { + let lhs = arg(0).convert(ValueConversionKind::ToUnsigned)?; + let rhs = arg(1).convert(ValueConversionKind::ToUnsigned)?; + let (mut sum, carry) = lhs.overflowing_mul(rhs)?; + sum = sum.convert(ValueConversionKind::ToSigned)?; + assign_multiple(&[sum, DataValueExt::bool(carry, false, types::I8)?]) + } + Opcode::SmulOverflow => { + // operands are converted to signed, so the result needs no conversion back + let lhs = arg(0).convert(ValueConversionKind::ToSigned)?; + let rhs = arg(1).convert(ValueConversionKind::ToSigned)?; + let 
(sum, carry) = lhs.overflowing_mul(rhs)?; + assign_multiple(&[sum, DataValueExt::bool(carry, false, types::I8)?]) + } Opcode::IaddCin => choose( DataValueExt::into_bool(arg(2))?, DataValueExt::add( diff --git a/cranelift/interpreter/src/value.rs b/cranelift/interpreter/src/value.rs index 79e3e129c578..d03d4a16ac2b 100644 --- a/cranelift/interpreter/src/value.rs +++ b/cranelift/interpreter/src/value.rs @@ -44,6 +44,9 @@ pub trait DataValueExt: Sized { fn fma(self, a: Self, b: Self) -> ValueResult; fn abs(self) -> ValueResult; fn checked_add(self, other: Self) -> ValueResult>; + fn overflowing_add(self, other: Self) -> ValueResult<(Self, bool)>; + fn overflowing_sub(self, other: Self) -> ValueResult<(Self, bool)>; + fn overflowing_mul(self, other: Self) -> ValueResult<(Self, bool)>; // Float operations fn neg(self) -> ValueResult; @@ -181,6 +184,15 @@ macro_rules! binary_match { _ => unimplemented!() } }; + ( pair $op:ident($arg1:expr, $arg2:expr); [ $( $data_value_ty:ident ),* ] ) => { + match ($arg1, $arg2) { + $( (DataValue::$data_value_ty(a), DataValue::$data_value_ty(b)) => { + let (f, s) = a.$op(*b); + Ok((DataValue::$data_value_ty(f), s)) + } )* + _ => unimplemented!() + } + }; ( $op:tt($arg1:expr, $arg2:expr); [ $( $data_value_ty:ident ),* ] ) => { match ($arg1, $arg2) { $( (DataValue::$data_value_ty(a), DataValue::$data_value_ty(b)) => { Ok(DataValue::$data_value_ty(a $op b)) } )* @@ -439,6 +451,11 @@ impl DataValueExt for DataValue { DataValue::I32(n) => DataValue::U32(n as u32), DataValue::I64(n) => DataValue::U64(n as u64), DataValue::I128(n) => DataValue::U128(n as u128), + DataValue::U8(_) => self, + DataValue::U16(_) => self, + DataValue::U32(_) => self, + DataValue::U64(_) => self, + DataValue::U128(_) => self, _ => unimplemented!("conversion: {} -> {:?}", self.ty(), kind), }, ValueConversionKind::ToSigned => match self { @@ -447,6 +464,11 @@ impl DataValueExt for DataValue { DataValue::U32(n) => DataValue::I32(n as i32), DataValue::U64(n) => 
DataValue::I64(n as i64), DataValue::U128(n) => DataValue::I128(n as i128), + DataValue::I8(_) => self, + DataValue::I16(_) => self, + DataValue::I32(_) => self, + DataValue::I64(_) => self, + DataValue::I128(_) => self, _ => unimplemented!("conversion: {} -> {:?}", self.ty(), kind), }, ValueConversionKind::RoundNearestEven(ty) => match (self, ty) { @@ -615,6 +637,18 @@ impl DataValueExt for DataValue { binary_match!(option checked_add(&self, &other); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128]) } + fn overflowing_add(self, other: Self) -> ValueResult<(Self, bool)> { + binary_match!(pair overflowing_add(&self, &other); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128]) + } + + fn overflowing_sub(self, other: Self) -> ValueResult<(Self, bool)> { + binary_match!(pair overflowing_sub(&self, &other); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128]) + } + + fn overflowing_mul(self, other: Self) -> ValueResult<(Self, bool)> { + binary_match!(pair overflowing_mul(&self, &other); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128]) + } + fn neg(self) -> ValueResult { unary_match!(neg(&self); [F32, F64]) }