diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index b7d22deb7e2b..aff7c7c8e3d1 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -3140,7 +3140,6 @@ (ConsumesFlags.ConsumesFlagsSideEffect (MInst.JmpTableSeq idx tmp1 tmp2 default_target jt_targets))))) - ;;;; Comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (type IcmpCondResult (enum (Condition (producer ProducesFlags) (cc CC)))) @@ -3157,6 +3156,19 @@ (rule (lower_icmp_bool (IcmpCondResult.Condition producer cc)) (with_flags producer (x64_setcc cc))) +;; Emit a conditional move based on the result of an icmp. +(decl select_icmp (IcmpCondResult Value Value) ValueRegs) + +;; Ensure that we put the `x` argument into a register for single-register +;; gpr-typed arguments, as we rely on this for the legalization of heap_addr and +;; loading easily computed constants (like 0) from memory is too expensive. +(rule (select_icmp (IcmpCondResult.Condition producer cc) x @ (value_type (is_gpr_type (is_single_register_type ty))) y) + (with_flags producer (cmove ty cc (put_in_gpr x) y))) + +;; Otherwise, fall back on the behavior of `cmove_from_values`. +(rule (select_icmp (IcmpCondResult.Condition producer cc) x @ (value_type ty) y) + (with_flags producer (cmove_from_values ty cc x y))) + (decl emit_cmp (IntCC Value Value) IcmpCondResult) ;; For GPR-held values we only need to emit `CMP + SETCC`. We rely here on diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 33a6cc916365..4b2a02a02c01 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -645,21 +645,6 @@ impl Inst { } } - pub(crate) fn xmm_cmove(ty: Type, cc: CC, src: RegMem, dst: Writable) -> Inst { - debug_assert!(ty == types::F32 || ty == types::F64 || ty.is_vector()); - src.assert_regclass_is(RegClass::Float); - debug_assert!(dst.to_reg().class() == RegClass::Float); - let src = XmmMem::new(src).unwrap(); - let dst = WritableXmm::from_writable_reg(dst).unwrap(); - Inst::XmmCmove { - ty, - cc, - consequent: src, - alternative: dst.to_reg(), - dst, - } - } - pub(crate) fn push64(src: RegMemImm) -> Inst { src.assert_regclass_is(RegClass::Int); let src = GprMemImm::new(src).unwrap(); diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 422f62b5bf52..a11fa45dd379 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -2945,3 +2945,11 @@ (rule (lower_branch (br_table idx @ (value_type ty) _ _) (jump_table_targets default_target jt_targets)) (side_effect (jmp_table_seq ty idx default_target jt_targets))) + +;; Rules for `selectif` and `selectif_spectre_guard` ;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(rule (lower (selectif cc (ifcmp a b) x y)) + (select_icmp (emit_cmp cc a b) x y)) + +(rule (lower (selectif_spectre_guard cc (ifcmp a b) x y)) + (select_icmp (emit_cmp cc a b) x y)) diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 225a62c08156..cbad5dd376fb 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -5,8 +5,7 @@ pub(super) mod isle; use crate::data_value::DataValue; use crate::ir::{ - condcodes::{FloatCC, IntCC}, - types, ExternalName, Inst as IRInst, InstructionData, LibCall, Opcode, Type, + condcodes::FloatCC, types, ExternalName, Inst as IRInst, InstructionData, LibCall, Opcode, Type, }; use crate::isa::x64::abi::*; use crate::isa::x64::inst::args::*; @@ -217,40 +216,11 @@ fn extend_input_to_reg>( dst.to_reg() } -/// Returns whether the given input is an immediate that can be properly sign-extended, without any -/// possible side-effect. -fn non_reg_input_to_sext_imm(input: NonRegInput, input_ty: Type) -> Option { - input.constant.and_then(|x| { - // For i64 instructions (prefixed with REX.W), require that the immediate will sign-extend - // to 64 bits. For other sizes, it doesn't matter and we can just use the plain - // constant. - if input_ty.bytes() != 8 || low32_will_sign_extend_to_64(x) { - Some(x as u32) - } else { - None - } - }) -} - fn input_to_imm>(ctx: &mut C, spec: InsnInput) -> Option { ctx.get_input_as_source_or_const(spec.insn, spec.input) .constant } -/// Put the given input into an immediate, a register or a memory operand. -/// Effectful: may mark the given input as used, when returning the register form. -fn input_to_reg_mem_imm>(ctx: &mut C, spec: InsnInput) -> RegMemImm { - let input = ctx.get_input_as_source_or_const(spec.insn, spec.input); - let input_ty = ctx.input_ty(spec.insn, spec.input); - match non_reg_input_to_sext_imm(input, input_ty) { - Some(x) => RegMemImm::imm(x), - None => match input_to_reg_mem(ctx, spec) { - RegMem::Reg { reg } => RegMemImm::reg(reg), - RegMem::Mem { addr } => RegMemImm::mem(addr), - }, - } -} - /// Emit an instruction to insert a value `src` into a lane of `dst`. fn emit_insert_lane>( ctx: &mut C, @@ -357,119 +327,6 @@ fn emit_extract_lane>( } } -/// Emits an int comparison instruction. -/// -/// Note: make sure that there are no instructions modifying the flags between a call to this -/// function and the use of the flags! -/// -/// Takes the condition code that will be tested, and returns -/// the condition code that should be used. This allows us to -/// synthesize comparisons out of multiple instructions for -/// special cases (e.g., 128-bit integers). -fn emit_cmp>(ctx: &mut C, insn: IRInst, cc: IntCC) -> IntCC { - let ty = ctx.input_ty(insn, 0); - - let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }]; - - if ty == types::I128 { - // We need to compare both halves and combine the results appropriately. - let cmp1 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - let cmp2 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - let lhs = put_input_in_regs(ctx, inputs[0]); - let lhs_lo = lhs.regs()[0]; - let lhs_hi = lhs.regs()[1]; - let rhs = put_input_in_regs(ctx, inputs[1]); - let rhs_lo = RegMemImm::reg(rhs.regs()[0]); - let rhs_hi = RegMemImm::reg(rhs.regs()[1]); - match cc { - IntCC::Equal => { - ctx.emit(Inst::cmp_rmi_r(OperandSize::Size64, rhs_hi, lhs_hi)); - ctx.emit(Inst::setcc(CC::Z, cmp1)); - ctx.emit(Inst::cmp_rmi_r(OperandSize::Size64, rhs_lo, lhs_lo)); - ctx.emit(Inst::setcc(CC::Z, cmp2)); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::And, - RegMemImm::reg(cmp1.to_reg()), - cmp2, - )); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::And, - RegMemImm::imm(1), - cmp2, - )); - IntCC::NotEqual - } - IntCC::NotEqual => { - ctx.emit(Inst::cmp_rmi_r(OperandSize::Size64, rhs_hi, lhs_hi)); - ctx.emit(Inst::setcc(CC::NZ, cmp1)); - ctx.emit(Inst::cmp_rmi_r(OperandSize::Size64, rhs_lo, lhs_lo)); - ctx.emit(Inst::setcc(CC::NZ, cmp2)); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Or, - RegMemImm::reg(cmp1.to_reg()), - cmp2, - )); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::And, - RegMemImm::imm(1), - cmp2, - )); - IntCC::NotEqual - } - IntCC::SignedLessThan - | IntCC::SignedLessThanOrEqual - | IntCC::SignedGreaterThan - | IntCC::SignedGreaterThanOrEqual - | IntCC::UnsignedLessThan - | IntCC::UnsignedLessThanOrEqual - | IntCC::UnsignedGreaterThan - | IntCC::UnsignedGreaterThanOrEqual => { - // Result = (lhs_hi <> rhs_hi) || - // (lhs_hi == rhs_hi && lhs_lo <> rhs_lo) - let cmp3 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); - ctx.emit(Inst::cmp_rmi_r(OperandSize::Size64, rhs_hi, lhs_hi)); - ctx.emit(Inst::setcc(CC::from_intcc(cc.without_equal()), cmp1)); - ctx.emit(Inst::setcc(CC::Z, cmp2)); - ctx.emit(Inst::cmp_rmi_r(OperandSize::Size64, rhs_lo, lhs_lo)); - ctx.emit(Inst::setcc(CC::from_intcc(cc.unsigned()), cmp3)); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::And, - RegMemImm::reg(cmp2.to_reg()), - cmp3, - )); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::Or, - RegMemImm::reg(cmp1.to_reg()), - cmp3, - )); - ctx.emit(Inst::alu_rmi_r( - OperandSize::Size64, - AluRmiROpcode::And, - RegMemImm::imm(1), - cmp3, - )); - IntCC::NotEqual - } - _ => panic!("Unhandled IntCC in I128 comparison: {:?}", cc), - } - } else { - // TODO Try to commute the operands (and invert the condition) if one is an immediate. - let lhs = put_input_in_reg(ctx, inputs[0]); - let rhs = input_to_reg_mem_imm(ctx, inputs[1]); - - // Cranelift's icmp semantics want to compare lhs - rhs, while Intel gives - // us dst - src at the machine instruction level, so invert operands. - ctx.emit(Inst::cmp_rmi_r(OperandSize::from_ty(ty), rhs, lhs)); - cc - } -} - fn emit_vm_call>( ctx: &mut C, flags: &Flags, @@ -632,37 +489,6 @@ fn lower_to_amode>(ctx: &mut C, spec: InsnInput, offset: i Amode::imm_reg(offset as u32, input).with_flags(flags) } -fn emit_moves>( - ctx: &mut C, - dst: ValueRegs>, - src: ValueRegs, - ty: Type, -) { - let (_, tys) = Inst::rc_for_type(ty).unwrap(); - for ((dst, src), ty) in dst.regs().iter().zip(src.regs().iter()).zip(tys.iter()) { - ctx.emit(Inst::gen_move(*dst, *src, *ty)); - } -} - -fn emit_cmoves>( - ctx: &mut C, - size: u8, - cc: CC, - src: ValueRegs, - dst: ValueRegs>, -) { - let size = size / src.len() as u8; - let size = u8::max(size, 4); // at least 32 bits - for (dst, src) in dst.regs().iter().zip(src.regs().iter()) { - ctx.emit(Inst::cmove( - OperandSize::from_bytes(size.into()), - cc, - RegMem::reg(*src), - *dst, - )); - } -} - //============================================================================= // Top-level instruction lowering entry point, for one instruction. @@ -797,7 +623,10 @@ fn lower_insn_to_regs>( | Opcode::Trapff | Opcode::GetFramePointer | Opcode::GetStackPointer - | Opcode::GetReturnAddress => { + | Opcode::GetReturnAddress + | Opcode::Select + | Opcode::Selectif + | Opcode::SelectifSpectreGuard => { implemented_in_isle(ctx); } @@ -1881,45 +1710,6 @@ fn lower_insn_to_regs>( ctx.emit(inst); } - Opcode::Select => { - implemented_in_isle(ctx); - } - - Opcode::Selectif | Opcode::SelectifSpectreGuard => { - let lhs = put_input_in_regs(ctx, inputs[1]); - let rhs = put_input_in_regs(ctx, inputs[2]); - let dst = get_output_reg(ctx, outputs[0]); - let ty = ctx.output_ty(insn, 0); - - // Verification ensures that the input is always a single-def ifcmp. - let cmp_insn = ctx - .get_input_as_source_or_const(inputs[0].insn, inputs[0].input) - .inst - .as_inst() - .unwrap() - .0; - debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp); - let cond_code = ctx.data(insn).cond_code().unwrap(); - let cond_code = emit_cmp(ctx, cmp_insn, cond_code); - - let cc = CC::from_intcc(cond_code); - - if is_int_or_ref_ty(ty) || ty == types::I128 { - let size = ty.bytes() as u8; - emit_moves(ctx, dst, rhs, ty); - emit_cmoves(ctx, size, cc, lhs, dst); - } else { - debug_assert!(ty == types::F32 || ty == types::F64); - emit_moves(ctx, dst, rhs, ty); - ctx.emit(Inst::xmm_cmove( - ty, - cc, - RegMem::reg(lhs.only_reg().unwrap()), - dst.only_reg().unwrap(), - )); - } - } - Opcode::Udiv | Opcode::Urem | Opcode::Sdiv | Opcode::Srem => { let kind = match op { Opcode::Udiv => DivOrRemKind::UnsignedDiv,