diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index 6c2e47b42495..c11d3d5ff83b 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -1619,6 +1619,18 @@ (decl u64_into_imm_logic (Type u64) ImmLogic) (extern constructor u64_into_imm_logic u64_into_imm_logic) +(decl branch_target (VecMachLabel u8) BranchTarget) +(extern constructor branch_target branch_target) + +(decl targets_jt_size (VecMachLabel) u32) +(extern constructor targets_jt_size targets_jt_size) + +(decl targets_jt_space (VecMachLabel) CodeOffset) +(extern constructor targets_jt_space targets_jt_space) + +(decl targets_jt_info (VecMachLabel) BoxJTSequenceInfo) +(extern constructor targets_jt_info targets_jt_info) + ;; Calculate the minimum floating-point bound for a conversion to floating ;; point from an integer type. ;; Accepts whether the output is signed, the size of the input @@ -1698,6 +1710,9 @@ (decl cond_br_zero (Reg) CondBrKind) (extern constructor cond_br_zero cond_br_zero) +(decl cond_br_not_zero (Reg) CondBrKind) +(extern constructor cond_br_not_zero cond_br_not_zero) + (decl cond_br_cond (Cond) CondBrKind) (extern constructor cond_br_cond cond_br_cond) @@ -2893,6 +2908,11 @@ ;; TODO: Port lower_condcode() to ISLE. (extern constructor cond_code cond_code) +;; Invert a condition code. +(decl invert_cond (Cond) Cond) +;; TODO: Port cond.invert() to ISLE. +(extern constructor invert_cond invert_cond) + ;; Generate comparison to zero operator from input condition code (decl float_cc_cmp_zero_to_vec_misc_op (FloatCC) VecMisc2) (extern constructor float_cc_cmp_zero_to_vec_misc_op float_cc_cmp_zero_to_vec_misc_op) @@ -3530,3 +3550,65 @@ (rule (lower_select flags cond ty rn rm) (if (ty_int_bool_ref_scalar_64 ty)) (with_flags flags (csel cond rn rm))) + +;; Helper for emitting `MInst.Jump` instructions. +(decl aarch64_jump (BranchTarget) SideEffectNoResult) +(rule (aarch64_jump target) + (SideEffectNoResult.Inst (MInst.Jump target))) + +;; Helper for emitting `MInst.JTSequence` instructions. +;; Emit the compound instruction that does: +;; +;; b.hs default +;; csel rB, xzr, rIndex, hs +;; csdb +;; adr rA, jt +;; ldrsw rB, [rA, rB, uxtw #2] +;; add rA, rA, rB +;; br rA +;; [jt entries] +;; +;; This must be *one* instruction in the vcode because +;; we cannot allow regalloc to insert any spills/fills +;; in the middle of the sequence; otherwise, the ADR's +;; PC-rel offset to the jumptable would be incorrect. +;; (The alternative is to introduce a relocation pass +;; for inlined jumptables, which is much worse, IMHO.) +(decl jt_sequence (Reg BoxJTSequenceInfo) ConsumesFlags) +(rule (jt_sequence ridx info) + (let ((rtmp1 WritableReg (temp_writable_reg $I64)) + (rtmp2 WritableReg (temp_writable_reg $I64))) + (ConsumesFlags.ConsumesFlagsSideEffect + (MInst.JTSequence info ridx rtmp1 rtmp2)))) + +;; Helper for emitting `MInst.CondBr` instructions. +(decl cond_br (BranchTarget BranchTarget CondBrKind) ConsumesFlags) +(rule (cond_br taken not_taken kind) + (ConsumesFlags.ConsumesFlagsSideEffect + (MInst.CondBr taken not_taken kind))) + +;; Helper for emitting `MInst.MovToNZCV` instructions. +(decl mov_to_nzcv (Reg) ProducesFlags) +(rule (mov_to_nzcv rn) + (ProducesFlags.ProducesFlagsSideEffect + (MInst.MovToNZCV rn))) + +;; Helper for emitting `MInst.EmitIsland` instructions. 
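;; (An aside on sizing, using the rule from `targets_jt_space` below:
;; an island is a point where the `MachBuffer` may emit pending branch
;; veneers or constant-pool entries, so requesting `needed_space` bytes
;; up front keeps an island from landing inside the jump-table sequence,
;; which must stay contiguous. A three-entry table, for example, reserves
;; 4 * (8 + 3) = 44 bytes: eight 4-byte instructions plus one 4-byte
;; offset word per entry.)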
+(decl emit_island (CodeOffset) SideEffectNoResult)
+(rule (emit_island needed_space)
+      (SideEffectNoResult.Inst
+        (MInst.EmitIsland needed_space)))
+
+;; Helper for emitting `br_table` sequences.
+(decl br_table_impl (u64 Reg VecMachLabel) InstOutput)
+(rule (br_table_impl (imm12_from_u64 jt_size) ridx targets)
+      (let ((jt_info BoxJTSequenceInfo (targets_jt_info targets)))
+        (side_effect (with_flags_side_effect
+                       (cmp_imm (OperandSize.Size32) ridx jt_size)
+                       (jt_sequence ridx jt_info)))))
+(rule -1 (br_table_impl jt_size ridx targets)
+      (let ((jt_size Reg (imm $I64 (ImmExtend.Zero) jt_size))
+            (jt_info BoxJTSequenceInfo (targets_jt_info targets)))
+        (side_effect (with_flags_side_effect
+                       (cmp (OperandSize.Size32) ridx jt_size)
+                       (jt_sequence ridx jt_info)))))
diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs
index 5481ffacd417..22a7d17acfea 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/args.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs
@@ -558,18 +558,6 @@ pub enum ScalarSize {
 }
 
 impl ScalarSize {
-    /// Convert from a needed width to the smallest size that fits.
-    pub fn from_bits<I: Into<usize>>(bits: I) -> ScalarSize {
-        match bits.into().next_power_of_two() {
-            8 => ScalarSize::Size8,
-            16 => ScalarSize::Size16,
-            32 => ScalarSize::Size32,
-            64 => ScalarSize::Size64,
-            128 => ScalarSize::Size128,
-            w => panic!("Unexpected type width: {}", w),
-        }
-    }
-
     /// Convert to an integer operand size.
     pub fn operand_size(&self) -> OperandSize {
         match self {
@@ -579,13 +567,6 @@ impl ScalarSize {
         }
     }
 
-    /// Convert from a type into the smallest size that fits.
-    pub fn from_ty(ty: Type) -> ScalarSize {
-        debug_assert!(!ty.is_vector());
-
-        Self::from_bits(ty_bits(ty))
-    }
-
     /// Return the encoding bits that are used by some scalar FP instructions
     /// for a particular operand size.
     pub fn ftype(&self) -> u32 {
@@ -645,32 +626,6 @@ impl VectorSize {
         }
     }
 
-    /// Convert from a type into a vector operand size.
-    pub fn from_ty(ty: Type) -> VectorSize {
-        debug_assert!(ty.is_vector());
-
-        match ty {
-            B8X8 => VectorSize::Size8x8,
-            B8X16 => VectorSize::Size8x16,
-            B16X4 => VectorSize::Size16x4,
-            B16X8 => VectorSize::Size16x8,
-            B32X2 => VectorSize::Size32x2,
-            B32X4 => VectorSize::Size32x4,
-            B64X2 => VectorSize::Size64x2,
-            F32X2 => VectorSize::Size32x2,
-            F32X4 => VectorSize::Size32x4,
-            F64X2 => VectorSize::Size64x2,
-            I8X8 => VectorSize::Size8x8,
-            I8X16 => VectorSize::Size8x16,
-            I16X4 => VectorSize::Size16x4,
-            I16X8 => VectorSize::Size16x8,
-            I32X2 => VectorSize::Size32x2,
-            I32X4 => VectorSize::Size32x4,
-            I64X2 => VectorSize::Size64x2,
-            _ => unimplemented!("Unsupported type: {}", ty),
-        }
-    }
-
     /// Get the integer operand size that corresponds to a lane of a vector with a certain size.
     pub fn operand_size(&self) -> OperandSize {
         match self {
diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle
index 3d861bb4ed7f..7d2105f92325 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -4,6 +4,16 @@
 ;; register(s) within which the lowered instruction's result values live.
 (decl lower (Inst) InstOutput)
 
+;; Variant of the main lowering constructor term, which receives an
+;; additional argument (a vector of branch targets to be used) for
+;; implementing branches.
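;; For example (illustrative): a conditional/unconditional branch pair
;; is lowered with `targets = [taken, not_taken]`, while `br_table`
;; receives `targets = [default, entry0, entry1, ...]`.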
+;; For two-branch instructions, the first target is `taken` and the second +;; `not_taken`, even if it is a Fallthrough instruction: because we reorder +;; blocks while we lower, the fallthrough in the new order is not (necessarily) +;; the same as the fallthrough in CLIF. So, we use the explicitly-provided +;; target. +(decl lower_branch (Inst VecMachLabel) InstOutput) + ;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (has_type ty (iconst (u64_from_imm64 n)))) @@ -2497,12 +2507,185 @@ ;;; Rules for `brz`/`brnz`/`brif`/`brff`/`bricmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; TODO: requires icmp/fcmp first. +;; `brz` following `icmp`, possibly converted via `bint`. +(rule (lower_branch (brz (icmp cc x @ (value_type ty) y) _ _) targets) + (let ((cond Cond (cond_code cc)) + (cond Cond (invert_cond cond)) ;; negate for `brz` + (taken BranchTarget (branch_target targets 0)) + (not_taken BranchTarget (branch_target targets 1))) + (side_effect + (with_flags_side_effect (lower_icmp_into_flags cc x y ty) + (cond_br taken not_taken + (cond_br_cond cond)))))) +(rule (lower_branch (brz (bint (icmp cc x @ (value_type ty) y)) _ _) targets) + (let ((cond Cond (cond_code cc)) + (cond Cond (invert_cond cond)) ;; negate for `brz` + (taken BranchTarget (branch_target targets 0)) + (not_taken BranchTarget (branch_target targets 1))) + (side_effect + (with_flags_side_effect (lower_icmp_into_flags cc x y ty) + (cond_br taken not_taken + (cond_br_cond cond)))))) +;; `brnz` following `icmp`, possibly converted via `bint`. +(rule (lower_branch (brnz (icmp cc x @ (value_type ty) y) _ _) targets) + (let ((cond Cond (cond_code cc)) + (taken BranchTarget (branch_target targets 0)) + (not_taken BranchTarget (branch_target targets 1))) + (side_effect + (with_flags_side_effect (lower_icmp_into_flags cc x y ty) + (cond_br taken not_taken + (cond_br_cond cond)))))) +(rule (lower_branch (brnz (bint (icmp cc x @ (value_type ty) y)) _ _) targets) + (let ((cond Cond (cond_code cc)) + (taken BranchTarget (branch_target targets 0)) + (not_taken BranchTarget (branch_target targets 1))) + (side_effect + (with_flags_side_effect (lower_icmp_into_flags cc x y ty) + (cond_br taken not_taken + (cond_br_cond cond)))))) +;; `brz` following `fcmp`, possibly converted via `bint`. +(rule (lower_branch (brz (fcmp cc x @ (value_type (ty_scalar_float ty)) y) _ _) targets) + (let ((cond Cond (fp_cond_code cc)) + (cond Cond (invert_cond cond)) ;; negate for `brz` + (taken BranchTarget (branch_target targets 0)) + (not_taken BranchTarget (branch_target targets 1))) + (side_effect + (with_flags_side_effect (fpu_cmp (scalar_size ty) x y) + (cond_br taken not_taken + (cond_br_cond cond)))))) +(rule (lower_branch (brz (bint (fcmp cc x @ (value_type (ty_scalar_float ty)) y)) _ _) targets) + (let ((cond Cond (fp_cond_code cc)) + (cond Cond (invert_cond cond)) ;; negate for `brz` + (taken BranchTarget (branch_target targets 0)) + (not_taken BranchTarget (branch_target targets 1))) + (side_effect + (with_flags_side_effect (fpu_cmp (scalar_size ty) x y) + (cond_br taken not_taken + (cond_br_cond cond)))))) +;; `brnz` following `fcmp`, possibly converted via `bint`. 
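;; As an illustrative sketch, CLIF such as
;;
;;   v2 = fcmp gt v0, v1
;;   brnz v2, block2
;;   jump block3
;;
;; matches the rules below and lowers to `fcmp; b.gt label1; b label2`
;; directly, without ever materializing the comparison result in a
;; register.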
+(rule (lower_branch (brnz (fcmp cc x @ (value_type (ty_scalar_float ty)) y) _ _) targets) + (let ((cond Cond (fp_cond_code cc)) + (taken BranchTarget (branch_target targets 0)) + (not_taken BranchTarget (branch_target targets 1))) + (side_effect + (with_flags_side_effect (fpu_cmp (scalar_size ty) x y) + (cond_br taken not_taken + (cond_br_cond cond)))))) +(rule (lower_branch (brnz (bint (fcmp cc x @ (value_type (ty_scalar_float ty)) y)) _ _) targets) + (let ((cond Cond (fp_cond_code cc)) + (taken BranchTarget (branch_target targets 0)) + (not_taken BranchTarget (branch_target targets 1))) + (side_effect + (with_flags_side_effect (fpu_cmp (scalar_size ty) x y) + (cond_br taken not_taken + (cond_br_cond cond)))))) +;; standard `brz` +(rule (lower_branch (brz c @ (value_type $I128) _ _) targets) + (let ((flags ProducesFlags (flags_to_producesflags c)) + (c ValueRegs (put_in_regs c)) + (c_lo Reg (value_regs_get c 0)) + (c_hi Reg (value_regs_get c 1)) + (rt Reg (orr $I64 c_lo c_hi)) + (taken BranchTarget (branch_target targets 0)) + (not_taken BranchTarget (branch_target targets 1))) + (side_effect + (with_flags_side_effect flags + (cond_br taken not_taken (cond_br_zero rt)))))) +(rule (lower_branch (brz c @ (value_type ty) _ _) targets) + (if (ty_int_bool_ref_scalar_64 ty)) + (let ((flags ProducesFlags (flags_to_producesflags c)) + (rt Reg (put_in_reg_zext64 c)) + (taken BranchTarget (branch_target targets 0)) + (not_taken BranchTarget (branch_target targets 1))) + (side_effect + (with_flags_side_effect flags + (cond_br taken not_taken (cond_br_zero rt)))))) +;; standard `brnz` +(rule (lower_branch (brnz c @ (value_type $I128) _ _) targets) + (let ((flags ProducesFlags (flags_to_producesflags c)) + (c ValueRegs (put_in_regs c)) + (c_lo Reg (value_regs_get c 0)) + (c_hi Reg (value_regs_get c 1)) + (rt Reg (orr $I64 c_lo c_hi)) + (taken BranchTarget (branch_target targets 0)) + (not_taken BranchTarget (branch_target targets 1))) + (side_effect + (with_flags_side_effect flags + (cond_br taken not_taken (cond_br_not_zero rt)))))) +(rule (lower_branch (brnz c @ (value_type ty) _ _) targets) + (if (ty_int_bool_ref_scalar_64 ty)) + (let ((flags ProducesFlags (flags_to_producesflags c)) + (rt Reg (put_in_reg_zext64 c)) + (taken BranchTarget (branch_target targets 0)) + (not_taken BranchTarget (branch_target targets 1))) + (side_effect + (with_flags_side_effect flags + (cond_br taken not_taken (cond_br_not_zero rt)))))) + +;; `br_icmp` +(rule (lower_branch (br_icmp cc x @ (value_type ty) y _ _) targets) + (let ((cond Cond (cond_code cc)) + (taken BranchTarget (branch_target targets 0)) + (not_taken BranchTarget (branch_target targets 1))) + (side_effect + (with_flags_side_effect (lower_icmp_into_flags cc x y ty) + (cond_br taken not_taken + (cond_br_cond cond)))))) + +;; `brif` +(rule (lower_branch (brif cc (ifcmp x @ (value_type ty) y) _ _) targets) + (let ((cond Cond (cond_code cc)) + (taken BranchTarget (branch_target targets 0)) + (not_taken BranchTarget (branch_target targets 1))) + (side_effect + (with_flags_side_effect (lower_icmp_into_flags cc x y ty) + (cond_br taken not_taken + (cond_br_cond cond)))))) +;; If the `ifcmp` result is actually placed in a register, we need to move it +;; back into the flags. 
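;; (`MInst.MovToNZCV` assembles to `msr nzcv, xN`; only bits 31..28 of
;; the register, i.e. N, Z, C and V, are transferred into the flags.)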
+(rule -1 (lower_branch (brif cc f _ _) targets) + (let ((cond Cond (cond_code cc)) + (rn Reg (put_in_reg f)) + (taken BranchTarget (branch_target targets 0)) + (not_taken BranchTarget (branch_target targets 1))) + (side_effect + (with_flags_side_effect (mov_to_nzcv rn) + (cond_br taken not_taken + (cond_br_cond cond)))))) + +;; `brff` +(rule (lower_branch (brff cc (ffcmp x @ (value_type ty) y) _ _) targets) + (let ((cond Cond (fp_cond_code cc)) + (taken BranchTarget (branch_target targets 0)) + (not_taken BranchTarget (branch_target targets 1))) + (side_effect + (with_flags_side_effect (fpu_cmp (scalar_size ty) x y) + (cond_br taken not_taken + (cond_br_cond cond)))))) +;; If the `ffcmp` result is actually placed in a register, we need to move it +;; back into the flags. +(rule -1 (lower_branch (brff cc f _ _) targets) + (let ((cond Cond (fp_cond_code cc)) + (rn Reg (put_in_reg f)) + (taken BranchTarget (branch_target targets 0)) + (not_taken BranchTarget (branch_target targets 1))) + (side_effect + (with_flags_side_effect (mov_to_nzcv rn) + (cond_br taken not_taken + (cond_br_cond cond)))))) ;;; Rules for `jump` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; TODO. +(rule (lower_branch (jump _ _) targets) + (side_effect (aarch64_jump (branch_target targets 0)))) ;;; Rules for `br_table` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; TODO. +;; `targets` contains the default target with the list of branch targets +;; concatenated. +(rule (lower_branch (br_table idx _ _) targets) + (let ((jt_size u32 (targets_jt_size targets)) + (_ InstOutput (side_effect + (emit_island (targets_jt_space targets)))) + (ridx Reg (put_in_reg_zext32 idx))) + (br_table_impl (u32_as_u64 jt_size) ridx targets))) diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 773991b69321..66c0a21f3936 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -16,112 +16,29 @@ use crate::isa::aarch64::inst::*; use crate::isa::aarch64::AArch64Backend; use crate::machinst::lower::*; use crate::machinst::{Reg, Writable}; +use crate::CodegenResult; use crate::{machinst::*, trace}; -use crate::{CodegenError, CodegenResult}; use smallvec::{smallvec, SmallVec}; -use std::cmp; pub mod isle; -//============================================================================ -// Result enum types. -// -// Lowering of a given value results in one of these enums, depending on the -// modes in which we can accept the value. - -/// A lowering result: register, register-shift. An SSA value can always be -/// lowered into one of these options; the register form is the fallback. -#[derive(Clone, Debug)] -enum ResultRS { - Reg(Reg), - RegShift(Reg, ShiftOpAndAmt), -} - -/// A lowering result: register, register-shift, register-extend. An SSA value can always be -/// lowered into one of these options; the register form is the fallback. -#[derive(Clone, Debug)] -enum ResultRSE { - Reg(Reg), - RegShift(Reg, ShiftOpAndAmt), - RegExtend(Reg, ExtendOp), -} - -impl ResultRSE { - fn from_rs(rs: ResultRS) -> ResultRSE { - match rs { - ResultRS::Reg(r) => ResultRSE::Reg(r), - ResultRS::RegShift(r, s) => ResultRSE::RegShift(r, s), - } - } -} - -/// A lowering result: register, register-shift, register-extend, or 12-bit immediate form. -/// An SSA value can always be lowered into one of these options; the register form is the -/// fallback. 
-#[derive(Clone, Debug)]
-pub(crate) enum ResultRSEImm12 {
-    Reg(Reg),
-    RegShift(Reg, ShiftOpAndAmt),
-    RegExtend(Reg, ExtendOp),
-    Imm12(Imm12),
-}
-
-impl ResultRSEImm12 {
-    fn from_rse(rse: ResultRSE) -> ResultRSEImm12 {
-        match rse {
-            ResultRSE::Reg(r) => ResultRSEImm12::Reg(r),
-            ResultRSE::RegShift(r, s) => ResultRSEImm12::RegShift(r, s),
-            ResultRSE::RegExtend(r, e) => ResultRSEImm12::RegExtend(r, e),
-        }
-    }
-}
-
 //============================================================================
 // Lowering: convert instruction inputs to forms that we can use.
 
-/// Lower an instruction input to a 64-bit constant, if possible.
-pub(crate) fn input_to_const(ctx: &mut Lower<Inst>, input: InsnInput) -> Option<u64> {
-    let input = ctx.get_input_as_source_or_const(input.insn, input.input);
-    input.constant
-}
-
-/// Lower an instruction input to a constant register-shift amount, if possible.
-pub(crate) fn input_to_shiftimm(
-    ctx: &mut Lower<Inst>,
-    input: InsnInput,
-) -> Option<ShiftOpShiftImm> {
-    input_to_const(ctx, input).and_then(ShiftOpShiftImm::maybe_from_shift)
-}
-
 /// How to handle narrow values loaded into registers; see note on `narrow_mode`
 /// parameter to `put_input_in_*` below.
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub(crate) enum NarrowValueMode {
     None,
-    /// Zero-extend to 32 bits if original is < 32 bits.
-    ZeroExtend32,
-    /// Sign-extend to 32 bits if original is < 32 bits.
-    SignExtend32,
     /// Zero-extend to 64 bits if original is < 64 bits.
     ZeroExtend64,
-    /// Sign-extend to 64 bits if original is < 64 bits.
-    SignExtend64,
 }
 
 impl NarrowValueMode {
     fn is_32bit(&self) -> bool {
         match self {
             NarrowValueMode::None => false,
-            NarrowValueMode::ZeroExtend32 | NarrowValueMode::SignExtend32 => true,
-            NarrowValueMode::ZeroExtend64 | NarrowValueMode::SignExtend64 => false,
-        }
-    }
-
-    fn is_signed(&self) -> bool {
-        match self {
-            NarrowValueMode::SignExtend32 | NarrowValueMode::SignExtend64 => true,
-            NarrowValueMode::ZeroExtend32 | NarrowValueMode::ZeroExtend64 => false,
-            NarrowValueMode::None => false,
+            NarrowValueMode::ZeroExtend64 => false,
         }
     }
 }
@@ -159,29 +76,6 @@ fn extend_reg(
     let from_bits = ty_bits(ty) as u8;
     match (narrow_mode, from_bits) {
         (NarrowValueMode::None, _) => in_reg,
-        (NarrowValueMode::ZeroExtend32, n) if n < 32 => {
-            let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
-            ctx.emit(Inst::Extend {
-                rd: tmp,
-                rn: in_reg,
-                signed: false,
-                from_bits,
-                to_bits: 32,
-            });
-            tmp.to_reg()
-        }
-        (NarrowValueMode::SignExtend32, n) if n < 32 => {
-            let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
-            ctx.emit(Inst::Extend {
-                rd: tmp,
-                rn: in_reg,
-                signed: true,
-                from_bits,
-                to_bits: 32,
-            });
-            tmp.to_reg()
-        }
-        (NarrowValueMode::ZeroExtend32, 32) | (NarrowValueMode::SignExtend32, 32) => in_reg,
-
         (NarrowValueMode::ZeroExtend64, n) if n < 64 => {
             if is_const {
@@ -199,17 +93,6 @@
                 tmp.to_reg()
             }
         }
-        (NarrowValueMode::SignExtend64, n) if n < 64 => {
-            let tmp = ctx.alloc_tmp(I32).only_reg().unwrap();
-            ctx.emit(Inst::Extend {
-                rd: tmp,
-                rn: in_reg,
-                signed: true,
-                from_bits,
-                to_bits: 64,
-            });
-            tmp.to_reg()
-        }
         (_, 64) => in_reg,
         (_, 128) => in_reg,
 
@@ -261,72 +144,6 @@ fn put_value_in_reg(ctx: &mut Lower<Inst>, value: Value, narrow_mode: NarrowValu
     extend_reg(ctx, ty, reg, is_const, narrow_mode)
 }
 
-/// Lower an instruction input to multiple regs
-pub(crate) fn put_input_in_regs(ctx: &mut Lower<Inst>, input: InsnInput) -> ValueRegs<Reg> {
-    let value = ctx.input_as_value(input.insn, input.input);
-    let (in_regs, _, _) = lower_value_to_regs(ctx, value);
-    in_regs
-}
-
-/// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
-///
-/// The `narrow_mode` flag indicates whether the consumer of this value needs
-/// the high bits clear. For many operations, such as an add/sub/mul or any
-/// bitwise logical operation, the low-bit results depend only on the low-bit
-/// inputs, so e.g. we can do an 8 bit add on 32 bit registers where the 8-bit
-/// value is stored in the low 8 bits of the register and the high 24 bits are
-/// undefined. If the op truly needs the high N bits clear (such as for a
-/// divide or a right-shift or a compare-to-zero), `narrow_mode` should be
-/// set to `ZeroExtend` or `SignExtend` as appropriate, and the resulting
-/// register will be provided the extended value.
-fn put_input_in_rs(
-    ctx: &mut Lower<Inst>,
-    input: InsnInput,
-    narrow_mode: NarrowValueMode,
-) -> ResultRS {
-    let inputs = ctx.get_input_as_source_or_const(input.insn, input.input);
-    // Unique or non-unique use is fine for merging here.
-    if let Some((insn, 0)) = inputs.inst.as_inst() {
-        let op = ctx.data(insn).opcode();
-
-        if op == Opcode::Ishl {
-            let shiftee = InsnInput { insn, input: 0 };
-            let shift_amt = InsnInput { insn, input: 1 };
-
-            // Can we get the shift amount as an immediate?
-            if let Some(shiftimm) = input_to_shiftimm(ctx, shift_amt) {
-                let shiftee_bits = ty_bits(ctx.input_ty(insn, 0));
-                if shiftee_bits <= std::u8::MAX as usize {
-                    let shiftimm = shiftimm.mask(shiftee_bits as u8);
-                    let reg = put_input_in_reg(ctx, shiftee, narrow_mode);
-                    return ResultRS::RegShift(reg, ShiftOpAndAmt::new(ShiftOp::LSL, shiftimm));
-                }
-            }
-        }
-    }
-
-    ResultRS::Reg(put_input_in_reg(ctx, input, narrow_mode))
-}
-
-/// Lower an instruction input to a reg or reg/shift, or reg/extend operand.
-/// This does not actually codegen the source instruction; it just uses the
-/// vreg into which the source instruction will generate its value.
-///
-/// See note on `put_input_in_rs` for a description of `narrow_mode`.
-fn put_input_in_rse(
-    ctx: &mut Lower<Inst>,
-    input: InsnInput,
-    narrow_mode: NarrowValueMode,
-) -> ResultRSE {
-    let value = ctx.input_as_value(input.insn, input.input);
-    if let Some((val, extendop)) = get_as_extended_value(ctx, value, narrow_mode) {
-        let reg = put_value_in_reg(ctx, val, NarrowValueMode::None);
-        return ResultRSE::RegExtend(reg, extendop);
-    }
-
-    ResultRSE::from_rs(put_input_in_rs(ctx, input, narrow_mode))
-}
-
 fn get_as_extended_value(
     ctx: &mut Lower<Inst>,
     val: Value,
@@ -351,13 +168,8 @@ fn get_as_extended_value(
             // A single zero-extend or sign-extend is equal to itself.
             (_, NarrowValueMode::None) => true,
             // Two zero-extends or sign-extends in a row is equal to a single zero-extend or sign-extend.
-            (false, NarrowValueMode::ZeroExtend32) | (false, NarrowValueMode::ZeroExtend64) => true,
-            (true, NarrowValueMode::SignExtend32) | (true, NarrowValueMode::SignExtend64) => true,
-            // A zero-extend and a sign-extend in a row is not equal to a single zero-extend or sign-extend
-            (false, NarrowValueMode::SignExtend32) | (false, NarrowValueMode::SignExtend64) => {
-                false
-            }
-            (true, NarrowValueMode::ZeroExtend32) | (true, NarrowValueMode::ZeroExtend64) => false,
+            (false, NarrowValueMode::ZeroExtend64) => true,
+            (true, NarrowValueMode::ZeroExtend64) => false,
         } {
             let extendop = match (sign_extend, inner_bits) {
                 (true, 8) => ExtendOp::SXTB,
@@ -379,25 +191,9 @@
         && ((narrow_mode.is_32bit() && out_bits < 32)
             || (!narrow_mode.is_32bit() && out_bits < 64))
     {
         let extendop = match (narrow_mode, out_bits) {
-            (NarrowValueMode::SignExtend32, 1) | (NarrowValueMode::SignExtend64, 1) => {
-                ExtendOp::SXTB
-            }
-            (NarrowValueMode::ZeroExtend32, 1) | (NarrowValueMode::ZeroExtend64, 1) => {
-                ExtendOp::UXTB
-            }
-            (NarrowValueMode::SignExtend32, 8) | (NarrowValueMode::SignExtend64, 8) => {
-                ExtendOp::SXTB
-            }
-            (NarrowValueMode::ZeroExtend32, 8) | (NarrowValueMode::ZeroExtend64, 8) => {
-                ExtendOp::UXTB
-            }
-            (NarrowValueMode::SignExtend32, 16) | (NarrowValueMode::SignExtend64, 16) => {
-                ExtendOp::SXTH
-            }
-            (NarrowValueMode::ZeroExtend32, 16) | (NarrowValueMode::ZeroExtend64, 16) => {
-                ExtendOp::UXTH
-            }
-            (NarrowValueMode::SignExtend64, 32) => ExtendOp::SXTW,
+            (NarrowValueMode::ZeroExtend64, 1) => ExtendOp::UXTB,
+            (NarrowValueMode::ZeroExtend64, 8) => ExtendOp::UXTB,
+            (NarrowValueMode::ZeroExtend64, 16) => ExtendOp::UXTH,
             (NarrowValueMode::ZeroExtend64, 32) => ExtendOp::UXTW,
             _ => unreachable!(),
         };
@@ -406,73 +202,6 @@
     None
 }
 
-pub(crate) fn put_input_in_rse_imm12(
-    ctx: &mut Lower<Inst>,
-    input: InsnInput,
-    narrow_mode: NarrowValueMode,
-) -> ResultRSEImm12 {
-    if let Some(imm_value) = input_to_const(ctx, input) {
-        if let Some(i) = Imm12::maybe_from_u64(imm_value) {
-            let out_ty_bits = ty_bits(ctx.input_ty(input.insn, input.input));
-            let is_negative = (i.bits as u64) & (1 << (cmp::max(out_ty_bits, 1) - 1)) != 0;
-
-            // This condition can happen if we matched a value that overflows the output type of
-            // its `iconst` when viewed as a signed value (i.e. iconst.i8 200).
-            // When that happens we need to lower as a negative value, which we cannot do here.
-            if !(narrow_mode.is_signed() && is_negative) {
-                return ResultRSEImm12::Imm12(i);
-            }
-        }
-    }
-
-    ResultRSEImm12::from_rse(put_input_in_rse(ctx, input, narrow_mode))
-}
-
-//============================================================================
-// ALU instruction constructors.
-
-pub(crate) fn alu_inst_imm12(
-    op: ALUOp,
-    ty: Type,
-    rd: Writable<Reg>,
-    rn: Reg,
-    rm: ResultRSEImm12,
-) -> Inst {
-    let size = OperandSize::from_ty(ty);
-    match rm {
-        ResultRSEImm12::Imm12(imm12) => Inst::AluRRImm12 {
-            alu_op: op,
-            size,
-            rd,
-            rn,
-            imm12,
-        },
-        ResultRSEImm12::Reg(rm) => Inst::AluRRR {
-            alu_op: op,
-            size,
-            rd,
-            rn,
-            rm,
-        },
-        ResultRSEImm12::RegShift(rm, shiftop) => Inst::AluRRRShift {
-            alu_op: op,
-            size,
-            rd,
-            rn,
-            rm,
-            shiftop,
-        },
-        ResultRSEImm12::RegExtend(rm, extendop) => Inst::AluRRRExtend {
-            alu_op: op,
-            size,
-            rd,
-            rn,
-            rm,
-            extendop,
-        },
-    }
-}
-
 //============================================================================
 // Lowering: addressing mode support. Takes instruction directly, rather
 // than an `InsnInput`, to do more introspection.
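// A minimal, self-contained sketch of the sign check that the deleted
// `put_input_in_rse_imm12` performed (the name and parameters here are
// stand-ins, not backend API): a constant such as `iconst.i8 200` fits
// in an Imm12, but viewed at its 8-bit source width it is negative, so
// the signed narrow modes had to reject the immediate form.
fn imm12_is_negative_at_width(imm_bits: u64, out_ty_bits: usize) -> bool {
    // Bit `out_ty_bits - 1` is the sign bit at the source width.
    imm_bits & (1 << (out_ty_bits.max(1) - 1)) != 0
}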
@@ -967,129 +696,6 @@ pub(crate) fn lower_fp_condcode(cc: FloatCC) -> Cond {
     }
 }
 
-pub(crate) fn lower_vector_compare(
-    ctx: &mut Lower<Inst>,
-    rd: Writable<Reg>,
-    mut rn: Reg,
-    mut rm: Reg,
-    ty: Type,
-    cond: Cond,
-) -> CodegenResult<()> {
-    let is_float = ty.lane_type().is_float();
-    let size = VectorSize::from_ty(ty);
-
-    if is_float && (cond == Cond::Vc || cond == Cond::Vs) {
-        let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
-
-        ctx.emit(Inst::VecRRR {
-            alu_op: VecALUOp::Fcmeq,
-            rd,
-            rn,
-            rm: rn,
-            size,
-        });
-        ctx.emit(Inst::VecRRR {
-            alu_op: VecALUOp::Fcmeq,
-            rd: tmp,
-            rn: rm,
-            rm,
-            size,
-        });
-        ctx.emit(Inst::VecRRR {
-            alu_op: VecALUOp::And,
-            rd,
-            rn: rd.to_reg(),
-            rm: tmp.to_reg(),
-            size,
-        });
-
-        if cond == Cond::Vs {
-            ctx.emit(Inst::VecMisc {
-                op: VecMisc2::Not,
-                rd,
-                rn: rd.to_reg(),
-                size,
-            });
-        }
-    } else {
-        // 'Less than' operations are implemented by swapping
-        // the order of operands and using the 'greater than'
-        // instructions.
-        // 'Not equal' is implemented with 'equal' and inverting
-        // the result.
-        let (alu_op, swap) = match (is_float, cond) {
-            (false, Cond::Eq) => (VecALUOp::Cmeq, false),
-            (false, Cond::Ne) => (VecALUOp::Cmeq, false),
-            (false, Cond::Ge) => (VecALUOp::Cmge, false),
-            (false, Cond::Gt) => (VecALUOp::Cmgt, false),
-            (false, Cond::Le) => (VecALUOp::Cmge, true),
-            (false, Cond::Lt) => (VecALUOp::Cmgt, true),
-            (false, Cond::Hs) => (VecALUOp::Cmhs, false),
-            (false, Cond::Hi) => (VecALUOp::Cmhi, false),
-            (false, Cond::Ls) => (VecALUOp::Cmhs, true),
-            (false, Cond::Lo) => (VecALUOp::Cmhi, true),
-            (true, Cond::Eq) => (VecALUOp::Fcmeq, false),
-            (true, Cond::Ne) => (VecALUOp::Fcmeq, false),
-            (true, Cond::Mi) => (VecALUOp::Fcmgt, true),
-            (true, Cond::Ls) => (VecALUOp::Fcmge, true),
-            (true, Cond::Ge) => (VecALUOp::Fcmge, false),
-            (true, Cond::Gt) => (VecALUOp::Fcmgt, false),
-            _ => {
-                return Err(CodegenError::Unsupported(format!(
-                    "Unsupported {} SIMD vector comparison: {:?}",
-                    if is_float {
-                        "floating-point"
-                    } else {
-                        "integer"
-                    },
-                    cond
-                )))
-            }
-        };
-
-        if swap {
-            std::mem::swap(&mut rn, &mut rm);
-        }
-
-        ctx.emit(Inst::VecRRR {
-            alu_op,
-            rd,
-            rn,
-            rm,
-            size,
-        });
-
-        if cond == Cond::Ne {
-            ctx.emit(Inst::VecMisc {
-                op: VecMisc2::Not,
-                rd,
-                rn: rd.to_reg(),
-                size,
-            });
-        }
-    }
-
-    Ok(())
-}
-
-/// Determines whether this condcode interprets inputs as signed or unsigned. See the
-/// documentation for the `icmp` instruction in cranelift-codegen/meta/src/shared/instructions.rs
-/// for further insights into this.
-pub(crate) fn condcode_is_signed(cc: IntCC) -> bool {
-    match cc {
-        IntCC::Equal
-        | IntCC::UnsignedGreaterThanOrEqual
-        | IntCC::UnsignedGreaterThan
-        | IntCC::UnsignedLessThanOrEqual
-        | IntCC::UnsignedLessThan
-        | IntCC::NotEqual => false,
-        IntCC::SignedGreaterThanOrEqual
-        | IntCC::SignedGreaterThan
-        | IntCC::SignedLessThanOrEqual
-        | IntCC::SignedLessThan => true,
-    }
-}
-
 //=============================================================================
 // Helpers for instruction lowering.
 
@@ -1142,256 +748,6 @@ pub(crate) fn maybe_value_multi(
     None
 }
 
-/// Checks for an instance of `op` feeding the given input, possibly via a conversion `conv` (e.g.,
-/// Bint or a bitcast).
-///
-/// FIXME cfallin 2020-03-30: this is really ugly. Factor out tree-matching stuff and make it
-/// a bit more generic.
-pub(crate) fn maybe_input_insn_via_conv(
-    c: &mut Lower<Inst>,
-    input: InsnInput,
-    op: Opcode,
-    conv: Opcode,
-) -> Option<IRInst> {
-    let inputs = c.get_input_as_source_or_const(input.insn, input.input);
-    if let Some((src_inst, _)) = inputs.inst.as_inst() {
-        let data = c.data(src_inst);
-        if data.opcode() == op {
-            return Some(src_inst);
-        }
-        if data.opcode() == conv {
-            let inputs = c.get_input_as_source_or_const(src_inst, 0);
-            if let Some((src_inst, _)) = inputs.inst.as_inst() {
-                let data = c.data(src_inst);
-                if data.opcode() == op {
-                    return Some(src_inst);
-                }
-            }
-        }
-    }
-    None
-}
-
-/// Specifies what [lower_icmp] should do when lowering
-#[derive(Debug, Clone, PartialEq)]
-pub(crate) enum IcmpOutput {
-    /// Lowers the comparison into a cond code, discarding the results. The cond code emitted can
-    /// be checked in the resulting [IcmpResult].
-    CondCode,
-}
-
-impl IcmpOutput {
-    pub fn reg(&self) -> Option<Writable<Reg>> {
-        match self {
-            IcmpOutput::CondCode => None,
-        }
-    }
-}
-
-/// The output of an Icmp lowering.
-#[derive(Debug, Clone, PartialEq)]
-pub(crate) enum IcmpResult {
-    /// The result was output into the given [Cond]. Callers may perform operations using this [Cond]
-    /// and its inverse, other [Cond]'s are not guaranteed to be correct.
-    CondCode(Cond),
-}
-
-impl IcmpResult {
-    pub fn unwrap_cond(&self) -> Cond {
-        match self {
-            IcmpResult::CondCode(c) => *c,
-        }
-    }
-}
-
-/// Lower an icmp comparision
-///
-/// We can lower into the status flags, or materialize the result into a register
-/// This is controlled by the `output` parameter.
-pub(crate) fn lower_icmp(
-    ctx: &mut Lower<Inst>,
-    insn: IRInst,
-    condcode: IntCC,
-    output: IcmpOutput,
-) -> CodegenResult<IcmpResult> {
-    trace!(
-        "lower_icmp: insn {}, condcode: {}, output: {:?}",
-        insn,
-        condcode,
-        output
-    );
-
-    let rd = output.reg().unwrap_or(writable_zero_reg());
-    let inputs = insn_inputs(ctx, insn);
-    let cond = lower_condcode(condcode);
-    let is_signed = condcode_is_signed(condcode);
-    let ty = ctx.input_ty(insn, 0);
-    let bits = ty_bits(ty);
-    let narrow_mode = match (bits <= 32, is_signed) {
-        (true, true) => NarrowValueMode::SignExtend32,
-        (true, false) => NarrowValueMode::ZeroExtend32,
-        (false, true) => NarrowValueMode::SignExtend64,
-        (false, false) => NarrowValueMode::ZeroExtend64,
-    };
-    let mut should_materialize = output.reg().is_some();
-
-    let out_condcode = if ty == I128 {
-        let lhs = put_input_in_regs(ctx, inputs[0]);
-        let rhs = put_input_in_regs(ctx, inputs[1]);
-
-        let tmp1 = ctx.alloc_tmp(I64).only_reg().unwrap();
-        let tmp2 = ctx.alloc_tmp(I64).only_reg().unwrap();
-
-        match condcode {
-            IntCC::Equal | IntCC::NotEqual => {
-                // cmp lhs_lo, rhs_lo
-                // ccmp lhs_hi, rhs_hi, #0, eq
-                // cset dst, {eq, ne}
-
-                ctx.emit(Inst::AluRRR {
-                    alu_op: ALUOp::SubS,
-                    size: OperandSize::Size64,
-                    rd: writable_zero_reg(),
-                    rn: lhs.regs()[0],
-                    rm: rhs.regs()[0],
-                });
-                ctx.emit(Inst::CCmp {
-                    size: OperandSize::Size64,
-                    rn: lhs.regs()[1],
-                    rm: rhs.regs()[1],
-                    nzcv: NZCV::new(false, false, false, false),
-                    cond: Cond::Eq,
-                });
-                cond
-            }
-            _ => {
-                // cmp lhs_lo, rhs_lo
-                // cset tmp1, unsigned_cond
-                // cmp lhs_hi, rhs_hi
-                // cset tmp2, cond
-                // csel dst, tmp1, tmp2, eq
-
-                let rd = output.reg().unwrap_or(tmp1);
-                let unsigned_cond = lower_condcode(condcode.unsigned());
-
-                ctx.emit(Inst::AluRRR {
-                    alu_op: ALUOp::SubS,
-                    size: OperandSize::Size64,
-                    rd: writable_zero_reg(),
-                    rn: lhs.regs()[0],
-                    rm: rhs.regs()[0],
-                });
-                materialize_bool_result(ctx, insn, tmp1, unsigned_cond);
-                ctx.emit(Inst::AluRRR {
-                    alu_op: ALUOp::SubS,
-                    size: OperandSize::Size64,
-                    rd: writable_zero_reg(),
-                    rn: lhs.regs()[1],
-                    rm: rhs.regs()[1],
-                });
-                materialize_bool_result(ctx, insn, tmp2, cond);
-                ctx.emit(Inst::CSel {
-                    cond: Cond::Eq,
-                    rd,
-                    rn: tmp1.to_reg(),
-                    rm: tmp2.to_reg(),
-                });
-
-                if output == IcmpOutput::CondCode {
-                    // We only need to guarantee that the flags for `cond` are correct, so we can
-                    // compare rd with 0 or 1
-
-                    // If we are doing compare or equal, we want to compare with 1 instead of zero
-                    if condcode.without_equal() != condcode {
-                        lower_constant_u64(ctx, tmp2, 1);
-                    }
-
-                    let xzr = zero_reg();
-                    let rd = rd.to_reg();
-                    let tmp2 = tmp2.to_reg();
-                    let (rn, rm) = match condcode {
-                        IntCC::SignedGreaterThanOrEqual => (rd, tmp2),
-                        IntCC::UnsignedGreaterThanOrEqual => (rd, tmp2),
-                        IntCC::SignedLessThanOrEqual => (tmp2, rd),
-                        IntCC::UnsignedLessThanOrEqual => (tmp2, rd),
-                        IntCC::SignedGreaterThan => (rd, xzr),
-                        IntCC::UnsignedGreaterThan => (rd, xzr),
-                        IntCC::SignedLessThan => (xzr, rd),
-                        IntCC::UnsignedLessThan => (xzr, rd),
-                        _ => unreachable!(),
-                    };
-
-                    ctx.emit(Inst::AluRRR {
-                        alu_op: ALUOp::SubS,
-                        size: OperandSize::Size64,
-                        rd: writable_zero_reg(),
-                        rn,
-                        rm,
-                    });
-                }
-
-                // Prevent a second materialize_bool_result to be emitted at the end of the function
-                should_materialize = false;
-                cond
-            }
-        }
-    } else if ty.is_vector() {
-        assert_ne!(output, IcmpOutput::CondCode);
-        should_materialize = false;
-
-        let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
-        let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
-        lower_vector_compare(ctx, rd, rn, rm, ty, cond)?;
-        cond
-    } else {
-        let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
-        let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
-        ctx.emit(alu_inst_imm12(ALUOp::SubS, ty, writable_zero_reg(), rn, rm));
-        cond
-    };
-
-    // Most of the comparisons above produce flags by default, if the caller requested the result
-    // in a register we materialize those flags into a register. Some branches do end up producing
-    // the result as a register by default, so we ignore those.
-    if should_materialize {
-        materialize_bool_result(ctx, insn, rd, out_condcode);
-    }
-
-    Ok(match output {
-        IcmpOutput::CondCode => IcmpResult::CondCode(out_condcode),
-    })
-}
-
-pub(crate) fn lower_fcmp_or_ffcmp_to_flags(ctx: &mut Lower<Inst>, insn: IRInst) {
-    let ty = ctx.input_ty(insn, 0);
-    let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
-    let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-    let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-    ctx.emit(Inst::FpuCmp {
-        size: ScalarSize::from_ty(ty),
-        rn,
-        rm,
-    });
-}
-
-/// Materialize a boolean value into a register from the flags
-/// (e.g set by a comparison).
-/// A 0 / -1 (all-ones) result as expected for bool operations.
-pub(crate) fn materialize_bool_result(
-    ctx: &mut Lower<Inst>,
-    insn: IRInst,
-    rd: Writable<Reg>,
-    cond: Cond,
-) {
-    // A boolean is 0 / -1; if output width is > 1 use `csetm`,
-    // otherwise use `cset`.
-    if ty_bits(ctx.output_ty(insn, 0)) > 1 {
-        ctx.emit(Inst::CSetm { rd, cond });
-    } else {
-        ctx.emit(Inst::CSet { rd, cond });
-    }
-}
-
 //=============================================================================
 // Lowering-backend trait implementation.
 
@@ -1408,7 +764,33 @@ impl LowerBackend for AArch64Backend {
         branches: &[IRInst],
         targets: &[MachLabel],
     ) -> CodegenResult<()> {
-        lower_inst::lower_branch(ctx, branches, targets)
+        // A block should end with at most two branches. The first may be a
+        // conditional branch; a conditional branch can be followed only by an
+        // unconditional branch or fallthrough. Otherwise, if only one branch,
+        // it may be an unconditional branch, a fallthrough, a return, or a
+        // trap. These conditions are verified by `is_ebb_basic()` during the
+        // verifier pass.
+        assert!(branches.len() <= 2);
+        if branches.len() == 2 {
+            let op1 = ctx.data(branches[1]).opcode();
+            assert!(op1 == Opcode::Jump);
+        }
+
+        if let Ok(()) = super::lower::isle::lower_branch(
+            ctx,
+            &self.triple,
+            &self.flags,
+            &self.isa_flags,
+            branches[0],
+            targets,
+        ) {
+            return Ok(());
+        }
+
+        unreachable!(
+            "implemented in ISLE: branch = `{}`",
+            ctx.dfg().display_inst(branches[0]),
        );
     }
 
     fn maybe_pinned_reg(&self) -> Option<Reg> {
diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
index 7e19ddcc4b77..530828013402 100644
--- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
@@ -67,6 +67,25 @@ pub(crate) fn lower(
     )
 }
 
+pub(crate) fn lower_branch(
+    lower_ctx: &mut Lower<MInst>,
+    triple: &Triple,
+    flags: &Flags,
+    isa_flags: &IsaFlags,
+    branch: Inst,
+    targets: &[MachLabel],
+) -> Result<(), ()> {
+    lower_common(
+        lower_ctx,
+        triple,
+        flags,
+        isa_flags,
+        &[],
+        branch,
+        |cx, insn| generated_code::constructor_lower_branch(cx, insn, &targets.to_vec()),
+    )
+}
+
 pub struct ExtendedValue {
     val: Value,
     extend: ExtendOp,
@@ -342,6 +361,10 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
         CondBrKind::Zero(reg)
     }
 
+    fn cond_br_not_zero(&mut self, reg: Reg) -> CondBrKind {
+        CondBrKind::NotZero(reg)
+    }
+
     fn cond_br_cond(&mut self, cond: &Cond) -> CondBrKind {
         CondBrKind::Cond(*cond)
     }
@@ -521,6 +544,9 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
         lower_condcode(*cc)
     }
 
+    fn invert_cond(&mut self, cond: &Cond) -> Cond {
+        (*cond).invert()
+    }
     fn preg_sp(&mut self) -> PReg {
         super::regs::stack_reg().to_real_reg().unwrap().into()
     }
@@ -533,6 +559,34 @@
         super::regs::link_reg().to_real_reg().unwrap().into()
     }
 
+    fn branch_target(&mut self, elements: &VecMachLabel, idx: u8) -> BranchTarget {
+        BranchTarget::Label(elements[idx as usize])
+    }
+
+    fn targets_jt_size(&mut self, elements: &VecMachLabel) -> u32 {
+        (elements.len() - 1) as u32
+    }
+
+    fn targets_jt_space(&mut self, elements: &VecMachLabel) -> CodeOffset {
+        // Calculate the number of bytes needed for the jump-table sequence:
+        // 4 bytes per instruction, with an 8-instruction base sequence plus
+        // one entry per jump-table target.
+        4 * (8 + self.targets_jt_size(elements))
+    }
+
+    fn targets_jt_info(&mut self, elements: &VecMachLabel) -> BoxJTSequenceInfo {
+        let targets: Vec<BranchTarget> = elements
+            .iter()
+            .skip(1)
+            .map(|bix| BranchTarget::Label(*bix))
+            .collect();
+        let default_target = BranchTarget::Label(elements[0]);
+        Box::new(JTSequenceInfo {
+            targets,
+            default_target,
+        })
+    }
+
     fn min_fp_value(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg {
         let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap();
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index 580aea9ec298..cb875e5f73a4 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -1,8 +1,5 @@
 //! Lower a single Cranelift instruction into vcode.
 
-use super::lower::*;
-use crate::binemit::CodeOffset;
-use crate::ir::types::*;
 use crate::ir::Inst as IRInst;
 use crate::ir::Opcode;
 use crate::isa::aarch64::inst::*;
@@ -11,8 +8,6 @@ use crate::machinst::lower::*;
 use crate::machinst::*;
 use crate::settings::Flags;
 use crate::{CodegenError, CodegenResult};
-use alloc::boxed::Box;
-use alloc::vec::Vec;
 use target_lexicon::Triple;
 
 /// Actually codegen an instruction's results into registers.
@@ -323,269 +318,3 @@ pub(crate) fn lower_insn_to_regs(
 
     Ok(())
 }
-
-pub(crate) fn lower_branch(
-    ctx: &mut Lower<Inst>,
-    branches: &[IRInst],
-    targets: &[MachLabel],
-) -> CodegenResult<()> {
-    // A block should end with at most two branches. The first may be a
-    // conditional branch; a conditional branch can be followed only by an
-    // unconditional branch or fallthrough. Otherwise, if only one branch,
-    // it may be an unconditional branch, a fallthrough, a return, or a
-    // trap. These conditions are verified by `is_ebb_basic()` during the
-    // verifier pass.
-    assert!(branches.len() <= 2);
-
-    if branches.len() == 2 {
-        // Must be a conditional branch followed by an unconditional branch.
-        let op0 = ctx.data(branches[0]).opcode();
-        let op1 = ctx.data(branches[1]).opcode();
-
-        assert!(op1 == Opcode::Jump);
-        let taken = BranchTarget::Label(targets[0]);
-        // not_taken target is the target of the second branch, even if it is a Fallthrough
-        // instruction: because we reorder blocks while we lower, the fallthrough in the new
-        // order is not (necessarily) the same as the fallthrough in CLIF. So we use the
-        // explicitly-provided target.
-        let not_taken = BranchTarget::Label(targets[1]);
-
-        match op0 {
-            Opcode::Brz | Opcode::Brnz => {
-                let ty = ctx.input_ty(branches[0], 0);
-                let flag_input = InsnInput {
-                    insn: branches[0],
-                    input: 0,
-                };
-                if let Some(icmp_insn) =
-                    maybe_input_insn_via_conv(ctx, flag_input, Opcode::Icmp, Opcode::Bint)
-                {
-                    let condcode = ctx.data(icmp_insn).cond_code().unwrap();
-                    let cond =
-                        lower_icmp(ctx, icmp_insn, condcode, IcmpOutput::CondCode)?.unwrap_cond();
-                    let negated = op0 == Opcode::Brz;
-                    let cond = if negated { cond.invert() } else { cond };
-
-                    ctx.emit(Inst::CondBr {
-                        taken,
-                        not_taken,
-                        kind: CondBrKind::Cond(cond),
-                    });
-                } else if let Some(fcmp_insn) =
-                    maybe_input_insn_via_conv(ctx, flag_input, Opcode::Fcmp, Opcode::Bint)
-                {
-                    let condcode = ctx.data(fcmp_insn).fp_cond_code().unwrap();
-                    let cond = lower_fp_condcode(condcode);
-                    let negated = op0 == Opcode::Brz;
-                    let cond = if negated { cond.invert() } else { cond };
-
-                    lower_fcmp_or_ffcmp_to_flags(ctx, fcmp_insn);
-                    ctx.emit(Inst::CondBr {
-                        taken,
-                        not_taken,
-                        kind: CondBrKind::Cond(cond),
-                    });
-                } else {
-                    let rt = if ty == I128 {
-                        let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
-                        let input = put_input_in_regs(ctx, flag_input);
-                        ctx.emit(Inst::AluRRR {
-                            alu_op: ALUOp::Orr,
-                            size: OperandSize::Size64,
-                            rd: tmp,
-                            rn: input.regs()[0],
-                            rm: input.regs()[1],
-                        });
-                        tmp.to_reg()
-                    } else {
-                        put_input_in_reg(ctx, flag_input, NarrowValueMode::ZeroExtend64)
-                    };
-                    let kind = match op0 {
-                        Opcode::Brz => CondBrKind::Zero(rt),
-                        Opcode::Brnz => CondBrKind::NotZero(rt),
-                        _ => unreachable!(),
-                    };
-                    ctx.emit(Inst::CondBr {
-                        taken,
-                        not_taken,
-                        kind,
-                    });
-                }
-            }
-            Opcode::BrIcmp => {
-                let condcode = ctx.data(branches[0]).cond_code().unwrap();
-                let cond =
-                    lower_icmp(ctx, branches[0], condcode, IcmpOutput::CondCode)?.unwrap_cond();
-
-                ctx.emit(Inst::CondBr {
-                    taken,
-                    not_taken,
-                    kind: CondBrKind::Cond(cond),
-                });
-            }
-
-            Opcode::Brif => {
-                let condcode = ctx.data(branches[0]).cond_code().unwrap();
-
-                let flag_input = InsnInput {
-                    insn: branches[0],
-                    input: 0,
-                };
-                if let Some(ifcmp_insn) = maybe_input_insn(ctx, flag_input, Opcode::Ifcmp) {
-                    let cond =
-                        lower_icmp(ctx, ifcmp_insn, condcode, IcmpOutput::CondCode)?.unwrap_cond();
-                    ctx.emit(Inst::CondBr {
-                        taken,
-                        not_taken,
-                        kind: CondBrKind::Cond(cond),
-                    });
-                } else {
-                    // If the ifcmp result is actually placed in a
-                    // register, we need to move it back into the flags.
-                    let rn = put_input_in_reg(ctx, flag_input, NarrowValueMode::None);
-                    ctx.emit(Inst::MovToNZCV { rn });
-                    ctx.emit(Inst::CondBr {
-                        taken,
-                        not_taken,
-                        kind: CondBrKind::Cond(lower_condcode(condcode)),
-                    });
-                }
-            }
-
-            Opcode::Brff => {
-                let condcode = ctx.data(branches[0]).fp_cond_code().unwrap();
-                let cond = lower_fp_condcode(condcode);
-                let kind = CondBrKind::Cond(cond);
-                let flag_input = InsnInput {
-                    insn: branches[0],
-                    input: 0,
-                };
-                if let Some(ffcmp_insn) = maybe_input_insn(ctx, flag_input, Opcode::Ffcmp) {
-                    lower_fcmp_or_ffcmp_to_flags(ctx, ffcmp_insn);
-                    ctx.emit(Inst::CondBr {
-                        taken,
-                        not_taken,
-                        kind,
-                    });
-                } else {
-                    // If the ffcmp result is actually placed in a
-                    // register, we need to move it back into the flags.
-                    let rn = put_input_in_reg(ctx, flag_input, NarrowValueMode::None);
-                    ctx.emit(Inst::MovToNZCV { rn });
-                    ctx.emit(Inst::CondBr {
-                        taken,
-                        not_taken,
-                        kind,
-                    });
-                }
-            }
-
-            _ => unimplemented!(),
-        }
-    } else {
-        // Must be an unconditional branch or an indirect branch.
-        let op = ctx.data(branches[0]).opcode();
-        match op {
-            Opcode::Jump => {
-                assert!(branches.len() == 1);
-                ctx.emit(Inst::Jump {
-                    dest: BranchTarget::Label(targets[0]),
-                });
-            }
-
-            Opcode::BrTable => {
-                // Expand `br_table index, default, JT` to:
-                //
-                // emit_island  // this forces an island at this point
-                //              // if the jumptable would push us past
-                //              // the deadline
-                // cmp idx, #jt_size
-                // b.hs default
-                // csel vTmp2, xzr, idx, hs
-                // csdb
-                // adr vTmp1, PC+16
-                // ldr vTmp2, [vTmp1, vTmp2, uxtw #2]
-                // add vTmp1, vTmp1, vTmp2
-                // br vTmp1
-                // [jumptable offsets relative to JT base]
-                let jt_size = targets.len() - 1;
-                assert!(jt_size <= std::u32::MAX as usize);
-
-                ctx.emit(Inst::EmitIsland {
-                    needed_space: 4 * (8 + jt_size) as CodeOffset,
-                });
-
-                let ridx = put_input_in_reg(
-                    ctx,
-                    InsnInput {
-                        insn: branches[0],
-                        input: 0,
-                    },
-                    NarrowValueMode::ZeroExtend32,
-                );
-
-                let rtmp1 = ctx.alloc_tmp(I32).only_reg().unwrap();
-                let rtmp2 = ctx.alloc_tmp(I32).only_reg().unwrap();
-
-                // Bounds-check, leaving condition codes for JTSequence's
-                // branch to default target below.
-                if let Some(imm12) = Imm12::maybe_from_u64(jt_size as u64) {
-                    ctx.emit(Inst::AluRRImm12 {
-                        alu_op: ALUOp::SubS,
-                        size: OperandSize::Size32,
-                        rd: writable_zero_reg(),
-                        rn: ridx,
-                        imm12,
-                    });
-                } else {
-                    lower_constant_u64(ctx, rtmp1, jt_size as u64);
-                    ctx.emit(Inst::AluRRR {
-                        alu_op: ALUOp::SubS,
-                        size: OperandSize::Size32,
-                        rd: writable_zero_reg(),
-                        rn: ridx,
-                        rm: rtmp1.to_reg(),
-                    });
-                }
-
-                // Emit the compound instruction that does:
-                //
-                // b.hs default
-                // csel rB, xzr, rIndex, hs
-                // csdb
-                // adr rA, jt
-                // ldrsw rB, [rA, rB, uxtw #2]
-                // add rA, rA, rB
-                // br rA
-                // [jt entries]
-                //
-                // This must be *one* instruction in the vcode because
-                // we cannot allow regalloc to insert any spills/fills
-                // in the middle of the sequence; otherwise, the ADR's
-                // PC-rel offset to the jumptable would be incorrect.
-                // (The alternative is to introduce a relocation pass
-                // for inlined jumptables, which is much worse, IMHO.)
-
-                let jt_targets: Vec<BranchTarget> = targets
-                    .iter()
-                    .skip(1)
-                    .map(|bix| BranchTarget::Label(*bix))
-                    .collect();
-                let default_target = BranchTarget::Label(targets[0]);
-                ctx.emit(Inst::JTSequence {
-                    ridx,
-                    rtmp1,
-                    rtmp2,
-                    info: Box::new(JTSequenceInfo {
-                        targets: jt_targets,
-                        default_target,
-                    }),
-                });
-            }
-
-            _ => panic!("Unknown branch type!"),
-        }
-    }
-
-    Ok(())
-}
diff --git a/cranelift/codegen/src/machinst/inst_common.rs b/cranelift/codegen/src/machinst/inst_common.rs
index 0f13a912fd74..7c693c38a679 100644
--- a/cranelift/codegen/src/machinst/inst_common.rs
+++ b/cranelift/codegen/src/machinst/inst_common.rs
@@ -24,12 +24,6 @@ pub(crate) struct InsnOutput {
     pub(crate) output: usize,
 }
 
-pub(crate) fn insn_inputs<I: VCodeInst>(ctx: &Lower<I>, insn: IRInst) -> SmallVec<[InsnInput; 4]> {
-    (0..ctx.num_inputs(insn))
-        .map(|i| InsnInput { insn, input: i })
-        .collect()
-}
-
 pub(crate) fn insn_outputs<I: VCodeInst>(
     ctx: &Lower<I>,
     insn: IRInst,
diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle
index d8a4458a2ae3..979518f4ccec 100644
--- a/cranelift/codegen/src/prelude.isle
+++ b/cranelift/codegen/src/prelude.isle
@@ -853,6 +853,11 @@
       (ConsumesFlags.ConsumesFlagsSideEffect2 c1 c2))
   (SideEffectNoResult.Inst3 p c1 c2))
 
+(rule (with_flags_side_effect
+        (ProducesFlags.ProducesFlagsTwiceSideEffect p1 p2)
+        (ConsumesFlags.ConsumesFlagsSideEffect c))
+      (SideEffectNoResult.Inst3 p1 p2 c))
+
 ;;;; Helpers for Working with TrapCode ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (decl trap_code_division_by_zero () TrapCode)
diff --git a/cranelift/filetests/filetests/isa/aarch64/condbr.clif b/cranelift/filetests/filetests/isa/aarch64/condbr.clif
index 923233a5ba1d..dd30f7ea8b7a 100644
--- a/cranelift/filetests/filetests/isa/aarch64/condbr.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/condbr.clif
@@ -290,8 +290,8 @@ block1:
 ;   cset x6, lo
 ;   subs xzr, x1, x3
 ;   cset x9, lt
-;   csel x6, x6, x9, eq
-;   subs xzr, xzr, x6
+;   csel x11, x6, x9, eq
+;   subs xzr, xzr, x11
 ;   b.lt label1 ; b label2
 ; block1:
 ;   b label3
@@ -314,8 +314,8 @@ block1:
 ;   cset x6, lo
 ;   subs xzr, x1, x3
 ;   cset x9, lo
-;   csel x6, x6, x9, eq
-;   subs xzr, xzr, x6
+;   csel x11, x6, x9, eq
+;   subs xzr, xzr, x11
 ;   b.lo label1 ; b label2
 ; block1:
 ;   b label3
@@ -338,9 +338,9 @@ block1:
 ;   cset x6, ls
 ;   subs xzr, x1, x3
 ;   cset x9, le
-;   csel x6, x6, x9, eq
-;   movz x9, #1
-;   subs xzr, x9, x6
+;   csel x11, x6, x9, eq
+;   movz w13, #1
+;   subs xzr, x13, x11
 ;   b.le label1 ; b label2
 ; block1:
 ;   b label3
@@ -363,9 +363,9 @@ block1:
 ;   cset x6, ls
 ;   subs xzr, x1, x3
 ;   cset x9, ls
-;   csel x6, x6, x9, eq
-;   movz x9, #1
-;   subs xzr, x9, x6
+;   csel x11, x6, x9, eq
+;   orr x13, xzr, #1
+;   subs xzr, x13, x11
 ;   b.ls label1 ; b label2
 ; block1:
 ;   b label3
@@ -388,8 +388,8 @@ block1:
 ;   cset x6, hi
 ;   subs xzr, x1, x3
 ;   cset x9, gt
-;   csel x6, x6, x9, eq
-;   subs xzr, x6, xzr
+;   csel x11, x6, x9, eq
+;   subs xzr, x11, xzr
 ;   b.gt label1 ; b label2
 ; block1:
 ;   b label3
@@ -412,8 +412,8 @@ block1:
 ;   cset x6, hi
 ;   subs xzr, x1, x3
 ;   cset x9, hi
-;   csel x6, x6, x9, eq
-;   subs xzr, x6, xzr
+;   csel x11, x6, x9, eq
+;   subs xzr, x11, xzr
 ;   b.hi label1 ; b label2
 ; block1:
 ;   b label3
@@ -436,9 +436,9 @@ block1:
 ;   cset x6, hs
 ;   subs xzr, x1, x3
 ;   cset x9, ge
-;   csel x6, x6, x9, eq
-;   movz x9, #1
-;   subs xzr, x6, x9
+;   csel x11, x6, x9, eq
+;   movz w13, #1
+;   subs xzr, x11, x13
 ;   b.ge label1 ; b label2
 ; block1:
 ;   b label3
@@ -461,9 +461,9 @@ block1:
 ;   cset x6, hs
 ;   subs xzr, x1, x3
 ;   cset x9, hs
-;   csel x6, x6, x9, eq
-;   movz x9, #1
-;   subs xzr, x6, x9
+;   csel x11, x6, x9, eq
+;   orr x13, xzr, #1
+;   subs xzr, x11, x13
 ;   b.hs label1 ; b label2
 ; block1:
 ;   b label3
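For comparison with the `condbr.clif` expectations above, the new `br_table`
path can be exercised by a filetest along these lines (a sketch only; the
expected-output comments are omitted since register choices depend on the
register allocator):

function %jt(i32) -> i32 {
    jt0 = jump_table [block1, block2]

block0(v0: i32):
    br_table v0, block3, jt0

block1:
    v1 = iconst.i32 1
    return v1

block2:
    v2 = iconst.i32 2
    return v2

block3:
    v3 = iconst.i32 0
    return v3
}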