Skip to content

Commit

Permalink
Port branches to ISLE (AArch64) (#4943)
Browse files Browse the repository at this point in the history
* Port branches to ISLE (AArch64)

Ported the existing implementations of the following opcodes for AArch64
to ISLE:
- `Brz`
- `Brnz`
- `Brif`
- `Brff`
- `BrIcmp`
- `Jump`
- `BrTable`

Copyright (c) 2022 Arm Limited

* Remove dead code

Copyright (c) 2022 Arm Limited
  • Loading branch information
dheaton-arm authored Sep 26, 2022
1 parent 11e9004 commit 3a2b32b
Show file tree
Hide file tree
Showing 9 changed files with 381 additions and 997 deletions.
82 changes: 82 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1619,6 +1619,18 @@
;; Externally implemented constructor taking a `Type` and a `u64` and
;; producing an `ImmLogic`.
;; NOTE(review): presumably encodes the value as a logical immediate for the
;; given type -- confirm against the Rust-side implementation.
(decl u64_into_imm_logic (Type u64) ImmLogic)
(extern constructor u64_into_imm_logic u64_into_imm_logic)

;; Externally implemented constructor: given a branch's `VecMachLabel` and a
;; `u8` index into it, produce the corresponding `BranchTarget`.
;; The branch lowering rules use index 0 for the taken target and index 1 for
;; the not-taken target.
(decl branch_target (VecMachLabel u8) BranchTarget)
(extern constructor branch_target branch_target)

;; Externally implemented constructor: derive the jump-table size (`u32`) from
;; a `br_table`'s `VecMachLabel`. The `br_table` lowering widens the result to
;; `u64` and uses it as the bound the index is compared against.
;; NOTE(review): presumably the number of table entries excluding the default
;; target -- confirm against the Rust-side implementation.
(decl targets_jt_size (VecMachLabel) u32)
(extern constructor targets_jt_size targets_jt_size)

;; Externally implemented constructor: compute a `CodeOffset` from a
;; `br_table`'s `VecMachLabel`. The `br_table` lowering passes the result to
;; `emit_island` as the amount of space needed.
(decl targets_jt_space (VecMachLabel) CodeOffset)
(extern constructor targets_jt_space targets_jt_space)

;; Externally implemented constructor: build the `BoxJTSequenceInfo` for a
;; `br_table`'s `VecMachLabel`. `br_table_impl` hands the result to
;; `jt_sequence`, which stores it in the `MInst.JTSequence` instruction.
(decl targets_jt_info (VecMachLabel) BoxJTSequenceInfo)
(extern constructor targets_jt_info targets_jt_info)

;; Calculate the minimum floating-point bound for a conversion to floating
;; point from an integer type.
;; Accepts whether the output is signed, the size of the input
Expand Down Expand Up @@ -1698,6 +1710,9 @@
;; Externally implemented constructor: build a `CondBrKind` from a register.
;; Used by the generic `brz` lowering rules, i.e. for a branch that is taken
;; when the register is zero.
(decl cond_br_zero (Reg) CondBrKind)
(extern constructor cond_br_zero cond_br_zero)

;; Externally implemented constructor: build a `CondBrKind` from a register.
;; Used by the generic `brnz` lowering rules, i.e. for a branch that is taken
;; when the register is non-zero.
(decl cond_br_not_zero (Reg) CondBrKind)
(extern constructor cond_br_not_zero cond_br_not_zero)

;; Externally implemented constructor: build a `CondBrKind` from a condition
;; code (`Cond`). Used by the lowering rules that branch on the flags set by
;; a preceding comparison.
(decl cond_br_cond (Cond) CondBrKind)
(extern constructor cond_br_cond cond_br_cond)

Expand Down Expand Up @@ -2893,6 +2908,11 @@
;; TODO: Port lower_condcode() to ISLE.
(extern constructor cond_code cond_code)

;; Invert a condition code: takes a `Cond` and returns a `Cond`.
;; The `brz` lowering rules apply this to the comparison's condition, because
;; `brz` branches when the comparison result is zero.
(decl invert_cond (Cond) Cond)
;; TODO: Port cond.invert() to ISLE.
(extern constructor invert_cond invert_cond)

;; Generate comparison to zero operator from input condition code.
;; Externally implemented constructor mapping a `FloatCC` to a `VecMisc2`
;; operator.
(decl float_cc_cmp_zero_to_vec_misc_op (FloatCC) VecMisc2)
(extern constructor float_cc_cmp_zero_to_vec_misc_op float_cc_cmp_zero_to_vec_misc_op)
Expand Down Expand Up @@ -3530,3 +3550,65 @@
;; `lower_select` case for types accepted by `ty_int_bool_ref_scalar_64`:
;; a single `csel` consuming the supplied flags producer.
(rule (lower_select producer cc sel_ty lhs rhs)
  (if (ty_int_bool_ref_scalar_64 sel_ty))
  (with_flags producer
    (csel cc lhs rhs)))

;; Wrap an `MInst.Jump` to the given branch target as a side-effecting
;; instruction that produces no result.
(decl aarch64_jump (BranchTarget) SideEffectNoResult)
(rule (aarch64_jump dest)
  (SideEffectNoResult.Inst
    (MInst.Jump dest)))

;; Build the `MInst.JTSequence` pseudo-instruction as a flags consumer.
;;
;; The compound instruction expands to:
;;
;;     b.hs default
;;     csel rB, xzr, rIndex, hs
;;     csdb
;;     adr rA, jt
;;     ldrsw rB, [rA, rB, uxtw #2]
;;     add rA, rA, rB
;;     br rA
;;     [jt entries]
;;
;; It has to stay a *single* vcode instruction: if regalloc were allowed to
;; place spills/fills inside the sequence, the PC-relative offset used by the
;; ADR to reach the jumptable would no longer be correct. (The alternative, a
;; relocation pass for inlined jumptables, is much worse, IMHO.)
;;
;; Two fresh `$I64` temporaries are allocated and passed to the instruction
;; along with the index register and the boxed jump-table info.
(decl jt_sequence (Reg BoxJTSequenceInfo) ConsumesFlags)
(rule (jt_sequence index jt)
  (let ((scratch_a WritableReg (temp_writable_reg $I64))
        (scratch_b WritableReg (temp_writable_reg $I64)))
    (ConsumesFlags.ConsumesFlagsSideEffect
      (MInst.JTSequence jt index scratch_a scratch_b))))

;; Build an `MInst.CondBr` (taken target, not-taken target, branch kind) as a
;; side-effecting flags consumer.
(decl cond_br (BranchTarget BranchTarget CondBrKind) ConsumesFlags)
(rule (cond_br then_target else_target br_kind)
  (ConsumesFlags.ConsumesFlagsSideEffect
    (MInst.CondBr then_target else_target br_kind)))

;; Build an `MInst.MovToNZCV` from the given register as a side-effecting
;; flags producer. The `brif`/`brff` fallback rules use it when the flags
;; value lives in a register and must be moved back into the flags.
(decl mov_to_nzcv (Reg) ProducesFlags)
(rule (mov_to_nzcv src)
  (ProducesFlags.ProducesFlagsSideEffect
    (MInst.MovToNZCV src)))

;; Wrap an `MInst.EmitIsland` requesting `space` (a `CodeOffset`) as a
;; side-effecting instruction that produces no result.
(decl emit_island (CodeOffset) SideEffectNoResult)
(rule (emit_island space)
  (SideEffectNoResult.Inst (MInst.EmitIsland space)))

;; Emit the body of a `br_table`: a 32-bit comparison of the index register
;; against the table size, whose flags are consumed by the `jt_sequence`
;; pseudo-instruction.
;;
;; Arguments: the table size, the index register, and the branch targets.
(decl br_table_impl (u64 Reg VecMachLabel) InstOutput)

;; Preferred case: the size matches `imm12_from_u64`, so compare against an
;; immediate.
(rule (br_table_impl (imm12_from_u64 bound) index targets)
  (let ((seq_info BoxJTSequenceInfo (targets_jt_info targets)))
    (side_effect
      (with_flags_side_effect
        (cmp_imm (OperandSize.Size32) index bound)
        (jt_sequence index seq_info)))))

;; Lower-priority fallback: materialize the size in a register first and use
;; a register-register compare.
(rule -1 (br_table_impl bound index targets)
  (let ((bound_reg Reg (imm $I64 (ImmExtend.Zero) bound))
        (seq_info BoxJTSequenceInfo (targets_jt_info targets)))
    (side_effect
      (with_flags_side_effect
        (cmp (OperandSize.Size32) index bound_reg)
        (jt_sequence index seq_info)))))
45 changes: 0 additions & 45 deletions cranelift/codegen/src/isa/aarch64/inst/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -558,18 +558,6 @@ pub enum ScalarSize {
}

impl ScalarSize {
/// Pick the smallest `ScalarSize` that can hold a value `bits` bits wide.
///
/// The requested width is rounded up to the next power of two before being
/// mapped to a size.
///
/// # Panics
///
/// Panics if the rounded width is not one of 8, 16, 32, 64 or 128.
pub fn from_bits<I>(bits: I) -> ScalarSize
where
    I: Into<usize>,
{
    let requested: usize = bits.into();
    let rounded = requested.next_power_of_two();
    match rounded {
        8 => ScalarSize::Size8,
        16 => ScalarSize::Size16,
        32 => ScalarSize::Size32,
        64 => ScalarSize::Size64,
        128 => ScalarSize::Size128,
        other => panic!("Unexpected type width: {}", other),
    }
}

/// Convert to an integer operand size.
pub fn operand_size(&self) -> OperandSize {
match self {
Expand All @@ -579,13 +567,6 @@ impl ScalarSize {
}
}

/// Pick the smallest `ScalarSize` that fits the given (non-vector) type.
///
/// The type's bit width is obtained from `ty_bits` and then mapped through
/// `from_bits`. Passing a vector type trips a debug assertion.
pub fn from_ty(ty: Type) -> ScalarSize {
    debug_assert!(!ty.is_vector());

    let width = ty_bits(ty);
    Self::from_bits(width)
}

/// Return the encoding bits that are used by some scalar FP instructions
/// for a particular operand size.
pub fn ftype(&self) -> u32 {
Expand Down Expand Up @@ -645,32 +626,6 @@ impl VectorSize {
}
}

/// Convert from a type into a vector operand size.
pub fn from_ty(ty: Type) -> VectorSize {
debug_assert!(ty.is_vector());

match ty {
B8X8 => VectorSize::Size8x8,
B8X16 => VectorSize::Size8x16,
B16X4 => VectorSize::Size16x4,
B16X8 => VectorSize::Size16x8,
B32X2 => VectorSize::Size32x2,
B32X4 => VectorSize::Size32x4,
B64X2 => VectorSize::Size64x2,
F32X2 => VectorSize::Size32x2,
F32X4 => VectorSize::Size32x4,
F64X2 => VectorSize::Size64x2,
I8X8 => VectorSize::Size8x8,
I8X16 => VectorSize::Size8x16,
I16X4 => VectorSize::Size16x4,
I16X8 => VectorSize::Size16x8,
I32X2 => VectorSize::Size32x2,
I32X4 => VectorSize::Size32x4,
I64X2 => VectorSize::Size64x2,
_ => unimplemented!("Unsupported type: {}", ty),
}
}

/// Get the integer operand size that corresponds to a lane of a vector with a certain size.
pub fn operand_size(&self) -> OperandSize {
match self {
Expand Down
189 changes: 186 additions & 3 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,16 @@
;; register(s) within which the lowered instruction's result values live.
(decl lower (Inst) InstOutput)

;; Variant of the main lowering constructor term, which receives an
;; additional argument (a vector of branch targets to be used) for
;; implementing branches.
;;
;; For two-target branch instructions, the first target is `taken` and the
;; second is `not_taken`, even if the instruction is a fallthrough: because
;; blocks are reordered while lowering, the fallthrough in the new order is
;; not (necessarily) the same as the fallthrough in CLIF. So the
;; explicitly-provided target is always used.
(decl lower_branch (Inst VecMachLabel) InstOutput)

;;;; Rules for `iconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty (iconst (u64_from_imm64 n))))
Expand Down Expand Up @@ -2497,12 +2507,185 @@

;;; Rules for `brz`/`brnz`/`brif`/`brff`/`bricmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `brz` whose condition is an `icmp`, either directly or converted via
;; `bint`. The comparison is lowered straight into the flags and the branch
;; uses the inverted condition code, since `brz` branches when the comparison
;; result is zero.
(rule (lower_branch (brz (icmp cc lhs @ (value_type in_ty) rhs) _ _) targets)
  (let ((inverted Cond (invert_cond (cond_code cc)))
        (then_target BranchTarget (branch_target targets 0))
        (else_target BranchTarget (branch_target targets 1)))
    (side_effect
      (with_flags_side_effect
        (lower_icmp_into_flags cc lhs rhs in_ty)
        (cond_br then_target else_target (cond_br_cond inverted))))))
(rule (lower_branch (brz (bint (icmp cc lhs @ (value_type in_ty) rhs)) _ _) targets)
  (let ((inverted Cond (invert_cond (cond_code cc)))
        (then_target BranchTarget (branch_target targets 0))
        (else_target BranchTarget (branch_target targets 1)))
    (side_effect
      (with_flags_side_effect
        (lower_icmp_into_flags cc lhs rhs in_ty)
        (cond_br then_target else_target (cond_br_cond inverted))))))
;; `brnz` whose condition is an `icmp`, either directly or converted via
;; `bint`. The comparison is lowered straight into the flags and the branch
;; uses the comparison's own condition code.
(rule (lower_branch (brnz (icmp cc lhs @ (value_type in_ty) rhs) _ _) targets)
  (let ((branch_cc Cond (cond_code cc))
        (then_target BranchTarget (branch_target targets 0))
        (else_target BranchTarget (branch_target targets 1)))
    (side_effect
      (with_flags_side_effect
        (lower_icmp_into_flags cc lhs rhs in_ty)
        (cond_br then_target else_target (cond_br_cond branch_cc))))))
(rule (lower_branch (brnz (bint (icmp cc lhs @ (value_type in_ty) rhs)) _ _) targets)
  (let ((branch_cc Cond (cond_code cc))
        (then_target BranchTarget (branch_target targets 0))
        (else_target BranchTarget (branch_target targets 1)))
    (side_effect
      (with_flags_side_effect
        (lower_icmp_into_flags cc lhs rhs in_ty)
        (cond_br then_target else_target (cond_br_cond branch_cc))))))
;; `brz` whose condition is a scalar-float `fcmp`, either directly or
;; converted via `bint`. An `fpu_cmp` sets the flags and the branch uses the
;; inverted floating-point condition code, since `brz` branches when the
;; comparison result is zero.
(rule (lower_branch (brz (fcmp cc lhs @ (value_type (ty_scalar_float fp_ty)) rhs) _ _) targets)
  (let ((inverted Cond (invert_cond (fp_cond_code cc)))
        (then_target BranchTarget (branch_target targets 0))
        (else_target BranchTarget (branch_target targets 1)))
    (side_effect
      (with_flags_side_effect
        (fpu_cmp (scalar_size fp_ty) lhs rhs)
        (cond_br then_target else_target (cond_br_cond inverted))))))
(rule (lower_branch (brz (bint (fcmp cc lhs @ (value_type (ty_scalar_float fp_ty)) rhs)) _ _) targets)
  (let ((inverted Cond (invert_cond (fp_cond_code cc)))
        (then_target BranchTarget (branch_target targets 0))
        (else_target BranchTarget (branch_target targets 1)))
    (side_effect
      (with_flags_side_effect
        (fpu_cmp (scalar_size fp_ty) lhs rhs)
        (cond_br then_target else_target (cond_br_cond inverted))))))
;; `brnz` whose condition is a scalar-float `fcmp`, either directly or
;; converted via `bint`. An `fpu_cmp` sets the flags and the branch uses the
;; comparison's own floating-point condition code.
(rule (lower_branch (brnz (fcmp cc lhs @ (value_type (ty_scalar_float fp_ty)) rhs) _ _) targets)
  (let ((branch_cc Cond (fp_cond_code cc))
        (then_target BranchTarget (branch_target targets 0))
        (else_target BranchTarget (branch_target targets 1)))
    (side_effect
      (with_flags_side_effect
        (fpu_cmp (scalar_size fp_ty) lhs rhs)
        (cond_br then_target else_target (cond_br_cond branch_cc))))))
(rule (lower_branch (brnz (bint (fcmp cc lhs @ (value_type (ty_scalar_float fp_ty)) rhs)) _ _) targets)
  (let ((branch_cc Cond (fp_cond_code cc))
        (then_target BranchTarget (branch_target targets 0))
        (else_target BranchTarget (branch_target targets 1)))
    (side_effect
      (with_flags_side_effect
        (fpu_cmp (scalar_size fp_ty) lhs rhs)
        (cond_br then_target else_target (cond_br_cond branch_cc))))))
;; Generic `brz`: branch when the tested value is zero.
;;
;; `$I128`: OR the two 64-bit halves together and test the combined register.
(rule (lower_branch (brz val @ (value_type $I128) _ _) targets)
  (let ((producer ProducesFlags (flags_to_producesflags val))
        (halves ValueRegs (put_in_regs val))
        (lo Reg (value_regs_get halves 0))
        (hi Reg (value_regs_get halves 1))
        (merged Reg (orr $I64 lo hi))
        (then_target BranchTarget (branch_target targets 0))
        (else_target BranchTarget (branch_target targets 1)))
    (side_effect
      (with_flags_side_effect producer
        (cond_br then_target else_target (cond_br_zero merged))))))
;; Types accepted by `ty_int_bool_ref_scalar_64`: zero-extend the value to 64
;; bits and test that register.
(rule (lower_branch (brz val @ (value_type val_ty) _ _) targets)
  (if (ty_int_bool_ref_scalar_64 val_ty))
  (let ((producer ProducesFlags (flags_to_producesflags val))
        (widened Reg (put_in_reg_zext64 val))
        (then_target BranchTarget (branch_target targets 0))
        (else_target BranchTarget (branch_target targets 1)))
    (side_effect
      (with_flags_side_effect producer
        (cond_br then_target else_target (cond_br_zero widened))))))
;; Generic `brnz`: branch when the tested value is non-zero.
;;
;; `$I128`: OR the two 64-bit halves together and test the combined register.
(rule (lower_branch (brnz val @ (value_type $I128) _ _) targets)
  (let ((producer ProducesFlags (flags_to_producesflags val))
        (halves ValueRegs (put_in_regs val))
        (lo Reg (value_regs_get halves 0))
        (hi Reg (value_regs_get halves 1))
        (merged Reg (orr $I64 lo hi))
        (then_target BranchTarget (branch_target targets 0))
        (else_target BranchTarget (branch_target targets 1)))
    (side_effect
      (with_flags_side_effect producer
        (cond_br then_target else_target (cond_br_not_zero merged))))))
;; Types accepted by `ty_int_bool_ref_scalar_64`: zero-extend the value to 64
;; bits and test that register.
(rule (lower_branch (brnz val @ (value_type val_ty) _ _) targets)
  (if (ty_int_bool_ref_scalar_64 val_ty))
  (let ((producer ProducesFlags (flags_to_producesflags val))
        (widened Reg (put_in_reg_zext64 val))
        (then_target BranchTarget (branch_target targets 0))
        (else_target BranchTarget (branch_target targets 1)))
    (side_effect
      (with_flags_side_effect producer
        (cond_br then_target else_target (cond_br_not_zero widened))))))

;; `br_icmp`: lower the integer comparison into the flags, then branch on the
;; comparison's condition code.
(rule (lower_branch (br_icmp cc lhs @ (value_type in_ty) rhs _ _) targets)
  (let ((branch_cc Cond (cond_code cc))
        (then_target BranchTarget (branch_target targets 0))
        (else_target BranchTarget (branch_target targets 1)))
    (side_effect
      (with_flags_side_effect
        (lower_icmp_into_flags cc lhs rhs in_ty)
        (cond_br then_target else_target (cond_br_cond branch_cc))))))

;; `brif` fed directly by an `ifcmp`: lower the comparison into the flags and
;; branch on the requested condition code.
(rule (lower_branch (brif cc (ifcmp lhs @ (value_type in_ty) rhs) _ _) targets)
  (let ((branch_cc Cond (cond_code cc))
        (then_target BranchTarget (branch_target targets 0))
        (else_target BranchTarget (branch_target targets 1)))
    (side_effect
      (with_flags_side_effect
        (lower_icmp_into_flags cc lhs rhs in_ty)
        (cond_br then_target else_target (cond_br_cond branch_cc))))))
;; Lower-priority fallback: the `ifcmp` result was actually placed in a
;; register, so it has to be moved back into the flags before branching.
(rule -1 (lower_branch (brif cc flags_val _ _) targets)
  (let ((branch_cc Cond (cond_code cc))
        (flags_reg Reg (put_in_reg flags_val))
        (then_target BranchTarget (branch_target targets 0))
        (else_target BranchTarget (branch_target targets 1)))
    (side_effect
      (with_flags_side_effect
        (mov_to_nzcv flags_reg)
        (cond_br then_target else_target (cond_br_cond branch_cc))))))

;; `brff` fed directly by an `ffcmp`: set the flags with an `fpu_cmp` and
;; branch on the requested floating-point condition code.
(rule (lower_branch (brff cc (ffcmp lhs @ (value_type fp_ty) rhs) _ _) targets)
  (let ((branch_cc Cond (fp_cond_code cc))
        (then_target BranchTarget (branch_target targets 0))
        (else_target BranchTarget (branch_target targets 1)))
    (side_effect
      (with_flags_side_effect
        (fpu_cmp (scalar_size fp_ty) lhs rhs)
        (cond_br then_target else_target (cond_br_cond branch_cc))))))
;; Lower-priority fallback: the `ffcmp` result was actually placed in a
;; register, so it has to be moved back into the flags before branching.
(rule -1 (lower_branch (brff cc flags_val _ _) targets)
  (let ((branch_cc Cond (fp_cond_code cc))
        (flags_reg Reg (put_in_reg flags_val))
        (then_target BranchTarget (branch_target targets 0))
        (else_target BranchTarget (branch_target targets 1)))
    (side_effect
      (with_flags_side_effect
        (mov_to_nzcv flags_reg)
        (cond_br then_target else_target (cond_br_cond branch_cc))))))

;;; Rules for `jump` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Unconditional jump to the first (and only used) entry of `targets`.
(rule (lower_branch (jump _ _) targets)
  (let ((dest BranchTarget (branch_target targets 0)))
    (side_effect (aarch64_jump dest))))

;;; Rules for `br_table` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; `targets` contains the default target with the list of branch targets
;; concatenated.
;;
;; An island with the space computed by `targets_jt_space` is requested
;; first; then the index is zero-extended from 32 bits and handed, together
;; with the table size, to `br_table_impl`.
(rule (lower_branch (br_table index _ _) targets)
  (let ((table_len u32 (targets_jt_size targets))
        (_ InstOutput (side_effect
                        (emit_island (targets_jt_space targets))))
        (index_reg Reg (put_in_reg_zext32 index)))
    (br_table_impl (u32_as_u64 table_len) index_reg targets)))
Loading

0 comments on commit 3a2b32b

Please sign in to comment.