Skip to content

Commit

Permalink
riscv64: Improve handling of i8 icmp_imm's
Browse files Browse the repository at this point in the history
  • Loading branch information
afonso360 committed Apr 7, 2023
1 parent c61118d commit c179c79
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 55 deletions.
51 changes: 20 additions & 31 deletions cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1318,11 +1318,6 @@
(decl pure imm12_bits (Imm12) u64)
(extern constructor imm12_bits imm12_bits)

;; Returns true if the `Imm12` is larger than the max signed value for `Type`.
;; This can really only happen for I8's.
(decl pure imm12_is_outside_signed_range (Imm12 Type) bool)
(extern constructor imm12_is_outside_signed_range imm12_is_outside_signed_range)

(decl imm_from_neg_bits (i64) Imm12)
(extern constructor imm_from_neg_bits imm_from_neg_bits)

Expand Down Expand Up @@ -1350,8 +1345,11 @@
(decl imm12_from_u64 (Imm12) u64)
(extern extractor imm12_from_u64 imm12_from_u64)

(decl match_imm12 (i16) Imm12)
(extern extractor infallible match_imm12 match_imm12)
;; Extracts an imm12 from an i64. The i64 must be in a range that can be
;; represented as an imm12. The value is sign-extended acording to the type
;; provided.
(decl pure partial imm12_sextend_i64 (Type i64) Imm12)
(extern constructor imm12_sextend_i64 imm12_sextend_i64)


;; Float Helpers
Expand Down Expand Up @@ -2405,34 +2403,22 @@
;; For some icmp's we can optimize the instruction sequence if the RHS is a constant.
;;
;; TODO: Currently we only have rules for <=64bit types. We can add more rules for I128's
(decl gen_icmp_imm (IntCC ValueRegs Imm12 Type) Reg)

;; This deals with the case where we have an iconst.i8 that is larger than 127.
;; This is allowed by clif semantics. However when using this constant as a signed value
;; we need to view it as a negative value, otherwise the comparison will be wrong.
;;
;; This can only happen with I8's, since it's the only type smaller than 12bits.
(rule 6 (gen_icmp_imm cc x imm ty @ $I8)
(if-let cc (signed_cond_code cc))
(if-let $true (imm12_is_outside_signed_range imm ty))
(gen_icmp_imm cc x (neg_imm12 (imm12_and imm 0x7F)) ty))

(decl gen_icmp_imm (IntCC ValueRegs i64 Type) Reg)

;; This rule isn't totally necessary since we can just use `slti 0`, but
;; it's the official mnemonic for this operation and gives a slightly nicer
;; disassembly output.
(rule 5 (gen_icmp_imm (IntCC.SignedLessThan) x (match_imm12 0) (fits_in_64 ty))
(rule 5 (gen_icmp_imm (IntCC.SignedLessThan) x 0 (fits_in_64 ty))
(rv_sltz (sext x ty $I64)))

;; `sgtz` is preferable since our equivalent immediate lowering has to do `slt+xori`.
(rule 5 (gen_icmp_imm (IntCC.SignedGreaterThan) x (match_imm12 0) (fits_in_64 ty))
(rule 5 (gen_icmp_imm (IntCC.SignedGreaterThan) x 0 (fits_in_64 ty))
(rv_sgtz (sext x ty $I64)))

;; For these IntCC's we need to both add 1 to the immediate and invert the result.
;; i.e. `x > imm` is the same as `!(x < imm + 1)`.
(rule 4 (gen_icmp_imm (intcc_sgt_or_ugt cc) x imm ty)
(if-let imm_plus_1 (imm12_add imm (imm12_const 1)))
(let ((res Reg (gen_icmp_imm (intcc_reverse cc) x imm_plus_1 ty)))
(let ((res Reg (gen_icmp_imm (intcc_reverse cc) x (i64_add imm 1) ty)))
(rv_xori res (imm12_const 1))))

;; We directly support the inverse of these condition codes. So lower the inverse
Expand All @@ -2445,22 +2431,25 @@
;; Here we can add 1 to the immediate and use the reverse condition code.
;; i.e. `x <= imm` is the same as `x < imm + 1`.
(rule 2 (gen_icmp_imm (intcc_sle_or_ule cc) x imm ty)
(if-let imm_plus_1 (imm12_add imm (imm12_const 1)))
(gen_icmp_imm (intcc_without_equal cc) x imm_plus_1 ty))
(gen_icmp_imm (intcc_without_equal cc) x (i64_add imm 1) ty))

;; We have dedicated instructions for these two cases. (`slti`/`sltiu`)
(rule 1 (gen_icmp_imm (IntCC.SignedLessThan) x imm (fits_in_64 ty))
(rule 1 (gen_icmp_imm (IntCC.SignedLessThan) x n (fits_in_64 ty))
(if-let imm (imm12_sextend_i64 ty n))
(rv_slti (sext x ty $I64) imm))
(rule 1 (gen_icmp_imm (IntCC.UnsignedLessThan) x imm (fits_in_64 ty))
(rule 1 (gen_icmp_imm (IntCC.UnsignedLessThan) x n (fits_in_64 ty))
(if-let imm (imm12_sextend_i64 ty n))
(rv_sltiu (zext x ty $I64) imm))

;; In the fallback case we load the immediate and then compare.
(rule (gen_icmp_imm cc x imm (fits_in_64 ty))
(rule (gen_icmp_imm cc x n (fits_in_64 ty))
(let ((extend_op ExtendOp (icmp_intcc_extend cc ty))
(x_ext Reg (extend x extend_op ty $I64))
;; TODO: Use `load_constant` here when we have it.
(imm Reg (rv_addi (zero_reg) imm))
(y_ext Reg (extend imm extend_op ty $I64)))
;; TODO: Ideally we shouldn't need the sign extend instruction
;; here. We should be able to do it at compile time. But our
;; constant loading infrastructure isn't great yet.
(i Reg (imm $I64 (i64_as_u64 n)))
(y_ext Reg (extend i extend_op ty $I64)))
(gen_icmp cc x_ext y_ext $I64)))


Expand Down
8 changes: 4 additions & 4 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -848,12 +848,12 @@

;; We have some optimizations that we can do when one of the sides is
;; a constant.
(rule 1 (lower (icmp cc x @ (value_type (fits_in_64 ty)) (imm12_from_value y)))
(gen_icmp_imm cc x y ty))
(rule 1 (lower (icmp cc x @ (value_type (fits_in_64 ty)) (u64_from_iconst y)))
(gen_icmp_imm cc x (i64_sextend_imm64 ty (imm64 y)) ty))

;; If the const is on the LHS, move it to the right and flip the IntCC.
(rule 2 (lower (icmp cc (imm12_from_value x) y @ (value_type (fits_in_64 ty))))
(gen_icmp_imm (intcc_reverse cc) y x ty))
(rule 2 (lower (icmp cc (u64_from_iconst x) y @ (value_type (fits_in_64 ty))))
(gen_icmp_imm (intcc_reverse cc) y (i64_sextend_imm64 ty (imm64 x)) ty))

;;;;; Rules for `fcmp`;;;;;;;;;
(rule
Expand Down
14 changes: 6 additions & 8 deletions cranelift/codegen/src/isa/riscv64/lower/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -201,10 +201,14 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> {
fn imm12_from_u64(&mut self, arg0: u64) -> Option<Imm12> {
Imm12::maybe_from_u64(arg0)
}

#[inline]
fn match_imm12(&mut self, imm: Imm12) -> i16 {
imm.as_i16()
fn imm12_sextend_i64(&mut self, ty: Type, imm: i64) -> Option<Imm12> {
let shift_count = 64 - ty.bits();
let value = (imm << (shift_count)) >> (shift_count);
Imm12::maybe_from_u64(value as u64)
}

#[inline]
fn writable_zero_reg(&mut self) -> WritableReg {
writable_zero_reg()
Expand All @@ -226,12 +230,6 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> {
imm.as_u32() as u64
}
#[inline]
fn imm12_is_outside_signed_range(&mut self, imm: Imm12, ty: Type) -> bool {
let imm = imm.as_i16();
let ty_max = ty.bounds(true).1;
imm > 0 && (imm as u128) > ty_max
}
#[inline]
fn imm_from_neg_bits(&mut self, val: i64) -> Imm12 {
Imm12::maybe_from_u64(val as u64).unwrap()
}
Expand Down
5 changes: 5 additions & 0 deletions cranelift/codegen/src/isle_prelude.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ macro_rules! isle_common_prelude_methods {
x.wrapping_neg()
}

#[inline]
fn i64_add(&mut self, x: i64, y: i64) -> i64 {
x.wrapping_add(y)
}

#[inline]
fn u64_add(&mut self, x: u64, y: u64) -> u64 {
x.wrapping_add(y)
Expand Down
3 changes: 3 additions & 0 deletions cranelift/codegen/src/prelude.isle
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@
(decl pure i64_neg (i64) i64)
(extern constructor i64_neg i64_neg)

(decl pure i64_add (i64 i64) i64)
(extern constructor i64_add i64_add)

(decl u128_as_u64 (u64) u128)
(extern extractor u128_as_u64 u128_as_u64)

Expand Down
24 changes: 12 additions & 12 deletions cranelift/filetests/filetests/isa/riscv64/iconst-icmp-small.clif
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,27 @@ block0:
; block0:
; lui t1,14
; addi t1,t1,3532
; lui a2,14
; addi a2,a2,3532
; slli a5,t1,48
; srli a7,a5,48
; slli t4,a2,48
; slli a1,t1,48
; srli a3,a1,48
; lui a6,1048574
; addi a6,a6,3532
; slli t4,a6,48
; srli t1,t4,48
; xor a0,a7,t1
; xor a0,a3,t1
; snez a0,a0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; lui t1, 0xe
; addi t1, t1, -0x234
; lui a2, 0xe
; addi a2, a2, -0x234
; slli a5, t1, 0x30
; srli a7, a5, 0x30
; slli t4, a2, 0x30
; slli a1, t1, 0x30
; srli a3, a1, 0x30
; lui a6, 0xffffe
; addi a6, a6, -0x234
; slli t4, a6, 0x30
; srli t1, t4, 0x30
; xor a0, a7, t1
; xor a0, a3, t1
; snez a0, a0
; ret

0 comments on commit c179c79

Please sign in to comment.