Skip to content

Commit

Permalink
riscv64: Improve cls codegen
Browse files Browse the repository at this point in the history
  • Loading branch information
afonso360 committed Mar 17, 2023
1 parent cceef5c commit 41773c1
Show file tree
Hide file tree
Showing 5 changed files with 199 additions and 41 deletions.
70 changes: 31 additions & 39 deletions cranelift/codegen/src/isa/riscv64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1031,6 +1031,37 @@



;; TODO: LLVM lowers this as not+clz for i32/i64. We should do the same.
;; Lowers `cls` for a value `r` of type `ty`.
;;
;; The value is sign-extended so that its sign can be tested with a signed
;; compare against zero. A negative input is replaced by its bitwise inverse,
;; a non-negative input is used unchanged. The leading zeros of the selected
;; value are then counted and 1 is subtracted from that count.
(decl lower_cls (Type Reg) Reg)
(rule (lower_cls ty r)
      (let ((signed Reg (ext_int_if_need $true r ty))
            (selected Reg (gen_select_reg (IntCC.SignedLessThan) signed (zero_reg) (gen_bit_not r) r))
            (zero_count Reg (lower_clz ty selected)))
        (alu_rr_imm12 (AluOPRRI.Addi) zero_count (imm12_const -1))))

;; If the sign bit is set, we count the leading zeros of the inverted value.
;; Otherwise we can just count the leading zeros of the original value.
;; Subtract 1 since the sign bit does not count.
(decl lower_cls_i128 (ValueRegs) ValueRegs)
(rule (lower_cls_i128 x)
      (let (;; Split the 128-bit input into its two 64-bit halves.
            (lo Reg (value_regs_get x 0))
            (hi Reg (value_regs_get x 1))
            ;; Both halves are inverted when the high half is negative.
            (lo_sel Reg (gen_select_reg (IntCC.SignedLessThan) hi (zero_reg) (gen_bit_not lo) lo))
            (hi_sel Reg (gen_select_reg (IntCC.SignedLessThan) hi (zero_reg) (gen_bit_not hi) hi))
            ;; Leading-zero count of the selected 128-bit value; only the
            ;; low register of the result is used below.
            (zeros ValueRegs (lower_clz_i128 (value_regs lo_sel hi_sel)))
            (zeros_lo Reg (value_regs_get zeros 0))
            (adjusted Reg (alu_rr_imm12 (AluOPRRI.Addi) zeros_lo (imm12_const -1))))
        (zext adjusted $I64 $I128)))


;; Emits a single `MInst.Cltz` for source register `rs` of type `ty` and
;; returns the register passed as its `sum` operand.
;; NOTE(review): `leading` presumably selects a leading- vs. trailing-zero
;; count -- confirm against the `MInst.Cltz` definition.
(decl gen_cltz (bool Reg Type) Reg)
(rule (gen_cltz leading rs ty)
      (let ((scratch WritableReg (temp_writable_reg $I64))
            (step_reg WritableReg (temp_writable_reg $I64))
            (result WritableReg (temp_writable_reg $I64))
            (_ Unit (emit (MInst.Cltz leading result step_reg scratch rs ty))))
        result))


;; Extends an integer if it is smaller than 64 bits.
(decl ext_int_if_need (bool ValueRegs Type) ValueRegs)
Expand Down Expand Up @@ -1267,27 +1298,6 @@
(part3 Reg (gen_select_reg (IntCC.Equal) shamt (zero_reg) (zero_reg) part2)))
(alu_rrr (AluOPRRR.Or) part1 part3)))

;; Lowers `cls` for a value `r` of type `ty`: counts the leading zeros of `r`
;; (or of its bitwise inverse when `r` is negative) and subtracts one.
(decl lower_cls (Reg Type) Reg)
(rule
(lower_cls r ty)
(let
( ;; Sign-extend the value so its sign can be tested against zero.
(tmp Reg (ext_int_if_need $true r ty))
;; Negative input: use the inverted bits; otherwise use the value unchanged.
(tmp2 Reg (gen_select_reg (IntCC.SignedLessThan) tmp (zero_reg) (gen_bit_not r) r))
;; Count the leading zeros of the selected value.
(tmp3 Reg (lower_clz ty tmp2)))
;; Subtract 1 from the leading-zero count.
(alu_rr_imm12 (AluOPRRI.Addi) tmp3 (imm12_const -1))))

;; Emits a single `MInst.Cltz` for source register `rs` of type `ty` and
;; returns the `sum` register as a plain `Reg`.
;; NOTE(review): `leading` presumably selects a leading- vs. trailing-zero
;; count -- confirm against the `MInst.Cltz` definition.
(decl gen_cltz (bool Reg Type) Reg)
(rule
(gen_cltz leading rs ty)
(let
;; Three fresh I64 temporaries are handed to the instruction.
((tmp WritableReg (temp_writable_reg $I64))
(step WritableReg (temp_writable_reg $I64))
(sum WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.Cltz leading sum step tmp rs ty))))
(writable_reg_to_reg sum)))

(decl gen_popcnt (Reg Type) Reg)
(rule
Expand Down Expand Up @@ -1454,24 +1464,6 @@
(gen_select_reg (IntCC.UnsignedGreaterThanOrEqual) shamt_128 const64 high_replacement high))))


;; Lowers `cls` for an i128 value held in the register pair `x`.
(decl lower_cls_i128 (ValueRegs) ValueRegs)
(rule
(lower_cls_i128 x)
(let
( ;;; `cls` is implemented on top of `clz`:
;;; if the value is negative (tested on the high register), all bits are inverted first.
(low Reg
(gen_select_reg (IntCC.SignedLessThan) (value_regs_get x 1) (zero_reg) (gen_bit_not (value_regs_get x 0)) (value_regs_get x 0)))
;;; The high half is selected using the same condition.
(high Reg
(gen_select_reg (IntCC.SignedLessThan) (value_regs_get x 1) (zero_reg) (gen_bit_not (value_regs_get x 1)) (value_regs_get x 1)))
;; Count the leading zeros of the selected 128-bit value.
(tmp ValueRegs (lower_clz_i128 (value_regs low high)))
(count Reg (value_regs_get tmp 0))
;; Subtract 1 from the count.
(result Reg (alu_rr_imm12 (AluOPRRI.Addi) count (imm12_const -1))))
;; The second register of the returned pair is a constant 0.
(value_regs result (load_u64_constant 0))))


(decl gen_amode (Reg Offset32 Type) AMode)
(extern constructor gen_amode gen_amode)

Expand Down
2 changes: 1 addition & 1 deletion cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@

;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type (fits_in_64 ty) (cls x)))
(lower_cls x ty))
(lower_cls ty x))

;; `cls` on `$I128` is lowered by the dedicated `lower_cls_i128` helper; this
;; rule is given priority 1.
(rule 1 (lower (has_type $I128 (cls x)))
(lower_cls_i128 x))
Expand Down
164 changes: 164 additions & 0 deletions cranelift/filetests/filetests/isa/riscv64/cls-zbb.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
test compile precise-output
set unwind_info=false
target riscv64 has_zbb


function %cls_i8(i8) -> i8 {
block0(v0: i8):
v1 = cls v0
return v1
}

; VCode:
; block0:
; sext.b t2,a0
; not a1,a0
; select_reg a3,a1,a0##condition=(t2 slt zero)
; andi a5,a3,255
; clz a7,a5
; addi t4,a7,-56
; addi a0,t4,-1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x93, 0x13, 0x45, 0x60
; not a1, a0
; bltz t2, 0xc
; ori a3, a0, 0
; j 8
; ori a3, a1, 0
; andi a5, a3, 0xff
; .byte 0x93, 0x98, 0x07, 0x60
; addi t4, a7, -0x38
; addi a0, t4, -1
; ret

function %cls_i16(i16) -> i16 {
block0(v0: i16):
v1 = cls v0
return v1
}

; VCode:
; block0:
; sext.h t2,a0
; not a1,a0
; select_reg a3,a1,a0##condition=(t2 slt zero)
; zext.h a5,a3
; clz a7,a5
; addi t4,a7,-48
; addi a0,t4,-1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; .byte 0x93, 0x13, 0x55, 0x60
; not a1, a0
; bltz t2, 0xc
; ori a3, a0, 0
; j 8
; ori a3, a1, 0
; .byte 0xbb, 0xc7, 0x06, 0x08
; .byte 0x93, 0x98, 0x07, 0x60
; addi t4, a7, -0x30
; addi a0, t4, -1
; ret

function %cls_i32(i32) -> i32 {
block0(v0: i32):
v1 = cls v0
return v1
}

; VCode:
; block0:
; sext.w t2,a0
; not a1,a0
; select_reg a3,a1,a0##condition=(t2 slt zero)
; clzw a5,a3
; addi a0,a5,-1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; sext.w t2, a0
; not a1, a0
; bltz t2, 0xc
; ori a3, a0, 0
; j 8
; ori a3, a1, 0
; .byte 0x9b, 0x97, 0x06, 0x60
; addi a0, a5, -1
; ret

function %cls_i64(i64) -> i64 {
block0(v0: i64):
v1 = cls v0
return v1
}

; VCode:
; block0:
; not t2,a0
; select_reg a1,t2,a0##condition=(a0 slt zero)
; clz a3,a1
; addi a0,a3,-1
; ret
;
; Disassembled:
; block0: ; offset 0x0
; not t2, a0
; bltz a0, 0xc
; ori a1, a0, 0
; j 8
; ori a1, t2, 0
; .byte 0x93, 0x96, 0x05, 0x60
; addi a0, a3, -1
; ret

function %cls_i128(i128) -> i128 {
block0(v0: i128):
v1 = cls v0
return v1
}

; VCode:
; block0:
; not a2,a0
; select_reg a2,a2,a0##condition=(a1 slt zero)
; not a4,a1
; select_reg a6,a4,a1##condition=(a1 slt zero)
; clz t3,a6
; clz t0,a2
; li t2,64
; select_reg a1,t0,zero##condition=(t2 eq t3)
; add a3,t3,a1
; li a5,0
; addi a0,a3,-1
; li a1,0
; ret
;
; Disassembled:
; block0: ; offset 0x0
; not a2, a0
; bltz a1, 8
; ori a2, a0, 0
; not a4, a1
; bltz a1, 0xc
; ori a6, a1, 0
; j 8
; ori a6, a4, 0
; .byte 0x13, 0x1e, 0x08, 0x60
; .byte 0x93, 0x12, 0x06, 0x60
; addi t2, zero, 0x40
; beq t2, t3, 0xc
; ori a1, zero, 0
; j 8
; ori a1, t0, 0
; add a3, t3, a1
; mv a5, zero
; addi a0, a3, -1
; mv a1, zero
; ret

1 change: 1 addition & 0 deletions cranelift/filetests/filetests/runtests/cls.clif
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ test interpret
test run
target aarch64
target riscv64
target riscv64 has_zbb
target s390x
; not implemented on `x86_64`

Expand Down
3 changes: 2 additions & 1 deletion cranelift/filetests/filetests/runtests/i128-cls.clif
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
test run
target aarch64
target riscv64
target riscv64
target riscv64 has_zbb
target s390x

function %cls_i128(i128) -> i128 {
Expand Down

0 comments on commit 41773c1

Please sign in to comment.