diff --git a/cranelift/codegen/src/isa/s390x/inst.isle b/cranelift/codegen/src/isa/s390x/inst.isle index cf262d8281c5..70e51f37d9e8 100644 --- a/cranelift/codegen/src/isa/s390x/inst.isle +++ b/cranelift/codegen/src/isa/s390x/inst.isle @@ -1667,11 +1667,11 @@ (rule (mask_amt_reg (gpr32_ty ty) reg) (let ((mask u8 (mask_amt_imm ty -1))) (and_uimm16shifted ty reg (uimm16shifted (u8_as_u16 mask) 0)))) -(rule (mask_amt_reg (gpr64_ty ty) reg) reg) +(rule 1 (mask_amt_reg (gpr64_ty ty) reg) reg) ;; Load a shift amount into a GPR. (decl amt_reg (Value) Reg) -(rule (amt_reg amt @ (value_type (fits_in_64 _))) amt) +(rule 1 (amt_reg amt @ (value_type (fits_in_64 _))) amt) (rule (amt_reg amt @ (value_type (vr128_ty _))) (vec_extract_lane $I64X2 amt 1 (zero_reg))) @@ -1680,9 +1680,9 @@ (rule (amt_vr amt @ (value_type (fits_in_64 _))) (vec_replicate_lane $I8X16 (vec_insert_lane_undef $I8X16 amt 0 (zero_reg)) 0)) -(rule (amt_vr amt @ (value_type (vr128_ty _))) +(rule 1 (amt_vr amt @ (value_type (vr128_ty _))) (vec_replicate_lane $I8X16 amt 15)) -(rule (amt_vr (u64_from_value amt)) +(rule 2 (amt_vr (u64_from_value amt)) (vec_imm_splat $I8X16 amt)) @@ -1784,10 +1784,10 @@ (rule (lower_address flags addr (i64_from_offset offset)) (memarg_reg_plus_off addr offset 0 flags)) -(rule (lower_address flags (iadd x y) (i64_from_offset 0)) +(rule 1 (lower_address flags (iadd x y) (i64_from_offset 0)) (memarg_reg_plus_reg x y 0 flags)) -(rule (lower_address flags +(rule 1 (lower_address flags (symbol_value (symbol_value_data name (reloc_distance_near) sym_offset)) (i64_from_offset offset)) (if-let final_offset (memarg_symbol_offset_sum offset sym_offset)) @@ -1801,7 +1801,7 @@ (rule (lower_address_bias flags addr (i64_from_offset offset) bias) (memarg_reg_plus_off addr offset bias flags)) -(rule (lower_address_bias flags (iadd x y) (i64_from_offset 0) bias) +(rule 1 (lower_address_bias flags (iadd x y) (i64_from_offset 0) bias) (memarg_reg_plus_reg x y bias flags)) @@ -1830,7 +1830,7 @@ ;; Convert a MemArg to a MemArgPair, reloading the address if necessary. (decl memarg_pair (MemArg) MemArgPair) -(rule (memarg_pair (memarg_pair_from_memarg mem)) mem) +(rule 1 (memarg_pair (memarg_pair_from_memarg mem)) mem) (rule (memarg_pair mem) (memarg_pair_from_reg (load_addr mem) (memarg_flags mem))) @@ -2770,19 +2770,19 @@ ;; Move source register into destination. (Non-SSA form.) (decl emit_mov (Type WritableReg Reg) Unit) -(rule (emit_mov (gpr32_ty _ty) dst src) +(rule 1 (emit_mov (gpr32_ty _ty) dst src) (emit (MInst.Mov32 dst src))) -(rule (emit_mov (gpr64_ty _ty) dst src) +(rule 2 (emit_mov (gpr64_ty _ty) dst src) (emit (MInst.Mov64 dst src))) -(rule (emit_mov $F32 dst src) +(rule 3 (emit_mov $F32 dst src) (emit (MInst.FpuMove32 dst src))) -(rule (emit_mov $F64 dst src) +(rule 3 (emit_mov $F64 dst src) (emit (MInst.FpuMove64 dst src))) -(rule (emit_mov (vr128_ty ty) dst src) +(rule 0 (emit_mov (vr128_ty ty) dst src) (emit (MInst.VecMov dst src))) ;; Allocate a temporary (writable) register, initialized as a copy of the input. 
@@ -2833,7 +2833,7 @@ (emit_side_effect (vec_store_lane $F32X4 reg mem 0))) (rule (emit_arg_store $F64 reg mem) (emit_side_effect (vec_store_lane $F64X2 reg mem 0))) -(rule (emit_arg_store (vr128_ty ty) reg mem) +(rule -1 (emit_arg_store (vr128_ty ty) reg mem) (emit_side_effect (vec_store reg mem))) (decl emit_arg_load (Type MemArg) Reg) @@ -2844,7 +2844,7 @@ (rule (emit_arg_load $R64 mem) (load64 mem)) (rule (emit_arg_load $F32 mem) (vec_load_lane_undef $F32X4 mem 0)) (rule (emit_arg_load $F64 mem) (vec_load_lane_undef $F64X2 mem 0)) -(rule (emit_arg_load (vr128_ty ty) mem) (vec_load ty mem)) +(rule -1 (emit_arg_load (vr128_ty ty) mem) (vec_load ty mem)) ;; Helper to perform a lane swap in register. (decl vec_elt_rev (Type Reg) Reg) @@ -2864,19 +2864,19 @@ ;; a different lane order than the current function, we need to swap lanes. ;; The first operand is the lane order used by the callee. (decl abi_vec_elt_rev (LaneOrder Type Reg) Reg) -(rule (abi_vec_elt_rev _ (gpr32_ty ty) reg) reg) -(rule (abi_vec_elt_rev _ (gpr64_ty ty) reg) reg) -(rule (abi_vec_elt_rev _ (ty_scalar_float ty) reg) reg) -(rule (abi_vec_elt_rev callee_lane_order _ reg) +(rule 4 (abi_vec_elt_rev _ (gpr32_ty ty) reg) reg) +(rule 3 (abi_vec_elt_rev _ (gpr64_ty ty) reg) reg) +(rule 2 (abi_vec_elt_rev _ (ty_scalar_float ty) reg) reg) +(rule 0 (abi_vec_elt_rev callee_lane_order _ reg) (if-let $true (lane_order_equal callee_lane_order (lane_order))) reg) -(rule (abi_vec_elt_rev callee_lane_order (vr128_ty ty) reg) +(rule 1 (abi_vec_elt_rev callee_lane_order (vr128_ty ty) reg) (if-let $false (lane_order_equal callee_lane_order (lane_order))) (vec_elt_rev ty reg)) ;; Helpers to emit a memory copy (MVC or memcpy libcall). (decl emit_memcpy (MemArg MemArg u64) Unit) -(rule (emit_memcpy dst src (len_minus_one len)) +(rule 1 (emit_memcpy dst src (len_minus_one len)) (emit_side_effect (mvc (memarg_pair dst) (memarg_pair src) len))) (rule (emit_memcpy dst src len) (let ((libcall LibCallInfo (lib_call_info_memcpy)) @@ -2888,13 +2888,13 @@ ;; Prepare a stack copy of a single (oversized) argument. (decl copy_to_buffer (i64 ABIArg Value) InstOutput) -(rule (copy_to_buffer base (abi_arg_only_slot slot) _) (output_none)) -(rule (copy_to_buffer base (abi_arg_struct_pointer _ offset size) val) +(rule 2 (copy_to_buffer base (abi_arg_only_slot slot) _) (output_none)) +(rule 1 (copy_to_buffer base (abi_arg_struct_pointer _ offset size) val) (let ((dst MemArg (memarg_stack_off base offset)) (src MemArg (memarg_reg_plus_off val 0 0 (memflags_trusted))) (_ Unit (emit_memcpy dst src size))) (output_none))) -(rule (copy_to_buffer base (abi_arg_implicit_pointer _ offset ty) +(rule 0 (copy_to_buffer base (abi_arg_implicit_pointer _ offset ty) val @ (value_type ty)) (let ((mem MemArg (memarg_stack_off base offset)) (_ Unit (emit_arg_store ty val mem))) @@ -2903,12 +2903,12 @@ ;; Copy a single argument/return value to its slots. ;; For oversized arguments, set the slot to the buffer address. 
(decl copy_to_arg (LaneOrder i64 ABIArg Value) Unit) -(rule (copy_to_arg lo base (abi_arg_only_slot slot) val) +(rule 2 (copy_to_arg lo base (abi_arg_only_slot slot) val) (copy_val_to_arg_slot lo base slot val)) -(rule (copy_to_arg _ base (abi_arg_struct_pointer slot offset _) _) +(rule 1 (copy_to_arg _ base (abi_arg_struct_pointer slot offset _) _) (let ((ptr Reg (load_addr (memarg_stack_off base offset)))) (copy_reg_to_arg_slot base slot ptr))) -(rule (copy_to_arg _ base (abi_arg_implicit_pointer slot offset _) _) +(rule 0 (copy_to_arg _ base (abi_arg_implicit_pointer slot offset _) _) (let ((ptr Reg (load_addr (memarg_stack_off base offset)))) (copy_reg_to_arg_slot base slot ptr))) @@ -2965,35 +2965,35 @@ (decl emit_imm (Type WritableReg u64) Unit) ;; 16-bit (or smaller) result type, any value -(rule (emit_imm (fits_in_16 _ty) dst n) +(rule 5 (emit_imm (fits_in_16 _ty) dst n) (emit (MInst.Mov32SImm16 dst (u64_as_i16 n)))) ;; 32-bit result type, value fits in i16 -(rule (emit_imm (gpr32_ty _ty) dst (i16_from_u64 n)) +(rule 4 (emit_imm (gpr32_ty _ty) dst (i16_from_u64 n)) (emit (MInst.Mov32SImm16 dst n))) ;; 32-bit result type, any value -(rule (emit_imm (gpr32_ty _ty) dst n) +(rule 3 (emit_imm (gpr32_ty _ty) dst n) (emit (MInst.Mov32Imm dst (u64_as_u32 n)))) ;; 64-bit result type, value fits in i16 -(rule (emit_imm (gpr64_ty _ty) dst (i16_from_u64 n)) +(rule 6 (emit_imm (gpr64_ty _ty) dst (i16_from_u64 n)) (emit (MInst.Mov64SImm16 dst n))) ;; 64-bit result type, value fits in i32 -(rule (emit_imm (gpr64_ty _ty) dst (i32_from_u64 n)) +(rule 2 (emit_imm (gpr64_ty _ty) dst (i32_from_u64 n)) (emit (MInst.Mov64SImm32 dst n))) ;; 64-bit result type, value fits in UImm16Shifted -(rule (emit_imm (gpr64_ty _ty) dst (uimm16shifted_from_u64 n)) +(rule 1 (emit_imm (gpr64_ty _ty) dst (uimm16shifted_from_u64 n)) (emit (MInst.Mov64UImm16Shifted dst n))) ;; 64-bit result type, value fits in UImm32Shifted -(rule (emit_imm (gpr64_ty _ty) dst (uimm32shifted_from_u64 n)) +(rule 0 (emit_imm (gpr64_ty _ty) dst (uimm32shifted_from_u64 n)) (emit (MInst.Mov64UImm32Shifted dst n))) ;; 64-bit result type, value with non-zero low-/high-parts. -(rule (emit_imm (gpr64_ty ty) dst (and (u64_nonzero_hipart hi) +(rule 7 (emit_imm (gpr64_ty ty) dst (and (u64_nonzero_hipart hi) (u64_nonzero_lopart lo))) (let ((_ Unit (emit_imm ty dst hi))) (emit_insert_imm dst lo))) @@ -3002,7 +3002,7 @@ (decl emit_insert_imm (WritableReg u64) Unit) ;; Insertion, value fits in UImm16Shifted -(rule (emit_insert_imm dst (uimm16shifted_from_u64 n)) +(rule 1 (emit_insert_imm dst (uimm16shifted_from_u64 n)) (emit (MInst.Insert64UImm16Shifted dst n))) ;; Insertion, value fits in UImm32Shifted @@ -3011,12 +3011,12 @@ ;; 32-bit floating-point type, any value. Loaded from literal pool. ;; TODO: use LZER to load 0.0 -(rule (emit_imm $F32 dst n) +(rule 8 (emit_imm $F32 dst n) (emit (MInst.LoadFpuConst32 dst (u64_as_u32 n)))) ;; 64-bit floating-point type, any value. Loaded from literal pool. ;; TODO: use LZDR to load 0.0 -(rule (emit_imm $F64 dst n) +(rule 8 (emit_imm $F64 dst n) (emit (MInst.LoadFpuConst64 dst n))) ;; Allocate a temporary register, initialized with an immediate. @@ -3035,32 +3035,32 @@ ;; Allocate a temporary register, initialized with a vector immediate. 
(decl vec_imm (Type u128) Reg) -(rule (vec_imm (vr128_ty ty) 0) +(rule 2 (vec_imm (vr128_ty ty) 0) (vec_imm_byte_mask ty 0)) -(rule (vec_imm (vr128_ty ty) (u64_pair n n)) +(rule 1 (vec_imm (vr128_ty ty) (u64_pair n n)) (vec_imm_splat $I64X2 n)) (rule (vec_imm (vr128_ty ty) n) (vec_load_const ty n)) ;; Variant with replicated immediate. (decl vec_imm_splat (Type u64) Reg) -(rule (vec_imm_splat (ty_vec128 ty) 0) +(rule 1 (vec_imm_splat (ty_vec128 ty) 0) (vec_imm_byte_mask ty 0)) -(rule (vec_imm_splat ty @ (multi_lane 8 _) n) +(rule 2 (vec_imm_splat ty @ (multi_lane 8 _) n) (vec_imm_replicate ty (u64_as_i16 n))) -(rule (vec_imm_splat ty @ (multi_lane 16 _) n) +(rule 2 (vec_imm_splat ty @ (multi_lane 16 _) n) (vec_imm_replicate ty (u64_as_i16 n))) -(rule (vec_imm_splat ty @ (multi_lane 32 _) (u32_pair _ (i16_from_u32 n))) +(rule 2 (vec_imm_splat ty @ (multi_lane 32 _) (u32_pair _ (i16_from_u32 n))) (vec_imm_replicate ty n)) -(rule (vec_imm_splat ty @ (multi_lane 64 _) (i16_from_u64 n)) +(rule 2 (vec_imm_splat ty @ (multi_lane 64 _) (i16_from_u64 n)) (vec_imm_replicate ty n)) -(rule (vec_imm_splat (multi_lane 16 _) (u32_pair _ (u16_pair _ (u8_pair n n)))) +(rule 3 (vec_imm_splat (multi_lane 16 _) (u32_pair _ (u16_pair _ (u8_pair n n)))) (vec_imm_splat $I8X16 (u8_as_u64 n))) -(rule (vec_imm_splat (multi_lane 32 _) (u32_pair _ (u16_pair n n))) +(rule 3 (vec_imm_splat (multi_lane 32 _) (u32_pair _ (u16_pair n n))) (vec_imm_splat $I16X8 (u16_as_u64 n))) -(rule (vec_imm_splat (multi_lane 64 _) (u32_pair n n)) +(rule 3 (vec_imm_splat (multi_lane 64 _) (u32_pair n n)) (vec_imm_splat $I32X4 (u32_as_u64 n))) -(rule (vec_imm_splat (ty_vec128 ty) n) +(rule 0 (vec_imm_splat (ty_vec128 ty) n) (vec_load_const_replicate ty n)) ;; Place an immediate into the low half of a register pair. @@ -3206,90 +3206,90 @@ ;; Place `Value` into destination, zero-extending to 32 bits if smaller. (Non-SSA form.) (decl emit_put_in_reg_zext32 (WritableReg Value) Unit) -(rule (emit_put_in_reg_zext32 dst (and (value_type ty) (u64_from_value val))) +(rule 3 (emit_put_in_reg_zext32 dst (and (value_type ty) (u64_from_value val))) (emit_imm (ty_ext32 ty) dst val)) -(rule (emit_put_in_reg_zext32 dst (and (value_type (fits_in_16 ty)) (sinkable_load load))) +(rule 1 (emit_put_in_reg_zext32 dst (and (value_type (fits_in_16 ty)) (sinkable_load load))) (emit_zext32_mem dst ty (sink_load load))) -(rule (emit_put_in_reg_zext32 dst val @ (value_type (fits_in_16 ty))) +(rule 0 (emit_put_in_reg_zext32 dst val @ (value_type (fits_in_16 ty))) (emit_zext32_reg dst ty val)) -(rule (emit_put_in_reg_zext32 dst val @ (value_type (ty_32_or_64 ty))) +(rule 2 (emit_put_in_reg_zext32 dst val @ (value_type (ty_32_or_64 ty))) (emit_mov ty dst val)) ;; Place `Value` into destination, sign-extending to 32 bits if smaller. (Non-SSA form.) 
(decl emit_put_in_reg_sext32 (WritableReg Value) Unit) -(rule (emit_put_in_reg_sext32 dst (and (value_type ty) (u64_from_signed_value val))) +(rule 3 (emit_put_in_reg_sext32 dst (and (value_type ty) (u64_from_signed_value val))) (emit_imm (ty_ext32 ty) dst val)) -(rule (emit_put_in_reg_sext32 dst (and (value_type (fits_in_16 ty)) (sinkable_load load))) +(rule 1 (emit_put_in_reg_sext32 dst (and (value_type (fits_in_16 ty)) (sinkable_load load))) (emit_sext32_mem dst ty (sink_load load))) -(rule (emit_put_in_reg_sext32 dst val @ (value_type (fits_in_16 ty))) +(rule 0 (emit_put_in_reg_sext32 dst val @ (value_type (fits_in_16 ty))) (emit_sext32_reg dst ty val)) -(rule (emit_put_in_reg_sext32 dst val @ (value_type (ty_32_or_64 ty))) +(rule 2 (emit_put_in_reg_sext32 dst val @ (value_type (ty_32_or_64 ty))) (emit_mov ty dst val)) ;; Place `Value` into destination, zero-extending to 64 bits if smaller. (Non-SSA form.) (decl emit_put_in_reg_zext64 (WritableReg Value) Unit) -(rule (emit_put_in_reg_zext64 dst (and (value_type ty) (u64_from_value val))) +(rule 3 (emit_put_in_reg_zext64 dst (and (value_type ty) (u64_from_value val))) (emit_imm (ty_ext64 ty) dst val)) -(rule (emit_put_in_reg_zext64 dst (and (value_type (gpr32_ty ty)) (sinkable_load load))) +(rule 1 (emit_put_in_reg_zext64 dst (and (value_type (gpr32_ty ty)) (sinkable_load load))) (emit_zext64_mem dst ty (sink_load load))) -(rule (emit_put_in_reg_zext64 dst val @ (value_type (gpr32_ty ty))) +(rule 0 (emit_put_in_reg_zext64 dst val @ (value_type (gpr32_ty ty))) (emit_zext64_reg dst ty val)) -(rule (emit_put_in_reg_zext64 dst val @ (value_type (gpr64_ty ty))) +(rule 2 (emit_put_in_reg_zext64 dst val @ (value_type (gpr64_ty ty))) (emit_mov ty dst val)) ;; Place `Value` into destination, sign-extending to 64 bits if smaller. (Non-SSA form.) (decl emit_put_in_reg_sext64 (WritableReg Value) Unit) -(rule (emit_put_in_reg_sext64 dst (and (value_type ty) (u64_from_signed_value val))) +(rule 3 (emit_put_in_reg_sext64 dst (and (value_type ty) (u64_from_signed_value val))) (emit_imm (ty_ext64 ty) dst val)) -(rule (emit_put_in_reg_sext64 dst (and (value_type (gpr32_ty ty)) (sinkable_load load))) +(rule 1 (emit_put_in_reg_sext64 dst (and (value_type (gpr32_ty ty)) (sinkable_load load))) (emit_sext64_mem dst ty (sink_load load))) -(rule (emit_put_in_reg_sext64 dst val @ (value_type (gpr32_ty ty))) +(rule 0 (emit_put_in_reg_sext64 dst val @ (value_type (gpr32_ty ty))) (emit_sext64_reg dst ty val)) -(rule (emit_put_in_reg_sext64 dst val @ (value_type (gpr64_ty ty))) +(rule 2 (emit_put_in_reg_sext64 dst val @ (value_type (gpr64_ty ty))) (emit_mov ty dst val)) ;; Place `Value` into a register, zero-extending to 32 bits if smaller. (decl put_in_reg_zext32 (Value) Reg) -(rule (put_in_reg_zext32 (and (value_type ty) (u64_from_value val))) +(rule 3 (put_in_reg_zext32 (and (value_type ty) (u64_from_value val))) (imm (ty_ext32 ty) val)) -(rule (put_in_reg_zext32 (and (value_type (fits_in_16 ty)) (sinkable_load load))) +(rule 1 (put_in_reg_zext32 (and (value_type (fits_in_16 ty)) (sinkable_load load))) (zext32_mem ty (sink_load load))) -(rule (put_in_reg_zext32 val @ (value_type (fits_in_16 ty))) +(rule 0 (put_in_reg_zext32 val @ (value_type (fits_in_16 ty))) (zext32_reg ty val)) -(rule (put_in_reg_zext32 val @ (value_type (ty_32_or_64 _ty))) +(rule 2 (put_in_reg_zext32 val @ (value_type (ty_32_or_64 _ty))) val) ;; Place `Value` into a register, sign-extending to 32 bits if smaller. 
(decl put_in_reg_sext32 (Value) Reg) -(rule (put_in_reg_sext32 (and (value_type ty) (u64_from_signed_value val))) +(rule 3 (put_in_reg_sext32 (and (value_type ty) (u64_from_signed_value val))) (imm (ty_ext32 ty) val)) -(rule (put_in_reg_sext32 (and (value_type (fits_in_16 ty)) (sinkable_load load))) +(rule 1 (put_in_reg_sext32 (and (value_type (fits_in_16 ty)) (sinkable_load load))) (sext32_mem ty (sink_load load))) -(rule (put_in_reg_sext32 val @ (value_type (fits_in_16 ty))) +(rule 0 (put_in_reg_sext32 val @ (value_type (fits_in_16 ty))) (sext32_reg ty val)) -(rule (put_in_reg_sext32 val @ (value_type (ty_32_or_64 _ty))) +(rule 2 (put_in_reg_sext32 val @ (value_type (ty_32_or_64 _ty))) val) ;; Place `Value` into a register, zero-extending to 64 bits if smaller. (decl put_in_reg_zext64 (Value) Reg) -(rule (put_in_reg_zext64 (and (value_type ty) (u64_from_value val))) +(rule 3 (put_in_reg_zext64 (and (value_type ty) (u64_from_value val))) (imm (ty_ext64 ty) val)) -(rule (put_in_reg_zext64 (and (value_type (gpr32_ty ty)) (sinkable_load load))) +(rule 1 (put_in_reg_zext64 (and (value_type (gpr32_ty ty)) (sinkable_load load))) (zext64_mem ty (sink_load load))) -(rule (put_in_reg_zext64 val @ (value_type (gpr32_ty ty))) +(rule 0 (put_in_reg_zext64 val @ (value_type (gpr32_ty ty))) (zext64_reg ty val)) -(rule (put_in_reg_zext64 val @ (value_type (gpr64_ty ty))) +(rule 2 (put_in_reg_zext64 val @ (value_type (gpr64_ty ty))) val) ;; Place `Value` into a register, sign-extending to 64 bits if smaller. (decl put_in_reg_sext64 (Value) Reg) -(rule (put_in_reg_sext64 (and (value_type ty) (u64_from_signed_value val))) +(rule 3 (put_in_reg_sext64 (and (value_type ty) (u64_from_signed_value val))) (imm (ty_ext64 ty) val)) -(rule (put_in_reg_sext64 (and (value_type (gpr32_ty ty)) (sinkable_load load))) +(rule 1 (put_in_reg_sext64 (and (value_type (gpr32_ty ty)) (sinkable_load load))) (sext64_mem ty (sink_load load))) -(rule (put_in_reg_sext64 val @ (value_type (gpr32_ty ty))) +(rule 0 (put_in_reg_sext64 val @ (value_type (gpr32_ty ty))) (sext64_reg ty val)) -(rule (put_in_reg_sext64 val @ (value_type (gpr64_ty ty))) +(rule 2 (put_in_reg_sext64 val @ (value_type (gpr64_ty ty))) val) ;; Place `Value` into the low half of a register pair, zero-extending @@ -3332,7 +3332,7 @@ (rule (emit_cmov_imm (gpr32_ty _ty) dst cond imm) (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CMov32SImm16 dst cond imm) dst)) -(rule (emit_cmov_imm (gpr64_ty _ty) dst cond imm) +(rule 1 (emit_cmov_imm (gpr64_ty _ty) dst cond imm) (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CMov64SImm16 dst cond imm) dst)) @@ -3362,19 +3362,19 @@ ;; Conditionally select between two source registers. (Non-SSA form.) 
(decl emit_cmov_reg (Type WritableReg Cond Reg) ConsumesFlags) -(rule (emit_cmov_reg (gpr32_ty _ty) dst cond src) +(rule 1 (emit_cmov_reg (gpr32_ty _ty) dst cond src) (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CMov32 dst cond src) dst)) -(rule (emit_cmov_reg (gpr64_ty _ty) dst cond src) +(rule 2 (emit_cmov_reg (gpr64_ty _ty) dst cond src) (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.CMov64 dst cond src) dst)) -(rule (emit_cmov_reg $F32 dst cond src) +(rule 3 (emit_cmov_reg $F32 dst cond src) (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.FpuCMov32 dst cond src) dst)) -(rule (emit_cmov_reg $F64 dst cond src) +(rule 3 (emit_cmov_reg $F64 dst cond src) (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.FpuCMov64 dst cond src) dst)) -(rule (emit_cmov_reg (vr128_ty ty) dst cond src) +(rule 0 (emit_cmov_reg (vr128_ty ty) dst cond src) (ConsumesFlags.ConsumesFlagsReturnsReg (MInst.VecCMov dst cond src) dst)) @@ -3543,7 +3543,7 @@ ;; copied out of the hard register. In the little-endian case, we need to ;; byte-swap since the compare-and-swap instruction is always big-endian. (decl casloop_result (Type MemFlags Reg) Reg) -(rule (casloop_result (ty_32_or_64 ty) (bigendian) result) +(rule 1 (casloop_result (ty_32_or_64 ty) (bigendian) result) (copy_reg ty result)) (rule (casloop_result (ty_32_or_64 ty) (littleendian) result) (bswap_reg ty result)) @@ -3582,7 +3582,7 @@ (decl casloop_rotate_in (VecMInstBuilder Type MemFlags Reg Reg) Reg) (rule (casloop_rotate_in ib $I8 _ bitshift val) (push_rot_imm_reg ib $I32 (casloop_tmp_reg) val 0 bitshift)) -(rule (casloop_rotate_in ib $I16 (bigendian) bitshift val) +(rule 1 (casloop_rotate_in ib $I16 (bigendian) bitshift val) (push_rot_imm_reg ib $I32 (casloop_tmp_reg) val 0 bitshift)) (rule (casloop_rotate_in ib $I16 (littleendian) bitshift val) (push_rot_imm_reg ib $I32 (casloop_tmp_reg) val 16 bitshift)) @@ -3595,7 +3595,7 @@ (decl casloop_rotate_out (VecMInstBuilder Type MemFlags Reg Reg) Reg) (rule (casloop_rotate_out ib $I8 _ bitshift val) (push_rot_imm_reg ib $I32 (casloop_tmp_reg) val 0 (neg_reg $I32 bitshift))) -(rule (casloop_rotate_out ib $I16 (bigendian) bitshift val) +(rule 1 (casloop_rotate_out ib $I16 (bigendian) bitshift val) (push_rot_imm_reg ib $I32 (casloop_tmp_reg) val 0 bitshift)) (rule (casloop_rotate_out ib $I16 (littleendian) bitshift val) (push_rot_imm_reg ib $I32 (casloop_tmp_reg) val 16 bitshift)) @@ -3610,7 +3610,7 @@ (decl casloop_rotate_result (Type MemFlags Reg Reg) Reg) (rule (casloop_rotate_result $I8 _ bitshift result) (rot_imm_reg $I32 result 8 bitshift)) -(rule (casloop_rotate_result $I16 (bigendian) bitshift result) +(rule 1 (casloop_rotate_result $I16 (bigendian) bitshift result) (rot_imm_reg $I32 result 16 bitshift)) (rule (casloop_rotate_result $I16 (littleendian) bitshift result) (bswap_reg $I32 (rot_reg $I32 result bitshift))) @@ -3741,7 +3741,7 @@ ;; values that will end up in the higher-numbered lanes. 
(decl vec_pack_lane_order (Type Reg Reg) Reg) -(rule (vec_pack_lane_order ty x y) +(rule 1 (vec_pack_lane_order ty x y) (if-let (LaneOrder.BigEndian) (lane_order)) (vec_pack ty x y)) (rule (vec_pack_lane_order ty x y) @@ -3749,7 +3749,7 @@ (vec_pack ty y x)) (decl vec_pack_ssat_lane_order (Type Reg Reg) Reg) -(rule (vec_pack_ssat_lane_order ty x y) +(rule 1 (vec_pack_ssat_lane_order ty x y) (if-let (LaneOrder.BigEndian) (lane_order)) (vec_pack_ssat ty x y)) (rule (vec_pack_ssat_lane_order ty x y) @@ -3757,7 +3757,7 @@ (vec_pack_ssat ty y x)) (decl vec_pack_usat_lane_order (Type Reg Reg) Reg) -(rule (vec_pack_usat_lane_order ty x y) +(rule 1 (vec_pack_usat_lane_order ty x y) (if-let (LaneOrder.BigEndian) (lane_order)) (vec_pack_usat ty x y)) (rule (vec_pack_usat_lane_order ty x y) @@ -3770,7 +3770,7 @@ ;; from higher-numbered lanes. (decl vec_unpacks_low_lane_order (Type Reg) Reg) -(rule (vec_unpacks_low_lane_order ty x) +(rule 1 (vec_unpacks_low_lane_order ty x) (if-let (LaneOrder.BigEndian) (lane_order)) (vec_unpacks_high ty x)) (rule (vec_unpacks_low_lane_order ty x) @@ -3778,7 +3778,7 @@ (vec_unpacks_low ty x)) (decl vec_unpacks_high_lane_order (Type Reg) Reg) -(rule (vec_unpacks_high_lane_order ty x) +(rule 1 (vec_unpacks_high_lane_order ty x) (if-let (LaneOrder.BigEndian) (lane_order)) (vec_unpacks_low ty x)) (rule (vec_unpacks_high_lane_order ty x) @@ -3786,7 +3786,7 @@ (vec_unpacks_high ty x)) (decl vec_unpacku_low_lane_order (Type Reg) Reg) -(rule (vec_unpacku_low_lane_order ty x) +(rule 1 (vec_unpacku_low_lane_order ty x) (if-let (LaneOrder.BigEndian) (lane_order)) (vec_unpacku_high ty x)) (rule (vec_unpacku_low_lane_order ty x) @@ -3794,7 +3794,7 @@ (vec_unpacku_low ty x)) (decl vec_unpacku_high_lane_order (Type Reg) Reg) -(rule (vec_unpacku_high_lane_order ty x) +(rule 1 (vec_unpacku_high_lane_order ty x) (if-let (LaneOrder.BigEndian) (lane_order)) (vec_unpacku_low ty x)) (rule (vec_unpacku_high_lane_order ty x) @@ -3831,7 +3831,7 @@ ;; lanes of the output. (decl vec_merge_low_lane_order (Type Reg Reg) Reg) -(rule (vec_merge_low_lane_order ty x y) +(rule 1 (vec_merge_low_lane_order ty x y) (if-let (LaneOrder.BigEndian) (lane_order)) (vec_merge_high ty x y)) (rule (vec_merge_low_lane_order ty x y) @@ -3839,7 +3839,7 @@ (vec_merge_low ty y x)) (decl vec_merge_high_lane_order (Type Reg Reg) Reg) -(rule (vec_merge_high_lane_order ty x y) +(rule 1 (vec_merge_high_lane_order ty x y) (if-let (LaneOrder.BigEndian) (lane_order)) (vec_merge_low ty x y)) (rule (vec_merge_high_lane_order ty x y) @@ -3862,7 +3862,7 @@ ;; result. This cannot use any of the normal flags mechanisms because we need ;; to use both result and condition code output of flogr as input to the ;; conditional move, and because flogr returns a register pair. 
-(rule (clz_reg zeroval x) +(rule -1 (clz_reg zeroval x) (let ((dst WritableRegPair (temp_writable_regpair)) (_ Unit (emit (MInst.Flogr x))) (_ Unit (emit (MInst.CMov64SImm16 (writable_regpair_hi dst) @@ -3893,7 +3893,7 @@ ;; Helpers for generating saturating integer instructions ;;;;;;;;;;;;;;;;;;;;;; (decl uint_sat_reg (Type Type Reg) Reg) -(rule (uint_sat_reg ty ty reg) reg) +(rule 1 (uint_sat_reg ty ty reg) reg) (rule (uint_sat_reg $I8 (ty_32_or_64 ty) reg) (with_flags_reg (icmpu_uimm32 ty reg 256) (cmov_imm ty (intcc_as_cond (IntCC.UnsignedGreaterThan)) 255 reg))) @@ -3908,7 +3908,7 @@ (select_bool_reg $I64 cond bound reg))) (decl sint_sat_reg (Type Type Reg) Reg) -(rule (sint_sat_reg ty ty reg) reg) +(rule 1 (sint_sat_reg ty ty reg) reg) (rule (sint_sat_reg $I8 (ty_32_or_64 ty) reg) (let ((ub Reg (with_flags_reg (icmps_simm16 ty reg 127) (cmov_imm ty @@ -4253,7 +4253,7 @@ (decl aluop_and (Type) ALUOp) (rule (aluop_and (gpr32_ty _ty)) (ALUOp.And32)) -(rule (aluop_and (gpr64_ty _ty)) (ALUOp.And64)) +(rule 1 (aluop_and (gpr64_ty _ty)) (ALUOp.And64)) (decl and_reg (Type Reg Reg) Reg) (rule (and_reg ty x y) (alu_rrr ty (aluop_and ty) x y)) @@ -4275,7 +4275,7 @@ (decl aluop_or (Type) ALUOp) (rule (aluop_or (gpr32_ty _ty)) (ALUOp.Orr32)) -(rule (aluop_or (gpr64_ty _ty)) (ALUOp.Orr64)) +(rule 1 (aluop_or (gpr64_ty _ty)) (ALUOp.Orr64)) (decl or_reg (Type Reg Reg) Reg) (rule (or_reg ty x y) (alu_rrr ty (aluop_or ty) x y)) @@ -4297,7 +4297,7 @@ (decl aluop_xor (Type) ALUOp) (rule (aluop_xor (gpr32_ty _ty)) (ALUOp.Xor32)) -(rule (aluop_xor (gpr64_ty _ty)) (ALUOp.Xor64)) +(rule 1 (aluop_xor (gpr64_ty _ty)) (ALUOp.Xor64)) (decl xor_reg (Type Reg Reg) Reg) (rule (xor_reg ty x y) (alu_rrr ty (aluop_xor ty) x y)) @@ -4321,7 +4321,7 @@ (decl not_reg (Type Reg) Reg) (rule (not_reg (gpr32_ty ty) x) (xor_uimm32shifted ty x (uimm32shifted 0xffffffff 0))) -(rule (not_reg (gpr64_ty ty) x) +(rule 1 (not_reg (gpr64_ty ty) x) (xor_uimm32shifted ty (xor_uimm32shifted ty x (uimm32shifted 0xffffffff 0)) (uimm32shifted 0xffffffff 32))) @@ -4329,7 +4329,7 @@ (decl push_not_reg (VecMInstBuilder Type WritableReg Reg) Reg) (rule (push_not_reg ib (gpr32_ty ty) dst src) (push_xor_uimm32shifted ib ty dst src (uimm32shifted 0xffffffff 0))) -(rule (push_not_reg ib (gpr64_ty ty) dst src) +(rule 1 (push_not_reg ib (gpr64_ty ty) dst src) (let ((val Reg (push_xor_uimm32shifted ib ty dst src (uimm32shifted 0xffffffff 0)))) (push_xor_uimm32shifted ib ty dst val (uimm32shifted 0xffffffff 32)))) @@ -4341,7 +4341,7 @@ (decl aluop_not_and (Type) ALUOp) (rule (aluop_not_and (gpr32_ty _ty)) (ALUOp.NotAnd32)) -(rule (aluop_not_and (gpr64_ty _ty)) (ALUOp.NotAnd64)) +(rule 1 (aluop_not_and (gpr64_ty _ty)) (ALUOp.NotAnd64)) (decl not_and_reg (Type Reg Reg) Reg) (rule (not_and_reg ty x y) (alu_rrr ty (aluop_not_and ty) x y)) @@ -4354,7 +4354,7 @@ (decl aluop_not_or (Type) ALUOp) (rule (aluop_not_or (gpr32_ty _ty)) (ALUOp.NotOrr32)) -(rule (aluop_not_or (gpr64_ty _ty)) (ALUOp.NotOrr64)) +(rule 1 (aluop_not_or (gpr64_ty _ty)) (ALUOp.NotOrr64)) (decl not_or_reg (Type Reg Reg) Reg) (rule (not_or_reg ty x y) (alu_rrr ty (aluop_not_or ty) x y)) @@ -4367,7 +4367,7 @@ (decl aluop_not_xor (Type) ALUOp) (rule (aluop_not_xor (gpr32_ty _ty)) (ALUOp.NotXor32)) -(rule (aluop_not_xor (gpr64_ty _ty)) (ALUOp.NotXor64)) +(rule 1 (aluop_not_xor (gpr64_ty _ty)) (ALUOp.NotXor64)) (decl not_xor_reg (Type Reg Reg) Reg) (rule (not_xor_reg ty x y) (alu_rrr ty (aluop_not_xor ty) x y)) @@ -4380,7 +4380,7 @@ (decl aluop_and_not (Type) ALUOp) (rule (aluop_and_not 
(gpr32_ty _ty)) (ALUOp.AndNot32)) -(rule (aluop_and_not (gpr64_ty _ty)) (ALUOp.AndNot64)) +(rule 1 (aluop_and_not (gpr64_ty _ty)) (ALUOp.AndNot64)) (decl and_not_reg (Type Reg Reg) Reg) (rule (and_not_reg ty x y) (alu_rrr ty (aluop_and_not ty) x y)) @@ -4393,7 +4393,7 @@ (decl aluop_or_not (Type) ALUOp) (rule (aluop_or_not (gpr32_ty _ty)) (ALUOp.OrrNot32)) -(rule (aluop_or_not (gpr64_ty _ty)) (ALUOp.OrrNot64)) +(rule 1 (aluop_or_not (gpr64_ty _ty)) (ALUOp.OrrNot64)) (decl or_not_reg (Type Reg Reg) Reg) (rule (or_not_reg ty x y) (alu_rrr ty (aluop_or_not ty) x y)) @@ -4837,7 +4837,7 @@ ;; Helpers for generating `fpromote` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (decl fpromote_reg (Type Type Reg) Reg) -(rule (fpromote_reg ty ty x) x) +(rule 1 (fpromote_reg ty ty x) x) (rule (fpromote_reg $F64 $F32 x) (fpu_rr $F64 (FPUOp1.Cvt32To64) x)) (rule (fpromote_reg $F64X2 $F32X4 x) @@ -4847,7 +4847,7 @@ ;; Helpers for generating `fdemote` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (decl fdemote_reg (Type Type FpuRoundMode Reg) Reg) -(rule (fdemote_reg ty ty mode x) x) +(rule 1 (fdemote_reg ty ty mode x) x) (rule (fdemote_reg $F32 $F64 mode x) (fpu_round $F32 (FpuRoundOp.Cvt64To32) mode x)) (rule (fdemote_reg $F32X4 $F64X2 mode x) @@ -4883,12 +4883,12 @@ ;; Helpers for generating `fcvt_to_[us]int` instructions ;;;;;;;;;;;;;;;;;;;;;;; (decl fcvt_flt_ty (Type Type) Type) -(rule (fcvt_flt_ty (fits_in_32 ty) (and (vxrs_ext2_enabled) $F32)) $F32) +(rule 1 (fcvt_flt_ty (fits_in_32 ty) (and (vxrs_ext2_enabled) $F32)) $F32) (rule (fcvt_flt_ty (fits_in_64 ty) $F32) $F64) (rule (fcvt_flt_ty (fits_in_64 ty) $F64) $F64) (decl fcvt_int_ty (Type Type) Type) -(rule (fcvt_int_ty (fits_in_32 ty) (and (vxrs_ext2_enabled) $F32)) $I32) +(rule 1 (fcvt_int_ty (fits_in_32 ty) (and (vxrs_ext2_enabled) $F32)) $I32) (rule (fcvt_int_ty (fits_in_64 ty) $F32) $I64) (rule (fcvt_int_ty (fits_in_64 ty) $F64) $I64) diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle index c6ed27273958..44323bd4cc62 100644 --- a/cranelift/codegen/src/isa/s390x/lower.isle +++ b/cranelift/codegen/src/isa/s390x/lower.isle @@ -1,5 +1,7 @@ ;; s390x instruction selection and CLIF-to-MachInst lowering. +(pragma overlap_errors) + ;; The main lowering constructor term: takes a clif `Inst` and returns the ;; register(s) within which the lowered instruction's result values live. (decl lower (Inst) InstOutput) @@ -78,49 +80,49 @@ ;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Add two registers. -(rule (lower (has_type (fits_in_64 ty) (iadd x y))) +(rule 0 (lower (has_type (fits_in_64 ty) (iadd x y))) (add_reg ty x y)) ;; Add a register and a sign-extended register. -(rule (lower (has_type (fits_in_64 ty) (iadd x (sext32_value y)))) +(rule 8 (lower (has_type (fits_in_64 ty) (iadd x (sext32_value y)))) (add_reg_sext32 ty x y)) -(rule (lower (has_type (fits_in_64 ty) (iadd (sext32_value x) y))) +(rule 15 (lower (has_type (fits_in_64 ty) (iadd (sext32_value x) y))) (add_reg_sext32 ty y x)) ;; Add a register and an immediate. 
-(rule (lower (has_type (fits_in_64 ty) (iadd x (i16_from_value y)))) +(rule 7 (lower (has_type (fits_in_64 ty) (iadd x (i16_from_value y)))) (add_simm16 ty x y)) -(rule (lower (has_type (fits_in_64 ty) (iadd (i16_from_value x) y))) +(rule 14 (lower (has_type (fits_in_64 ty) (iadd (i16_from_value x) y))) (add_simm16 ty y x)) -(rule (lower (has_type (fits_in_64 ty) (iadd x (i32_from_value y)))) +(rule 6 (lower (has_type (fits_in_64 ty) (iadd x (i32_from_value y)))) (add_simm32 ty x y)) -(rule (lower (has_type (fits_in_64 ty) (iadd (i32_from_value x) y))) +(rule 13 (lower (has_type (fits_in_64 ty) (iadd (i32_from_value x) y))) (add_simm32 ty y x)) ;; Add a register and memory (32/64-bit types). -(rule (lower (has_type (fits_in_64 ty) (iadd x (sinkable_load_32_64 y)))) +(rule 5 (lower (has_type (fits_in_64 ty) (iadd x (sinkable_load_32_64 y)))) (add_mem ty x (sink_load y))) -(rule (lower (has_type (fits_in_64 ty) (iadd (sinkable_load_32_64 x) y))) +(rule 12 (lower (has_type (fits_in_64 ty) (iadd (sinkable_load_32_64 x) y))) (add_mem ty y (sink_load x))) ;; Add a register and memory (16-bit types). -(rule (lower (has_type (fits_in_64 ty) (iadd x (sinkable_load_16 y)))) +(rule 4 (lower (has_type (fits_in_64 ty) (iadd x (sinkable_load_16 y)))) (add_mem_sext16 ty x (sink_load y))) -(rule (lower (has_type (fits_in_64 ty) (iadd (sinkable_load_16 x) y))) +(rule 11 (lower (has_type (fits_in_64 ty) (iadd (sinkable_load_16 x) y))) (add_mem_sext16 ty y (sink_load x))) ;; Add a register and sign-extended memory. -(rule (lower (has_type (fits_in_64 ty) (iadd x (sinkable_sload16 y)))) +(rule 3 (lower (has_type (fits_in_64 ty) (iadd x (sinkable_sload16 y)))) (add_mem_sext16 ty x (sink_sload16 y))) -(rule (lower (has_type (fits_in_64 ty) (iadd (sinkable_sload16 x) y))) +(rule 10 (lower (has_type (fits_in_64 ty) (iadd (sinkable_sload16 x) y))) (add_mem_sext16 ty y (sink_sload16 x))) -(rule (lower (has_type (fits_in_64 ty) (iadd x (sinkable_sload32 y)))) +(rule 2 (lower (has_type (fits_in_64 ty) (iadd x (sinkable_sload32 y)))) (add_mem_sext32 ty x (sink_sload32 y))) -(rule (lower (has_type (fits_in_64 ty) (iadd (sinkable_sload32 x) y))) +(rule 9 (lower (has_type (fits_in_64 ty) (iadd (sinkable_sload32 x) y))) (add_mem_sext32 ty y (sink_sload32 x))) ;; Add two vector registers. -(rule (lower (has_type (vr128_ty ty) (iadd x y))) +(rule 1 (lower (has_type (vr128_ty ty) (iadd x y))) (vec_add ty x y)) @@ -156,35 +158,35 @@ ;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Sub two registers. -(rule (lower (has_type (fits_in_64 ty) (isub x y))) +(rule 0 (lower (has_type (fits_in_64 ty) (isub x y))) (sub_reg ty x y)) ;; Sub a register and a sign-extended register. -(rule (lower (has_type (fits_in_64 ty) (isub x (sext32_value y)))) +(rule 8 (lower (has_type (fits_in_64 ty) (isub x (sext32_value y)))) (sub_reg_sext32 ty x y)) ;; Sub a register and an immediate (using add of the negated value). -(rule (lower (has_type (fits_in_64 ty) (isub x (i16_from_negated_value y)))) +(rule 7 (lower (has_type (fits_in_64 ty) (isub x (i16_from_negated_value y)))) (add_simm16 ty x y)) -(rule (lower (has_type (fits_in_64 ty) (isub x (i32_from_negated_value y)))) +(rule 6 (lower (has_type (fits_in_64 ty) (isub x (i32_from_negated_value y)))) (add_simm32 ty x y)) ;; Sub a register and memory (32/64-bit types). 
-(rule (lower (has_type (fits_in_64 ty) (isub x (sinkable_load_32_64 y)))) +(rule 5 (lower (has_type (fits_in_64 ty) (isub x (sinkable_load_32_64 y)))) (sub_mem ty x (sink_load y))) ;; Sub a register and memory (16-bit types). -(rule (lower (has_type (fits_in_64 ty) (isub x (sinkable_load_16 y)))) +(rule 4 (lower (has_type (fits_in_64 ty) (isub x (sinkable_load_16 y)))) (sub_mem_sext16 ty x (sink_load y))) ;; Sub a register and sign-extended memory. -(rule (lower (has_type (fits_in_64 ty) (isub x (sinkable_sload16 y)))) +(rule 3 (lower (has_type (fits_in_64 ty) (isub x (sinkable_sload16 y)))) (sub_mem_sext16 ty x (sink_sload16 y))) -(rule (lower (has_type (fits_in_64 ty) (isub x (sinkable_sload32 y)))) +(rule 2 (lower (has_type (fits_in_64 ty) (isub x (sinkable_sload32 y)))) (sub_mem_sext32 ty x (sink_sload32 y))) ;; Sub two vector registers. -(rule (lower (has_type (vr128_ty ty) (isub x y))) +(rule 1 (lower (has_type (vr128_ty ty) (isub x y))) (vec_sub ty x y)) @@ -219,31 +221,31 @@ (output_pair reg (value_regs_invalid))) ;; Add two registers. -(rule (lower (has_type (fits_in_64 ty) (iadd_ifcout x y))) +(rule 0 (lower (has_type (fits_in_64 ty) (iadd_ifcout x y))) (output_ifcout (add_logical_reg ty x y))) ;; Add a register and a zero-extended register. -(rule (lower (has_type (fits_in_64 ty) (iadd_ifcout x (zext32_value y)))) +(rule 4 (lower (has_type (fits_in_64 ty) (iadd_ifcout x (zext32_value y)))) (output_ifcout (add_logical_reg_zext32 ty x y))) -(rule (lower (has_type (fits_in_64 ty) (iadd_ifcout (zext32_value x) y))) +(rule 8 (lower (has_type (fits_in_64 ty) (iadd_ifcout (zext32_value x) y))) (output_ifcout (add_logical_reg_zext32 ty y x))) ;; Add a register and an immediate. -(rule (lower (has_type (fits_in_64 ty) (iadd_ifcout x (u32_from_value y)))) +(rule 3 (lower (has_type (fits_in_64 ty) (iadd_ifcout x (u32_from_value y)))) (output_ifcout (add_logical_zimm32 ty x y))) -(rule (lower (has_type (fits_in_64 ty) (iadd_ifcout (u32_from_value x) y))) +(rule 7 (lower (has_type (fits_in_64 ty) (iadd_ifcout (u32_from_value x) y))) (output_ifcout (add_logical_zimm32 ty y x))) ;; Add a register and memory (32/64-bit types). -(rule (lower (has_type (fits_in_64 ty) (iadd_ifcout x (sinkable_load_32_64 y)))) +(rule 2 (lower (has_type (fits_in_64 ty) (iadd_ifcout x (sinkable_load_32_64 y)))) (output_ifcout (add_logical_mem ty x (sink_load y)))) -(rule (lower (has_type (fits_in_64 ty) (iadd_ifcout (sinkable_load_32_64 x) y))) +(rule 6 (lower (has_type (fits_in_64 ty) (iadd_ifcout (sinkable_load_32_64 x) y))) (output_ifcout (add_logical_mem ty y (sink_load x)))) ;; Add a register and zero-extended memory. -(rule (lower (has_type (fits_in_64 ty) (iadd_ifcout x (sinkable_uload32 y)))) +(rule 1 (lower (has_type (fits_in_64 ty) (iadd_ifcout x (sinkable_uload32 y)))) (output_ifcout (add_logical_mem_zext32 ty x (sink_uload32 y)))) -(rule (lower (has_type (fits_in_64 ty) (iadd_ifcout (sinkable_uload32 x) y))) +(rule 5 (lower (has_type (fits_in_64 ty) (iadd_ifcout (sinkable_uload32 x) y))) (output_ifcout (add_logical_mem_zext32 ty y (sink_uload32 x)))) @@ -251,19 +253,19 @@ ;; Absolute value of a register. ;; For types smaller than 32-bit, the input value must be sign-extended. -(rule (lower (has_type (fits_in_64 ty) (iabs x))) +(rule 2 (lower (has_type (fits_in_64 ty) (iabs x))) (abs_reg (ty_ext32 ty) (put_in_reg_sext32 x))) ;; Absolute value of a sign-extended register. 
-(rule (lower (has_type (fits_in_64 ty) (iabs (sext32_value x)))) +(rule 3 (lower (has_type (fits_in_64 ty) (iabs (sext32_value x)))) (abs_reg_sext32 ty x)) ;; Absolute value of a vector register. -(rule (lower (has_type (ty_vec128 ty) (iabs x))) +(rule 1 (lower (has_type (ty_vec128 ty) (iabs x))) (vec_abs ty x)) ;; Absolute value of a 128-bit integer. -(rule (lower (has_type $I128 (iabs x))) +(rule 0 (lower (has_type $I128 (iabs x))) (let ((zero Reg (vec_imm $I128 0)) (pos Reg x) (neg Reg (vec_sub $I128 zero pos)) @@ -275,19 +277,19 @@ ;;;; Rules for `ineg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Negate a register. -(rule (lower (has_type (fits_in_64 ty) (ineg x))) +(rule 2 (lower (has_type (fits_in_64 ty) (ineg x))) (neg_reg ty x)) ;; Negate a sign-extended register. -(rule (lower (has_type (fits_in_64 ty) (ineg (sext32_value x)))) +(rule 3 (lower (has_type (fits_in_64 ty) (ineg (sext32_value x)))) (neg_reg_sext32 ty x)) ;; Negate a vector register. -(rule (lower (has_type (ty_vec128 ty) (ineg x))) +(rule 1 (lower (has_type (ty_vec128 ty) (ineg x))) (vec_neg ty x)) ;; Negate a 128-bit integer. -(rule (lower (has_type $I128 (ineg x))) +(rule 0 (lower (has_type $I128 (ineg x))) (vec_sub $I128 (vec_imm $I128 0) x)) @@ -329,50 +331,50 @@ ;;;; Rules for `imul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Multiply two registers. -(rule (lower (has_type (fits_in_64 ty) (imul x y))) +(rule 0 (lower (has_type (fits_in_64 ty) (imul x y))) (mul_reg ty x y)) ;; Multiply a register and a sign-extended register. -(rule (lower (has_type (fits_in_64 ty) (imul x (sext32_value y)))) +(rule 8 (lower (has_type (fits_in_64 ty) (imul x (sext32_value y)))) (mul_reg_sext32 ty x y)) -(rule (lower (has_type (fits_in_64 ty) (imul (sext32_value x) y))) +(rule 15 (lower (has_type (fits_in_64 ty) (imul (sext32_value x) y))) (mul_reg_sext32 ty y x)) ;; Multiply a register and an immediate. -(rule (lower (has_type (fits_in_64 ty) (imul x (i16_from_value y)))) +(rule 7 (lower (has_type (fits_in_64 ty) (imul x (i16_from_value y)))) (mul_simm16 ty x y)) -(rule (lower (has_type (fits_in_64 ty) (imul (i16_from_value x) y))) +(rule 14 (lower (has_type (fits_in_64 ty) (imul (i16_from_value x) y))) (mul_simm16 ty y x)) -(rule (lower (has_type (fits_in_64 ty) (imul x (i32_from_value y)))) +(rule 6 (lower (has_type (fits_in_64 ty) (imul x (i32_from_value y)))) (mul_simm32 ty x y)) -(rule (lower (has_type (fits_in_64 ty) (imul (i32_from_value x) y))) +(rule 13 (lower (has_type (fits_in_64 ty) (imul (i32_from_value x) y))) (mul_simm32 ty y x)) ;; Multiply a register and memory (32/64-bit types). -(rule (lower (has_type (fits_in_64 ty) (imul x (sinkable_load_32_64 y)))) +(rule 5 (lower (has_type (fits_in_64 ty) (imul x (sinkable_load_32_64 y)))) (mul_mem ty x (sink_load y))) -(rule (lower (has_type (fits_in_64 ty) (imul (sinkable_load_32_64 x) y))) +(rule 12 (lower (has_type (fits_in_64 ty) (imul (sinkable_load_32_64 x) y))) (mul_mem ty y (sink_load x))) ;; Multiply a register and memory (16-bit types). -(rule (lower (has_type (fits_in_64 ty) (imul x (sinkable_load_16 y)))) +(rule 4 (lower (has_type (fits_in_64 ty) (imul x (sinkable_load_16 y)))) (mul_mem_sext16 ty x (sink_load y))) -(rule (lower (has_type (fits_in_64 ty) (imul (sinkable_load_16 x) y))) +(rule 11 (lower (has_type (fits_in_64 ty) (imul (sinkable_load_16 x) y))) (mul_mem_sext16 ty y (sink_load x))) ;; Multiply a register and sign-extended memory. 
-(rule (lower (has_type (fits_in_64 ty) (imul x (sinkable_sload16 y)))) +(rule 3 (lower (has_type (fits_in_64 ty) (imul x (sinkable_sload16 y)))) (mul_mem_sext16 ty x (sink_sload16 y))) -(rule (lower (has_type (fits_in_64 ty) (imul (sinkable_sload16 x) y))) +(rule 10 (lower (has_type (fits_in_64 ty) (imul (sinkable_sload16 x) y))) (mul_mem_sext16 ty y (sink_sload16 x))) -(rule (lower (has_type (fits_in_64 ty) (imul x (sinkable_sload32 y)))) +(rule 2 (lower (has_type (fits_in_64 ty) (imul x (sinkable_sload32 y)))) (mul_mem_sext32 ty x (sink_sload32 y))) -(rule (lower (has_type (fits_in_64 ty) (imul (sinkable_sload32 x) y))) +(rule 9 (lower (has_type (fits_in_64 ty) (imul (sinkable_sload32 x) y))) (mul_mem_sext32 ty y (sink_sload32 x))) ;; Multiply two vector registers, using a helper. (decl vec_mul_impl (Type Reg Reg) Reg) -(rule (lower (has_type (vr128_ty ty) (imul x y))) +(rule 1 (lower (has_type (vr128_ty ty) (imul x y))) (vec_mul_impl ty x y)) ;; Multiply two vector registers - byte, halfword, and word. @@ -406,7 +408,7 @@ ;;;; Rules for `umulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Multiply high part unsigned, 8-bit or 16-bit types. (Uses 32-bit multiply.) -(rule (lower (has_type (ty_8_or_16 ty) (umulhi x y))) +(rule -1 (lower (has_type (ty_8_or_16 ty) (umulhi x y))) (let ((ext_reg_x Reg (put_in_reg_zext32 x)) (ext_reg_y Reg (put_in_reg_zext32 y)) (ext_mul Reg (mul_reg $I32 ext_reg_x ext_reg_y))) @@ -444,7 +446,7 @@ ;;;; Rules for `smulhi` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Multiply high part signed, 8-bit or 16-bit types. (Uses 32-bit multiply.) -(rule (lower (has_type (ty_8_or_16 ty) (smulhi x y))) +(rule -1 (lower (has_type (ty_8_or_16 ty) (smulhi x y))) (let ((ext_reg_x Reg (put_in_reg_sext32 x)) (ext_reg_y Reg (put_in_reg_sext32 y)) (ext_mul Reg (mul_reg $I32 ext_reg_x ext_reg_y))) @@ -570,11 +572,11 @@ ;; If the `avoid_div_traps` flag is true, we perform the check explicitly. ;; This still can be omitted if the divisor is a non-zero immediate. (decl zero_divisor_check_needed (Value) bool) -(rule (zero_divisor_check_needed (i64_from_value x)) +(rule 2 (zero_divisor_check_needed (i64_from_value x)) (if (i64_nonzero x)) $false) -(rule (zero_divisor_check_needed (value_type (allow_div_traps))) $false) -(rule (zero_divisor_check_needed _) $true) +(rule 1 (zero_divisor_check_needed (value_type (allow_div_traps))) $false) +(rule 0 (zero_divisor_check_needed _) $true) ;; Perform the divide-by-zero check if required. ;; This is simply a compare-and-trap of the (extended) divisor against 0. @@ -653,7 +655,7 @@ ;; minimum (signed) integer value is divided by -1, so if the divisor ;; is any immediate different from -1, the check can be omitted. (decl div_overflow_check_needed (Value) bool) -(rule (div_overflow_check_needed (i64_from_value x)) +(rule 1 (div_overflow_check_needed (i64_from_value x)) (if (i64_not_neg1 x)) $false) (rule (div_overflow_check_needed _) $true) @@ -707,26 +709,26 @@ ;;;; Rules for `ishl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Shift left, shift amount in register. -(rule (lower (has_type (fits_in_64 ty) (ishl x y))) +(rule 0 (lower (has_type (fits_in_64 ty) (ishl x y))) (let ((masked_amt Reg (mask_amt_reg ty (amt_reg y)))) (lshl_reg ty x masked_amt))) ;; Shift left, immediate shift amount. 
-(rule (lower (has_type (fits_in_64 ty) (ishl x (i64_from_value y)))) +(rule 1 (lower (has_type (fits_in_64 ty) (ishl x (i64_from_value y)))) (let ((masked_amt u8 (mask_amt_imm ty y))) (lshl_imm ty x masked_amt))) ;; Vector shift left, shift amount in register. -(rule (lower (has_type (ty_vec128 ty) (ishl x y))) +(rule 2 (lower (has_type (ty_vec128 ty) (ishl x y))) (vec_lshl_reg ty x (amt_reg y))) ;; Vector shift left, immediate shift amount. -(rule (lower (has_type (ty_vec128 ty) (ishl x (i64_from_value y)))) +(rule 3 (lower (has_type (ty_vec128 ty) (ishl x (i64_from_value y)))) (let ((masked_amt u8 (mask_amt_imm ty y))) (vec_lshl_imm ty x masked_amt))) ;; 128-bit vector shift left. -(rule (lower (has_type $I128 (ishl x y))) +(rule 4 (lower (has_type $I128 (ishl x y))) (let ((amt Reg (amt_vr y))) (vec_lshl_by_bit (vec_lshl_by_byte x amt) amt))) @@ -735,29 +737,29 @@ ;; Shift right logical, shift amount in register. ;; For types smaller than 32-bit, the input value must be zero-extended. -(rule (lower (has_type (fits_in_64 ty) (ushr x y))) +(rule 0 (lower (has_type (fits_in_64 ty) (ushr x y))) (let ((ext_reg Reg (put_in_reg_zext32 x)) (masked_amt Reg (mask_amt_reg ty (amt_reg y)))) (lshr_reg (ty_ext32 ty) ext_reg masked_amt))) ;; Shift right logical, immediate shift amount. ;; For types smaller than 32-bit, the input value must be zero-extended. -(rule (lower (has_type (fits_in_64 ty) (ushr x (i64_from_value y)))) +(rule 1 (lower (has_type (fits_in_64 ty) (ushr x (i64_from_value y)))) (let ((ext_reg Reg (put_in_reg_zext32 x)) (masked_amt u8 (mask_amt_imm ty y))) (lshr_imm (ty_ext32 ty) ext_reg masked_amt))) ;; Vector shift right logical, shift amount in register. -(rule (lower (has_type (ty_vec128 ty) (ushr x y))) +(rule 2 (lower (has_type (ty_vec128 ty) (ushr x y))) (vec_lshr_reg ty x (amt_reg y))) ;; Vector shift right logical, immediate shift amount. -(rule (lower (has_type (ty_vec128 ty) (ushr x (i64_from_value y)))) +(rule 3 (lower (has_type (ty_vec128 ty) (ushr x (i64_from_value y)))) (let ((masked_amt u8 (mask_amt_imm ty y))) (vec_lshr_imm ty x masked_amt))) ;; 128-bit vector shift right logical. -(rule (lower (has_type $I128 (ushr x y))) +(rule 4 (lower (has_type $I128 (ushr x y))) (let ((amt Reg (amt_vr y))) (vec_lshr_by_bit (vec_lshr_by_byte x amt) amt))) @@ -766,29 +768,29 @@ ;; Shift right arithmetic, shift amount in register. ;; For types smaller than 32-bit, the input value must be sign-extended. -(rule (lower (has_type (fits_in_64 ty) (sshr x y))) +(rule 0 (lower (has_type (fits_in_64 ty) (sshr x y))) (let ((ext_reg Reg (put_in_reg_sext32 x)) (masked_amt Reg (mask_amt_reg ty (amt_reg y)))) (ashr_reg (ty_ext32 ty) ext_reg masked_amt))) ;; Shift right arithmetic, immediate shift amount. ;; For types smaller than 32-bit, the input value must be sign-extended. -(rule (lower (has_type (fits_in_64 ty) (sshr x (i64_from_value y)))) +(rule 1 (lower (has_type (fits_in_64 ty) (sshr x (i64_from_value y)))) (let ((ext_reg Reg (put_in_reg_sext32 x)) (masked_amt u8 (mask_amt_imm ty y))) (ashr_imm (ty_ext32 ty) ext_reg masked_amt))) ;; Vector shift right arithmetic, shift amount in register. -(rule (lower (has_type (ty_vec128 ty) (sshr x y))) +(rule 2 (lower (has_type (ty_vec128 ty) (sshr x y))) (vec_ashr_reg ty x (amt_reg y))) ;; Vector shift right arithmetic, immediate shift amount. 
-(rule (lower (has_type (ty_vec128 ty) (sshr x (i64_from_value y)))) +(rule 3 (lower (has_type (ty_vec128 ty) (sshr x (i64_from_value y)))) (let ((masked_amt u8 (mask_amt_imm ty y))) (vec_ashr_imm ty x masked_amt))) ;; 128-bit vector shift right arithmetic. -(rule (lower (has_type $I128 (sshr x y))) +(rule 4 (lower (has_type $I128 (sshr x y))) (let ((amt Reg (amt_vr y))) (vec_ashr_by_bit (vec_ashr_by_byte x amt) amt))) @@ -796,17 +798,17 @@ ;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Rotate left, shift amount in register. 32-bit or 64-bit types. -(rule (lower (has_type (ty_32_or_64 ty) (rotl x y))) +(rule 0 (lower (has_type (ty_32_or_64 ty) (rotl x y))) (rot_reg ty x (amt_reg y))) ;; Rotate left, immediate shift amount. 32-bit or 64-bit types. -(rule (lower (has_type (ty_32_or_64 ty) (rotl x (i64_from_value y)))) +(rule 1 (lower (has_type (ty_32_or_64 ty) (rotl x (i64_from_value y)))) (let ((masked_amt u8 (mask_amt_imm ty y))) (rot_imm ty x masked_amt))) ;; Rotate left, shift amount in register. 8-bit or 16-bit types. ;; Implemented via a pair of 32-bit shifts on the zero-extended input. -(rule (lower (has_type (ty_8_or_16 ty) (rotl x y))) +(rule 2 (lower (has_type (ty_8_or_16 ty) (rotl x y))) (let ((ext_reg Reg (put_in_reg_zext32 x)) (ext_ty Type (ty_ext32 ty)) (pos_amt Reg (amt_reg y)) @@ -818,7 +820,7 @@ ;; Rotate left, immediate shift amount. 8-bit or 16-bit types. ;; Implemented via a pair of 32-bit shifts on the zero-extended input. -(rule (lower (has_type (ty_8_or_16 ty) (rotl x (and (i64_from_value pos_amt) +(rule 3 (lower (has_type (ty_8_or_16 ty) (rotl x (and (i64_from_value pos_amt) (i64_from_negated_value neg_amt))))) (let ((ext_reg Reg (put_in_reg_zext32 x)) (ext_ty Type (ty_ext32 ty)) @@ -828,17 +830,17 @@ (lshr_imm ext_ty ext_reg masked_neg_amt)))) ;; Vector rotate left, shift amount in register. -(rule (lower (has_type (ty_vec128 ty) (rotl x y))) +(rule 4 (lower (has_type (ty_vec128 ty) (rotl x y))) (vec_rot_reg ty x (amt_reg y))) ;; Vector rotate left, immediate shift amount. -(rule (lower (has_type (ty_vec128 ty) (rotl x (i64_from_value y)))) +(rule 5 (lower (has_type (ty_vec128 ty) (rotl x (i64_from_value y)))) (let ((masked_amt u8 (mask_amt_imm ty y))) (vec_rot_imm ty x masked_amt))) ;; 128-bit full vector rotate left. ;; Implemented via a pair of 128-bit full vector shifts. -(rule (lower (has_type $I128 (rotl x y))) +(rule 6 (lower (has_type $I128 (rotl x y))) (let ((x_reg Reg x) (pos_amt Reg (amt_vr y)) (neg_amt Reg (vec_neg $I8X16 pos_amt))) @@ -851,19 +853,19 @@ ;; Rotate right, shift amount in register. 32-bit or 64-bit types. ;; Implemented as rotate left with negated rotate amount. -(rule (lower (has_type (ty_32_or_64 ty) (rotr x y))) +(rule 0 (lower (has_type (ty_32_or_64 ty) (rotr x y))) (let ((negated_amt Reg (neg_reg $I32 (amt_reg y)))) (rot_reg ty x negated_amt))) ;; Rotate right, immediate shift amount. 32-bit or 64-bit types. ;; Implemented as rotate left with negated rotate amount. -(rule (lower (has_type (ty_32_or_64 ty) (rotr x (i64_from_negated_value y)))) +(rule 1 (lower (has_type (ty_32_or_64 ty) (rotr x (i64_from_negated_value y)))) (let ((negated_amt u8 (mask_amt_imm ty y))) (rot_imm ty x negated_amt))) ;; Rotate right, shift amount in register. 8-bit or 16-bit types. ;; Implemented as rotate left with negated rotate amount. 
-(rule (lower (has_type (ty_8_or_16 ty) (rotr x y))) +(rule 2 (lower (has_type (ty_8_or_16 ty) (rotr x y))) (let ((ext_reg Reg (put_in_reg_zext32 x)) (ext_ty Type (ty_ext32 ty)) (pos_amt Reg (amt_reg y)) @@ -875,7 +877,7 @@ ;; Rotate right, immediate shift amount. 8-bit or 16-bit types. ;; Implemented as rotate left with negated rotate amount. -(rule (lower (has_type (ty_8_or_16 ty) (rotr x (and (i64_from_value pos_amt) +(rule 3 (lower (has_type (ty_8_or_16 ty) (rotr x (and (i64_from_value pos_amt) (i64_from_negated_value neg_amt))))) (let ((ext_reg Reg (put_in_reg_zext32 x)) (ext_ty Type (ty_ext32 ty)) @@ -886,19 +888,19 @@ ;; Vector rotate right, shift amount in register. ;; Implemented as rotate left with negated rotate amount. -(rule (lower (has_type (ty_vec128 ty) (rotr x y))) +(rule 4 (lower (has_type (ty_vec128 ty) (rotr x y))) (let ((negated_amt Reg (neg_reg $I32 (amt_reg y)))) (vec_rot_reg ty x negated_amt))) ;; Vector rotate right, immediate shift amount. ;; Implemented as rotate left with negated rotate amount. -(rule (lower (has_type (ty_vec128 ty) (rotr x (i64_from_negated_value y)))) +(rule 5 (lower (has_type (ty_vec128 ty) (rotr x (i64_from_negated_value y)))) (let ((negated_amt u8 (mask_amt_imm ty y))) (vec_rot_imm ty x negated_amt))) ;; 128-bit full vector rotate right. ;; Implemented via a pair of 128-bit full vector shifts. -(rule (lower (has_type $I128 (rotr x y))) +(rule 6 (lower (has_type $I128 (rotr x y))) (let ((x_reg Reg x) (pos_amt Reg (amt_vr y)) (neg_amt Reg (vec_neg $I8X16 pos_amt))) @@ -910,7 +912,7 @@ ;;;; Rules for `ireduce` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Up to 64-bit source type: Always a no-op. -(rule (lower (ireduce x @ (value_type (fits_in_64 _ty)))) +(rule 1 (lower (ireduce x @ (value_type (fits_in_64 _ty)))) x) ;; 128-bit source type: Extract the low half. @@ -921,11 +923,11 @@ ;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; 16- or 32-bit target types. -(rule (lower (has_type (gpr32_ty _ty) (uextend x))) +(rule 1 (lower (has_type (gpr32_ty _ty) (uextend x))) (put_in_reg_zext32 x)) ;; 64-bit target types. -(rule (lower (has_type (gpr64_ty _ty) (uextend x))) +(rule 2 (lower (has_type (gpr64_ty _ty) (uextend x))) (put_in_reg_zext64 x)) ;; 128-bit target types. @@ -942,11 +944,11 @@ ;;;; Rules for `sextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; 16- or 32-bit target types. -(rule (lower (has_type (gpr32_ty _ty) (sextend x))) +(rule 1 (lower (has_type (gpr32_ty _ty) (sextend x))) (put_in_reg_sext32 x)) ;; 64-bit target types. -(rule (lower (has_type (gpr64_ty _ty) (sextend x))) +(rule 2 (lower (has_type (gpr64_ty _ty) (sextend x))) (put_in_reg_sext64 x)) ;; 128-bit target types. @@ -1001,12 +1003,12 @@ ;;;; Rules for `bnot` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; z15 version using a single instruction (NOR). -(rule (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bnot x))) +(rule 2 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bnot x))) (let ((rx Reg x)) (not_or_reg ty rx rx))) ;; z14 version using XOR with -1. -(rule (lower (has_type (and (mie2_disabled) (fits_in_64 ty)) (bnot x))) +(rule 1 (lower (has_type (and (mie2_disabled) (fits_in_64 ty)) (bnot x))) (not_reg ty x)) ;; Vector version using vector NOR. @@ -1017,87 +1019,87 @@ ;;;; Rules for `band` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; And two registers. 
-(rule (lower (has_type (fits_in_64 ty) (band x y))) +(rule -1 (lower (has_type (fits_in_64 ty) (band x y))) (and_reg ty x y)) ;; And a register and an immediate. -(rule (lower (has_type (fits_in_64 ty) (band x (uimm16shifted_from_inverted_value y)))) +(rule 5 (lower (has_type (fits_in_64 ty) (band x (uimm16shifted_from_inverted_value y)))) (and_uimm16shifted ty x y)) -(rule (lower (has_type (fits_in_64 ty) (band (uimm16shifted_from_inverted_value x) y))) +(rule 6 (lower (has_type (fits_in_64 ty) (band (uimm16shifted_from_inverted_value x) y))) (and_uimm16shifted ty y x)) -(rule (lower (has_type (fits_in_64 ty) (band x (uimm32shifted_from_inverted_value y)))) +(rule 3 (lower (has_type (fits_in_64 ty) (band x (uimm32shifted_from_inverted_value y)))) (and_uimm32shifted ty x y)) -(rule (lower (has_type (fits_in_64 ty) (band (uimm32shifted_from_inverted_value x) y))) +(rule 4 (lower (has_type (fits_in_64 ty) (band (uimm32shifted_from_inverted_value x) y))) (and_uimm32shifted ty y x)) ;; And a register and memory (32/64-bit types). -(rule (lower (has_type (fits_in_64 ty) (band x (sinkable_load_32_64 y)))) +(rule 1 (lower (has_type (fits_in_64 ty) (band x (sinkable_load_32_64 y)))) (and_mem ty x (sink_load y))) -(rule (lower (has_type (fits_in_64 ty) (band (sinkable_load_32_64 x) y))) +(rule 2 (lower (has_type (fits_in_64 ty) (band (sinkable_load_32_64 x) y))) (and_mem ty y (sink_load x))) ;; And two vector registers. -(rule (lower (has_type (vr128_ty ty) (band x y))) +(rule 0 (lower (has_type (vr128_ty ty) (band x y))) (vec_and ty x y)) ;;;; Rules for `bor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Or two registers. -(rule (lower (has_type (fits_in_64 ty) (bor x y))) +(rule -1 (lower (has_type (fits_in_64 ty) (bor x y))) (or_reg ty x y)) ;; Or a register and an immediate. -(rule (lower (has_type (fits_in_64 ty) (bor x (uimm16shifted_from_value y)))) +(rule 5 (lower (has_type (fits_in_64 ty) (bor x (uimm16shifted_from_value y)))) (or_uimm16shifted ty x y)) -(rule (lower (has_type (fits_in_64 ty) (bor (uimm16shifted_from_value x) y))) +(rule 6 (lower (has_type (fits_in_64 ty) (bor (uimm16shifted_from_value x) y))) (or_uimm16shifted ty y x)) -(rule (lower (has_type (fits_in_64 ty) (bor x (uimm32shifted_from_value y)))) +(rule 3 (lower (has_type (fits_in_64 ty) (bor x (uimm32shifted_from_value y)))) (or_uimm32shifted ty x y)) -(rule (lower (has_type (fits_in_64 ty) (bor (uimm32shifted_from_value x) y))) +(rule 4 (lower (has_type (fits_in_64 ty) (bor (uimm32shifted_from_value x) y))) (or_uimm32shifted ty y x)) ;; Or a register and memory (32/64-bit types). -(rule (lower (has_type (fits_in_64 ty) (bor x (sinkable_load_32_64 y)))) +(rule 1 (lower (has_type (fits_in_64 ty) (bor x (sinkable_load_32_64 y)))) (or_mem ty x (sink_load y))) -(rule (lower (has_type (fits_in_64 ty) (bor (sinkable_load_32_64 x) y))) +(rule 2 (lower (has_type (fits_in_64 ty) (bor (sinkable_load_32_64 x) y))) (or_mem ty y (sink_load x))) ;; Or two vector registers. -(rule (lower (has_type (vr128_ty ty) (bor x y))) +(rule 0 (lower (has_type (vr128_ty ty) (bor x y))) (vec_or ty x y)) ;;;; Rules for `bxor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Xor two registers. -(rule (lower (has_type (fits_in_64 ty) (bxor x y))) +(rule -1 (lower (has_type (fits_in_64 ty) (bxor x y))) (xor_reg ty x y)) ;; Xor a register and an immediate. 
-(rule (lower (has_type (fits_in_64 ty) (bxor x (uimm32shifted_from_value y)))) +(rule 3 (lower (has_type (fits_in_64 ty) (bxor x (uimm32shifted_from_value y)))) (xor_uimm32shifted ty x y)) -(rule (lower (has_type (fits_in_64 ty) (bxor (uimm32shifted_from_value x) y))) +(rule 4 (lower (has_type (fits_in_64 ty) (bxor (uimm32shifted_from_value x) y))) (xor_uimm32shifted ty y x)) ;; Xor a register and memory (32/64-bit types). -(rule (lower (has_type (fits_in_64 ty) (bxor x (sinkable_load_32_64 y)))) +(rule 1 (lower (has_type (fits_in_64 ty) (bxor x (sinkable_load_32_64 y)))) (xor_mem ty x (sink_load y))) -(rule (lower (has_type (fits_in_64 ty) (bxor (sinkable_load_32_64 x) y))) +(rule 2 (lower (has_type (fits_in_64 ty) (bxor (sinkable_load_32_64 x) y))) (xor_mem ty y (sink_load x))) ;; Xor two vector registers. -(rule (lower (has_type (vr128_ty ty) (bxor x y))) +(rule 0 (lower (has_type (vr128_ty ty) (bxor x y))) (vec_xor ty x y)) ;;;; Rules for `band_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; z15 version using a single instruction. -(rule (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (band_not x y))) +(rule 2 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (band_not x y))) (and_not_reg ty x y)) ;; z14 version using XOR with -1. -(rule (lower (has_type (and (mie2_disabled) (fits_in_64 ty)) (band_not x y))) +(rule 1 (lower (has_type (and (mie2_disabled) (fits_in_64 ty)) (band_not x y))) (and_reg ty x (not_reg ty y))) ;; And-not two vector registers. @@ -1108,11 +1110,11 @@ ;;;; Rules for `bor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; z15 version using a single instruction. -(rule (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bor_not x y))) +(rule 2 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bor_not x y))) (or_not_reg ty x y)) ;; z14 version using XOR with -1. -(rule (lower (has_type (and (mie2_disabled) (fits_in_64 ty)) (bor_not x y))) +(rule 1 (lower (has_type (and (mie2_disabled) (fits_in_64 ty)) (bor_not x y))) (or_reg ty x (not_reg ty y))) ;; Or-not two vector registers. @@ -1123,11 +1125,11 @@ ;;;; Rules for `bxor_not` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; z15 version using a single instruction. -(rule (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bxor_not x y))) +(rule 2 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bxor_not x y))) (not_xor_reg ty x y)) ;; z14 version using XOR with -1. -(rule (lower (has_type (and (mie2_disabled) (fits_in_64 ty)) (bxor_not x y))) +(rule 1 (lower (has_type (and (mie2_disabled) (fits_in_64 ty)) (bxor_not x y))) (not_reg ty (xor_reg ty x y))) ;; Xor-not two vector registers. @@ -1138,14 +1140,14 @@ ;;;; Rules for `bitselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; z15 version using a NAND instruction. -(rule (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bitselect x y z))) +(rule 2 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (bitselect x y z))) (let ((rx Reg x) (if_true Reg (and_reg ty y rx)) (if_false Reg (and_not_reg ty z rx))) (or_reg ty if_false if_true))) ;; z14 version using XOR with -1. -(rule (lower (has_type (and (mie2_disabled) (fits_in_64 ty)) (bitselect x y z))) +(rule 1 (lower (has_type (and (mie2_disabled) (fits_in_64 ty)) (bitselect x y z))) (let ((rx Reg x) (if_true Reg (and_reg ty y rx)) (if_false Reg (and_reg ty z (not_reg ty rx)))) @@ -1166,7 +1168,7 @@ ;;;; Rules for `breduce` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Up to 64-bit source type: Always a no-op. 
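;; (Booleans are held in a GPR as 0 or all-ones -- see the `cast_bool`
;; rules below -- so truncating to a narrower boolean type can reuse the
;; register unchanged: the low bits already carry the right pattern.)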
-(rule (lower (breduce x @ (value_type (fits_in_64 _ty))))
+(rule 1 (lower (breduce x @ (value_type (fits_in_64 _ty))))
      x)

;; 128-bit source type: Extract the low half.

@@ -1184,70 +1186,70 @@
      (cast_bool ty x))

;; If the target has the same or a smaller size than the source, it's a no-op.
-(rule (cast_bool $B1 x @ (value_type $B1)) x)
-(rule (cast_bool $B1 x @ (value_type $B8)) x)
-(rule (cast_bool $B8 x @ (value_type $B8)) x)
-(rule (cast_bool $I8 x @ (value_type $B8)) x)
-(rule (cast_bool (fits_in_16 _ty) x @ (value_type $B16)) x)
-(rule (cast_bool (fits_in_32 _ty) x @ (value_type $B32)) x)
-(rule (cast_bool (fits_in_64 _ty) x @ (value_type $B64)) x)
-(rule (cast_bool (vr128_ty _ty) x @ (value_type $B128)) x)
-(rule (cast_bool (fits_in_64 _ty) x @ (value_type $B128))
+(rule 8 (cast_bool $B1 x @ (value_type $B1)) x)
+(rule 8 (cast_bool $B1 x @ (value_type $B8)) x)
+(rule 8 (cast_bool $B8 x @ (value_type $B8)) x)
+(rule 8 (cast_bool $I8 x @ (value_type $B8)) x)
+(rule 7 (cast_bool (fits_in_16 _ty) x @ (value_type $B16)) x)
+(rule 6 (cast_bool (fits_in_32 _ty) x @ (value_type $B32)) x)
+(rule 5 (cast_bool (fits_in_64 _ty) x @ (value_type $B64)) x)
+(rule 4 (cast_bool (vr128_ty _ty) x @ (value_type $B128)) x)
+(rule 5 (cast_bool (fits_in_64 _ty) x @ (value_type $B128))
      (vec_extract_lane $I64X2 x 1 (zero_reg)))

;; Single-bit values are sign-extended via a pair of shifts.
-(rule (cast_bool (gpr32_ty ty) x @ (value_type $B1))
+(rule 0 (cast_bool (gpr32_ty ty) x @ (value_type $B1))
      (ashr_imm $I32 (lshl_imm $I32 x 31) 31))
-(rule (cast_bool (gpr64_ty ty) x @ (value_type $B1))
+(rule 1 (cast_bool (gpr64_ty ty) x @ (value_type $B1))
      (ashr_imm $I64 (lshl_imm $I64 x 63) 63))
-(rule (cast_bool (vr128_ty ty) x @ (value_type $B1))
+(rule 4 (cast_bool (vr128_ty ty) x @ (value_type $B1))
      (let ((gpr Reg (ashr_imm $I64 (lshl_imm $I64 x 63) 63)))
        (mov_to_vec128 ty gpr gpr)))

;; Other values are just sign-extended normally.
-(rule (cast_bool (gpr32_ty _ty) x @ (value_type $B8))
+(rule 0 (cast_bool (gpr32_ty _ty) x @ (value_type $B8))
      (sext32_reg $I8 x))
-(rule (cast_bool (gpr32_ty _ty) x @ (value_type $B16))
+(rule 0 (cast_bool (gpr32_ty _ty) x @ (value_type $B16))
      (sext32_reg $I16 x))
-(rule (cast_bool (gpr64_ty _ty) x @ (value_type $B8))
+(rule 1 (cast_bool (gpr64_ty _ty) x @ (value_type $B8))
      (sext64_reg $I8 x))
-(rule (cast_bool (gpr64_ty _ty) x @ (value_type $B16))
+(rule 1 (cast_bool (gpr64_ty _ty) x @ (value_type $B16))
      (sext64_reg $I16 x))
-(rule (cast_bool (gpr64_ty _ty) x @ (value_type $B32))
+(rule 1 (cast_bool (gpr64_ty _ty) x @ (value_type $B32))
      (sext64_reg $I32 x))
-(rule (cast_bool (vr128_ty ty) x @ (value_type (gpr32_ty src_ty)))
+(rule 3 (cast_bool (vr128_ty ty) x @ (value_type (gpr32_ty src_ty)))
      (let ((x_ext Reg (sext64_reg src_ty x)))
        (mov_to_vec128 ty x_ext x_ext)))
-(rule (cast_bool (vr128_ty ty) x @ (value_type (gpr64_ty src_ty)))
+(rule 2 (cast_bool (vr128_ty ty) x @ (value_type (gpr64_ty src_ty)))
      (mov_to_vec128 ty x x))

;;;; Rules for `bint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Mask with 1 to get a 0/1 result (8- or 16-bit result types).
-(rule (lower (has_type (fits_in_16 ty) (bint x @ (value_type (fits_in_64 _)))))
+(rule 5 (lower (has_type (fits_in_16 ty) (bint x @ (value_type (fits_in_64 _)))))
      (and_uimm16shifted ty x (uimm16shifted 1 0)))

;; Mask with 1 to get a 0/1 result (32-bit result types).
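;; (For instance, a true $B32 input arrives as 0xffff_ffff; ANDing with 1
;; yields the integer 1, while a false input stays 0.)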
-(rule (lower (has_type (fits_in_32 ty) (bint x @ (value_type (fits_in_64 _))))) +(rule 4 (lower (has_type (fits_in_32 ty) (bint x @ (value_type (fits_in_64 _))))) (and_uimm32shifted ty x (uimm32shifted 1 0))) ;; Mask with 1 to get a 0/1 result (64-bit result types). -(rule (lower (has_type (fits_in_64 ty) (bint x @ (value_type (fits_in_64 _))))) +(rule 3 (lower (has_type (fits_in_64 ty) (bint x @ (value_type (fits_in_64 _))))) (and_reg ty x (imm ty 1))) ;; Mask with 1 to get a 0/1 result (128-bit result types). -(rule (lower (has_type (vr128_ty ty) (bint x @ (value_type (fits_in_64 _))))) +(rule 1 (lower (has_type (vr128_ty ty) (bint x @ (value_type (fits_in_64 _))))) (let ((x_ext Reg (and_uimm16shifted $I8 x (uimm16shifted 1 0)))) (vec_insert_lane $I8X16 (vec_imm ty 0) x_ext 15 (zero_reg)))) ;; Mask with 1 to get a 0/1 result (128-bit source types). -(rule (lower (has_type (fits_in_64 ty) (bint x @ (value_type (vr128_ty _))))) +(rule 2 (lower (has_type (fits_in_64 ty) (bint x @ (value_type (vr128_ty _))))) (let ((x_gpr Reg (vec_extract_lane $I8X16 x 15 (zero_reg)))) (and_uimm16shifted ty x_gpr (uimm16shifted 1 0)))) ;; Mask with 1 to get a 0/1 result (128-bit source and result types). -(rule (lower (has_type (vr128_ty ty) (bint x @ (value_type (vr128_ty _))))) +(rule 0 (lower (has_type (vr128_ty ty) (bint x @ (value_type (vr128_ty _))))) (vec_and ty x (vec_imm ty 1))) @@ -1260,7 +1262,7 @@ (bitrev_bits 1 0xaaaa_aaaa_aaaa_aaaa ty x))))) (decl bitrev_bits (u8 u64 Type Reg) Reg) -(rule (bitrev_bits size bitmask (fits_in_64 ty) x) +(rule 1 (bitrev_bits size bitmask (fits_in_64 ty) x) (let ((mask Reg (imm ty bitmask)) (xh Reg (lshl_imm (ty_ext32 ty) x size)) (xl Reg (lshr_imm (ty_ext32 ty) x size)) @@ -1299,7 +1301,7 @@ ;; Count leading zeros, via FLOGR on an input zero-extended to 64 bits, ;; with the result compensated for the extra bits. -(rule (lower (has_type (fits_in_64 ty) (clz x))) +(rule 1 (lower (has_type (fits_in_64 ty) (clz x))) (let ((ext_reg Reg (put_in_reg_zext64 x)) ;; Ask for a value of 64 in the all-zero 64-bit input case. ;; After compensation this will match the expected semantics. @@ -1333,7 +1335,7 @@ ;; i.e. computing ;; cls(x) == clz(x ^ (x >> 63)) - 1 ;; where x is the sign-extended input. -(rule (lower (has_type (fits_in_64 ty) (cls x))) +(rule 1 (lower (has_type (fits_in_64 ty) (cls x))) (let ((ext_reg Reg (put_in_reg_sext64 x)) (signbit_copies Reg (ashr_imm $I64 ext_reg 63)) (inv_reg Reg (xor_reg $I64 ext_reg signbit_copies)) @@ -1370,7 +1372,7 @@ ;; never zero by setting a "guard bit" in the position corresponding to ;; the input type size. This way the 64-bit algorithm above will handle ;; that case correctly automatically. -(rule (lower (has_type (gpr32_ty ty) (ctz x))) +(rule 2 (lower (has_type (gpr32_ty ty) (ctz x))) (let ((rx Reg (or_uimm16shifted $I64 x (ctz_guardbit ty))) (lastbit Reg (and_reg $I64 rx (neg_reg $I64 rx))) (clz RegPair (clz_reg 64 lastbit))) @@ -1385,14 +1387,14 @@ ;; via its condition code. We check for that and replace the instruction ;; result with the value -1 via a conditional move, which will then lead to ;; the correct result after the final subtraction from 63. -(rule (lower (has_type (gpr64_ty _ty) (ctz x))) +(rule 1 (lower (has_type (gpr64_ty _ty) (ctz x))) (let ((rx Reg x) (lastbit Reg (and_reg $I64 rx (neg_reg $I64 rx))) (clz RegPair (clz_reg -1 lastbit))) (sub_reg $I64 (imm $I64 63) (regpair_hi clz)))) ;; Count trailing zeros, 128-bit full vector. 
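;; (The vector form below can use an element-wise count-trailing-zeros
;; instruction directly, instead of the x & -x lowest-set-bit trick of the
;; GPR rules above: there, e.g., x = 0b0101000 gives x & -x = 0b0001000,
;; and 63 - clz(0b0001000) = 3, the trailing-zero count.)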
-(rule (lower (has_type $I128 (ctz x))) +(rule 0 (lower (has_type $I128 (ctz x))) (let ((ctz_vec Reg (vec_ctz $I64X2 x)) (zero Reg (vec_imm $I64X2 0)) (ctz_hi Reg (vec_permute_dw_imm $I64X2 zero 0 ctz_vec 0)) @@ -1410,7 +1412,7 @@ ;; On z15, the POPCNT instruction has a variant to compute a full 64-bit ;; population count, which we also use for 16- and 32-bit types. -(rule (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (popcnt x))) +(rule -1 (lower (has_type (and (mie2_enabled) (fits_in_64 ty)) (popcnt x))) (popcnt_reg (put_in_reg_zext64 x))) ;; On z14, we use the regular POPCNT, which computes the population count @@ -1440,7 +1442,7 @@ (lshr_imm $I64 cnt1 56))) ;; Population count for vector types. -(rule (lower (has_type (ty_vec128 ty) (popcnt x))) +(rule 1 (lower (has_type (ty_vec128 ty) (popcnt x))) (vec_popcnt ty x)) ;; Population count, 128-bit full vector. @@ -1610,7 +1612,7 @@ ;;;; Rules for `fcvt_from_uint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Convert a 32-bit or smaller unsigned integer to $F32 (z15 instruction). -(rule (lower (has_type $F32 +(rule 1 (lower (has_type $F32 (fcvt_from_uint x @ (value_type (and (vxrs_ext2_enabled) (fits_in_32 ty)))))) (fcvt_from_uint_reg $F32 (FpuRoundMode.ToNearestTiesToEven) (put_in_reg_zext32 x))) @@ -1627,7 +1629,7 @@ (put_in_reg_zext64 x))) ;; Convert $I32X4 to $F32X4 (z15 instruction). -(rule (lower (has_type (and (vxrs_ext2_enabled) $F32X4) +(rule 1 (lower (has_type (and (vxrs_ext2_enabled) $F32X4) (fcvt_from_uint x @ (value_type $I32X4)))) (fcvt_from_uint_reg $F32X4 (FpuRoundMode.ToNearestTiesToEven) x)) @@ -1651,7 +1653,7 @@ ;;;; Rules for `fcvt_from_sint` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Convert a 32-bit or smaller signed integer to $F32 (z15 instruction). -(rule (lower (has_type $F32 +(rule 1 (lower (has_type $F32 (fcvt_from_sint x @ (value_type (and (vxrs_ext2_enabled) (fits_in_32 ty)))))) (fcvt_from_sint_reg $F32 (FpuRoundMode.ToNearestTiesToEven) (put_in_reg_sext32 x))) @@ -1668,7 +1670,7 @@ (put_in_reg_sext64 x))) ;; Convert $I32X4 to $F32X4 (z15 instruction). -(rule (lower (has_type (and (vxrs_ext2_enabled) $F32X4) +(rule 1 (lower (has_type (and (vxrs_ext2_enabled) $F32X4) (fcvt_from_sint x @ (value_type $I32X4)))) (fcvt_from_sint_reg $F32X4 (FpuRoundMode.ToNearestTiesToEven) x)) @@ -1749,7 +1751,7 @@ ;;;; Rules for `fcvt_to_uint_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Convert a scalar floating-point value in a register to an unsigned integer. -(rule (lower (has_type (fits_in_64 dst_ty) +(rule -1 (lower (has_type (fits_in_64 dst_ty) (fcvt_to_uint_sat x @ (value_type src_ty)))) (let ((src Reg (put_in_reg x)) ;; Perform the conversion using the larger type size. @@ -1761,7 +1763,7 @@ (uint_sat_reg dst_ty int_ty dst))) ;; Convert $F32X4 to $I32X4 (z15 instruction). -(rule (lower (has_type (and (vxrs_ext2_enabled) $I32X4) +(rule 1 (lower (has_type (and (vxrs_ext2_enabled) $I32X4) (fcvt_to_uint_sat x @ (value_type $F32X4)))) (fcvt_to_uint_reg $F32X4 (FpuRoundMode.ToZero) x)) @@ -1782,7 +1784,7 @@ ;;;; Rules for `fcvt_to_sint_sat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Convert a scalar floating-point value in a register to a signed integer. -(rule (lower (has_type (fits_in_64 dst_ty) +(rule -1 (lower (has_type (fits_in_64 dst_ty) (fcvt_to_sint_sat x @ (value_type src_ty)))) (let ((src Reg (put_in_reg x)) ;; Perform the conversion using the larger type size. @@ -1801,7 +1803,7 @@ (sint_sat_reg dst_ty int_ty sat))) ;; Convert $F32X4 to $I32X4 (z15 instruction). 
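;; (CLIF saturating conversions must return 0 for NaN inputs; the hardware
;; conversion alone does not guarantee that in the signed case, hence the
;; extra select against a NaN mask in the rule below.)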
-(rule (lower (has_type (and (vxrs_ext2_enabled) $I32X4) +(rule 1 (lower (has_type (and (vxrs_ext2_enabled) $I32X4) (fcvt_to_sint_sat src @ (value_type $F32X4)))) ;; See above for why we need to handle NaNs specially. (vec_select $I32X4 @@ -1861,37 +1863,37 @@ ;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Insert vector lane from general-purpose register. -(rule (lower (insertlane x @ (value_type ty) +(rule 1 (lower (insertlane x @ (value_type ty) y @ (value_type in_ty) (u8_from_uimm8 idx))) (if (ty_int_bool_ref_scalar_64 in_ty)) (vec_insert_lane ty x y (be_lane_idx ty idx) (zero_reg))) ;; Insert vector lane from floating-point register. -(rule (lower (insertlane x @ (value_type ty) +(rule 0 (lower (insertlane x @ (value_type ty) y @ (value_type (ty_scalar_float _)) (u8_from_uimm8 idx))) (vec_move_lane_and_insert ty x (be_lane_idx ty idx) y 0)) ;; Insert vector lane from another vector lane. -(rule (lower (insertlane x @ (value_type ty) +(rule 2 (lower (insertlane x @ (value_type ty) (extractlane y (u8_from_uimm8 src_idx)) (u8_from_uimm8 dst_idx))) (vec_move_lane_and_insert ty x (be_lane_idx ty dst_idx) y (be_lane_idx ty src_idx))) ;; Insert vector lane from signed 16-bit immediate. -(rule (lower (insertlane x @ (value_type ty) (i16_from_value y) +(rule 3 (lower (insertlane x @ (value_type ty) (i16_from_value y) (u8_from_uimm8 idx))) (vec_insert_lane_imm ty x y (be_lane_idx ty idx))) ;; Insert vector lane from big-endian memory. -(rule (lower (insertlane x @ (value_type ty) (sinkable_load y) +(rule 4 (lower (insertlane x @ (value_type ty) (sinkable_load y) (u8_from_uimm8 idx))) (vec_load_lane ty x (sink_load y) (be_lane_idx ty idx))) ;; Insert vector lane from little-endian memory. -(rule (lower (insertlane x @ (value_type ty) (sinkable_load_little y) +(rule 5 (lower (insertlane x @ (value_type ty) (sinkable_load_little y) (u8_from_uimm8 idx))) (vec_load_lane_little ty x (sink_load y) (be_lane_idx ty idx))) @@ -1906,12 +1908,12 @@ (vec_permute_dw_imm ty dst 0 src src_idx)) ;; If source and destination index are the same, use vec_select. -(rule (vec_move_lane_and_insert ty dst idx src idx) +(rule -1 (vec_move_lane_and_insert ty dst idx src idx) (vec_select ty src dst (vec_imm_byte_mask ty (lane_byte_mask ty idx)))) ;; Otherwise replicate source first and then use vec_select. -(rule (vec_move_lane_and_insert ty dst dst_idx src src_idx) +(rule -2 (vec_move_lane_and_insert ty dst dst_idx src src_idx) (vec_select ty (vec_replicate_lane ty src src_idx) dst (vec_imm_byte_mask ty (lane_byte_mask ty dst_idx)))) @@ -1924,13 +1926,13 @@ (vec_load_lane ty dst addr lane_imm)) ;; On z15, we have instructions to perform little-endian loads. -(rule (vec_load_lane_little (and (vxrs_ext2_enabled) +(rule 1 (vec_load_lane_little (and (vxrs_ext2_enabled) ty @ (multi_lane 16 _)) dst addr lane_imm) (vec_load_lane_rev ty dst addr lane_imm)) -(rule (vec_load_lane_little (and (vxrs_ext2_enabled) +(rule 1 (vec_load_lane_little (and (vxrs_ext2_enabled) ty @ (multi_lane 32 _)) dst addr lane_imm) (vec_load_lane_rev ty dst addr lane_imm)) -(rule (vec_load_lane_little (and (vxrs_ext2_enabled) +(rule 1 (vec_load_lane_little (and (vxrs_ext2_enabled) ty @ (multi_lane 64 _)) dst addr lane_imm) (vec_load_lane_rev ty dst addr lane_imm)) @@ -1953,13 +1955,13 @@ (vec_load_lane_undef ty addr lane_imm)) ;; On z15, we have instructions to perform little-endian loads. 
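;; (These are presumably the byte-reversing element loads VLEBRH, VLEBRF
;; and VLEBRG; 8-bit lanes never need byte reversal, so only the 16-, 32-
;; and 64-bit lane types get the priority-1 rules.)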
-(rule (vec_load_lane_little_undef (and (vxrs_ext2_enabled) +(rule 1 (vec_load_lane_little_undef (and (vxrs_ext2_enabled) ty @ (multi_lane 16 _)) addr lane_imm) (vec_load_lane_rev_undef ty addr lane_imm)) -(rule (vec_load_lane_little_undef (and (vxrs_ext2_enabled) +(rule 1 (vec_load_lane_little_undef (and (vxrs_ext2_enabled) ty @ (multi_lane 32 _)) addr lane_imm) (vec_load_lane_rev_undef ty addr lane_imm)) -(rule (vec_load_lane_little_undef (and (vxrs_ext2_enabled) +(rule 1 (vec_load_lane_little_undef (and (vxrs_ext2_enabled) ty @ (multi_lane 64 _)) addr lane_imm) (vec_load_lane_rev_undef ty addr lane_imm)) @@ -1978,25 +1980,25 @@ ;;;; Rules for `extractlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Extract vector lane to general-purpose register. -(rule (lower (has_type out_ty +(rule 1 (lower (has_type out_ty (extractlane x @ (value_type ty) (u8_from_uimm8 idx)))) (if (ty_int_bool_ref_scalar_64 out_ty)) (vec_extract_lane ty x (be_lane_idx ty idx) (zero_reg))) ;; Extract vector lane to floating-point register. -(rule (lower (has_type (ty_scalar_float _) +(rule 0 (lower (has_type (ty_scalar_float _) (extractlane x @ (value_type ty) (u8_from_uimm8 idx)))) (vec_replicate_lane ty x (be_lane_idx ty idx))) ;; Extract vector lane and store to big-endian memory. -(rule (lower (store flags @ (bigendian) +(rule 6 (lower (store flags @ (bigendian) (extractlane x @ (value_type ty) (u8_from_uimm8 idx)) addr offset)) (side_effect (vec_store_lane ty x (lower_address flags addr offset) (be_lane_idx ty idx)))) ;; Extract vector lane and store to little-endian memory. -(rule (lower (store flags @ (littleendian) +(rule 5 (lower (store flags @ (littleendian) (extractlane x @ (value_type ty) (u8_from_uimm8 idx)) addr offset)) (side_effect (vec_store_lane_little ty x @@ -2011,13 +2013,13 @@ (vec_store_lane ty src addr lane_imm)) ;; On z15, we have instructions to perform little-endian stores. -(rule (vec_store_lane_little (and (vxrs_ext2_enabled) +(rule 1 (vec_store_lane_little (and (vxrs_ext2_enabled) ty @ (multi_lane 16 _)) src addr lane_imm) (vec_store_lane_rev ty src addr lane_imm)) -(rule (vec_store_lane_little (and (vxrs_ext2_enabled) +(rule 1 (vec_store_lane_little (and (vxrs_ext2_enabled) ty @ (multi_lane 32 _)) src addr lane_imm) (vec_store_lane_rev ty src addr lane_imm)) -(rule (vec_store_lane_little (and (vxrs_ext2_enabled) +(rule 1 (vec_store_lane_little (and (vxrs_ext2_enabled) ty @ (multi_lane 64 _)) src addr lane_imm) (vec_store_lane_rev ty src addr lane_imm)) @@ -2036,29 +2038,29 @@ ;;;; Rules for `splat` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Load replicated value from general-purpose register. -(rule (lower (has_type ty (splat x @ (value_type in_ty)))) +(rule 1 (lower (has_type ty (splat x @ (value_type in_ty)))) (if (ty_int_bool_ref_scalar_64 in_ty)) (vec_replicate_lane ty (vec_insert_lane_undef ty x 0 (zero_reg)) 0)) ;; Load replicated value from floating-point register. -(rule (lower (has_type ty (splat +(rule 0 (lower (has_type ty (splat x @ (value_type (ty_scalar_float _))))) (vec_replicate_lane ty x 0)) ;; Load replicated value from vector lane. -(rule (lower (has_type ty (splat (extractlane x (u8_from_uimm8 idx))))) +(rule 2 (lower (has_type ty (splat (extractlane x (u8_from_uimm8 idx))))) (vec_replicate_lane ty x (be_lane_idx ty idx))) ;; Load replicated 16-bit immediate value. 
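;; (This maps to the vector-replicate-immediate instruction, which splats
;; a sign-extended 16-bit immediate into every lane, so small constants
;; need no load or GPR round-trip at all.)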
-(rule (lower (has_type ty (splat (i16_from_value x)))) +(rule 3 (lower (has_type ty (splat (i16_from_value x)))) (vec_imm_replicate ty x)) ;; Load replicated value from big-endian memory. -(rule (lower (has_type ty (splat (sinkable_load x)))) +(rule 4 (lower (has_type ty (splat (sinkable_load x)))) (vec_load_replicate ty (sink_load x))) ;; Load replicated value from little-endian memory. -(rule (lower (has_type ty (splat (sinkable_load_little x)))) +(rule 5 (lower (has_type ty (splat (sinkable_load_little x)))) (vec_load_replicate_little ty (sink_load x))) @@ -2070,13 +2072,13 @@ (vec_load_replicate ty addr)) ;; On z15, we have instructions to perform little-endian loads. -(rule (vec_load_replicate_little (and (vxrs_ext2_enabled) +(rule 1 (vec_load_replicate_little (and (vxrs_ext2_enabled) ty @ (multi_lane 16 _)) addr) (vec_load_replicate_rev ty addr)) -(rule (vec_load_replicate_little (and (vxrs_ext2_enabled) +(rule 1 (vec_load_replicate_little (and (vxrs_ext2_enabled) ty @ (multi_lane 32 _)) addr) (vec_load_replicate_rev ty addr)) -(rule (vec_load_replicate_little (and (vxrs_ext2_enabled) +(rule 1 (vec_load_replicate_little (and (vxrs_ext2_enabled) ty @ (multi_lane 64 _)) addr) (vec_load_replicate_rev ty addr)) @@ -2095,31 +2097,31 @@ ;;;; Rules for `scalar_to_vector` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Load scalar value from general-purpose register. -(rule (lower (has_type ty (scalar_to_vector +(rule 1 (lower (has_type ty (scalar_to_vector x @ (value_type in_ty)))) (if (ty_int_bool_ref_scalar_64 in_ty)) (vec_insert_lane ty (vec_imm ty 0) x (be_lane_idx ty 0) (zero_reg))) ;; Load scalar value from floating-point register. -(rule (lower (has_type ty (scalar_to_vector +(rule 0 (lower (has_type ty (scalar_to_vector x @ (value_type (ty_scalar_float _))))) (vec_move_lane_and_zero ty (be_lane_idx ty 0) x 0)) ;; Load scalar value from vector lane. -(rule (lower (has_type ty (scalar_to_vector +(rule 2 (lower (has_type ty (scalar_to_vector (extractlane x (u8_from_uimm8 idx))))) (vec_move_lane_and_zero ty (be_lane_idx ty 0) x (be_lane_idx ty idx))) ;; Load scalar 16-bit immediate value. -(rule (lower (has_type ty (scalar_to_vector (i16_from_value x)))) +(rule 3 (lower (has_type ty (scalar_to_vector (i16_from_value x)))) (vec_insert_lane_imm ty (vec_imm ty 0) x (be_lane_idx ty 0))) ;; Load scalar value from big-endian memory. -(rule (lower (has_type ty (scalar_to_vector (sinkable_load x)))) +(rule 4 (lower (has_type ty (scalar_to_vector (sinkable_load x)))) (vec_load_lane ty (vec_imm ty 0) (sink_load x) (be_lane_idx ty 0))) ;; Load scalar value lane from little-endian memory. -(rule (lower (has_type ty (scalar_to_vector (sinkable_load_little x)))) +(rule 5 (lower (has_type ty (scalar_to_vector (sinkable_load_little x)))) (vec_load_lane_little ty (vec_imm ty 0) (sink_load x) (be_lane_idx ty 0))) @@ -2133,12 +2135,12 @@ (vec_permute_dw_imm ty (vec_imm ty 0) 0 src src_idx)) ;; If source and destination index are the same, simply mask to this lane. -(rule (vec_move_lane_and_zero ty idx src idx) +(rule -1 (vec_move_lane_and_zero ty idx src idx) (vec_and ty src (vec_imm_byte_mask ty (lane_byte_mask ty idx)))) ;; Otherwise replicate source first and then mask to the lane. 
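;; For example, to move lane 2 of a $I32X4 source into lane 0 of the
;; result: replicate lane 2 across the whole vector, then AND with a byte
;; mask that keeps only bytes 0..3 (big-endian numbering) and zeroes the
;; rest.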
-(rule (vec_move_lane_and_zero ty dst_idx src src_idx) +(rule -2 (vec_move_lane_and_zero ty dst_idx src src_idx) (vec_and ty (vec_replicate_lane ty src src_idx) (vec_imm_byte_mask ty (lane_byte_mask ty dst_idx)))) @@ -2146,216 +2148,152 @@ ;;;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; General case: use vec_permute and then mask off zero lanes. -(rule (lower (shuffle x y (shuffle_mask permute_mask and_mask))) +(rule -2 (lower (shuffle x y (shuffle_mask permute_mask and_mask))) (vec_and $I8X16 (vec_imm_byte_mask $I8X16 and_mask) (vec_permute $I8X16 x y (vec_imm $I8X16 permute_mask)))) ;; If the pattern has no zero lanes, just a vec_permute suffices. -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) +(rule -1 (lower (shuffle x y (shuffle_mask permute_mask 65535))) (vec_permute $I8X16 x y (vec_imm $I8X16 permute_mask))) ;; Special patterns that can be implemented via MERGE HIGH. -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23) 65535))) (vec_merge_high $I64X2 x y)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 0 1 2 3 16 17 18 19 4 5 6 7 20 21 22 23) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 0 1 2 3 16 17 18 19 4 5 6 7 20 21 22 23) 65535))) (vec_merge_high $I32X4 x y)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 0 1 16 17 2 3 18 19 4 5 20 21 6 7 22 23) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 0 1 16 17 2 3 18 19 4 5 20 21 6 7 22 23) 65535))) (vec_merge_high $I16X8 x y)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 0 16 1 17 2 18 3 19 4 20 5 21 6 22 7 23) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 0 16 1 17 2 18 3 19 4 20 5 21 6 22 7 23) 65535))) (vec_merge_high $I8X16 x y)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 16 17 18 19 20 21 22 23 0 1 2 3 4 5 6 7) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 16 17 18 19 20 21 22 23 0 1 2 3 4 5 6 7) 65535))) (vec_merge_high $I64X2 y x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 16 17 18 19 0 1 2 3 20 21 22 23 4 5 6 7) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 16 17 18 19 0 1 2 3 20 21 22 23 4 5 6 7) 65535))) (vec_merge_high $I32X4 y x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 16 17 0 1 18 19 2 3 20 21 4 5 22 23 6 7) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 16 17 0 1 18 19 2 3 20 21 4 5 22 23 6 7) 65535))) (vec_merge_high $I16X8 y x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 16 0 17 1 18 2 19 3 20 4 21 5 22 6 23 7) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 16 0 17 1 18 2 19 3 20 4 21 5 22 6 23 7) 65535))) (vec_merge_high $I8X16 y x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 0 1 2 3 4 5 6 7 0 1 2 3 4 5 6 7) 65535))) (vec_merge_high $I64X2 x x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 0 1 2 3 0 1 2 3 4 5 6 7 4 5 6 7) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 0 1 2 3 0 1 2 3 4 5 6 7 4 5 6 7) 65535))) (vec_merge_high 
$I32X4 x x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 0 1 0 1 2 3 2 3 4 5 4 5 6 7 6 7) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 0 1 0 1 2 3 2 3 4 5 4 5 6 7 6 7) 65535))) (vec_merge_high $I16X8 x x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7) 65535))) (vec_merge_high $I8X16 x x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 16 17 18 19 20 21 22 23 16 17 18 19 20 21 22 23) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 16 17 18 19 20 21 22 23 16 17 18 19 20 21 22 23) 65535))) (vec_merge_high $I64X2 y y)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 16 17 18 19 16 17 18 19 20 21 22 23 20 21 22 23) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 16 17 18 19 16 17 18 19 20 21 22 23 20 21 22 23) 65535))) (vec_merge_high $I32X4 y y)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 16 17 16 17 18 19 18 19 20 21 20 21 22 23 22 23) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 16 17 16 17 18 19 18 19 20 21 20 21 22 23 22 23) 65535))) (vec_merge_high $I16X8 y y)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 16 16 17 17 18 18 19 19 20 20 21 21 22 22 23 23) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 16 16 17 17 18 18 19 19 20 20 21 21 22 22 23 23) 65535))) (vec_merge_high $I8X16 y y)) ;; Special patterns that can be implemented via MERGE LOW. -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31) 65535))) (vec_merge_low $I64X2 x y)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 8 9 10 11 24 25 26 27 12 13 14 15 28 29 30 31) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 8 9 10 11 24 25 26 27 12 13 14 15 28 29 30 31) 65535))) (vec_merge_low $I32X4 x y)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 8 9 24 25 10 11 26 27 12 13 28 29 14 15 30 31) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 8 9 24 25 10 11 26 27 12 13 28 29 14 15 30 31) 65535))) (vec_merge_low $I16X8 x y)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 8 24 9 25 10 26 11 27 12 28 13 29 14 30 15 31) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 8 24 9 25 10 26 11 27 12 28 13 29 14 30 15 31) 65535))) (vec_merge_low $I8X16 x y)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 24 25 26 27 28 29 30 31 8 9 10 11 12 13 14 15) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 24 25 26 27 28 29 30 31 8 9 10 11 12 13 14 15) 65535))) (vec_merge_low $I64X2 y x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 24 25 26 27 8 9 10 11 28 29 30 31 12 13 14 15) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 24 25 26 27 8 9 10 11 28 29 30 31 12 13 14 15) 65535))) (vec_merge_low $I32X4 y x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 24 25 8 9 26 27 10 11 28 29 12 13 30 31 14 15) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 
24 25 8 9 26 27 10 11 28 29 12 13 30 31 14 15) 65535))) (vec_merge_low $I16X8 y x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 24 8 25 9 26 10 27 11 28 12 29 13 30 14 31 15) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 24 8 25 9 26 10 27 11 28 12 29 13 30 14 31 15) 65535))) (vec_merge_low $I8X16 y x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 8 9 10 11 12 13 14 15 8 9 10 11 12 13 14 15) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 8 9 10 11 12 13 14 15 8 9 10 11 12 13 14 15) 65535))) (vec_merge_low $I64X2 x x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 8 9 10 11 8 9 10 11 12 13 14 15 12 13 14 15) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 8 9 10 11 8 9 10 11 12 13 14 15 12 13 14 15) 65535))) (vec_merge_low $I32X4 x x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 8 9 8 9 10 11 10 11 12 13 12 13 14 15 14 15) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 8 9 8 9 10 11 10 11 12 13 12 13 14 15 14 15) 65535))) (vec_merge_low $I16X8 x x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 8 8 9 9 10 10 11 11 12 12 13 13 14 14 15 15) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 8 8 9 9 10 10 11 11 12 12 13 13 14 14 15 15) 65535))) (vec_merge_low $I8X16 x x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 24 25 26 27 28 29 30 31 24 25 26 27 28 29 30 31) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 24 25 26 27 28 29 30 31 24 25 26 27 28 29 30 31) 65535))) (vec_merge_low $I64X2 y y)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 24 25 26 27 24 25 26 27 28 29 30 31 28 29 30 31) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 24 25 26 27 24 25 26 27 28 29 30 31 28 29 30 31) 65535))) (vec_merge_low $I32X4 y y)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 24 25 24 25 26 27 26 27 28 29 28 29 30 31 30 31) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 24 25 24 25 26 27 26 27 28 29 28 29 30 31 30 31) 65535))) (vec_merge_low $I16X8 y y)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 24 24 25 25 26 26 27 27 28 28 29 29 30 30 31 31) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 24 24 25 25 26 26 27 27 28 28 29 29 30 30 31 31) 65535))) (vec_merge_low $I8X16 y y)) ;; Special patterns that can be implemented via PACK. 
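;; (VPK truncates each element of both operands to its low half and packs
;; the results; e.g. the byte pattern 4 5 6 7 12 13 14 15 20 21 22 23
;; 28 29 30 31 below selects the low 32 bits of every 64-bit lane of x
;; and then y, which is exactly (vec_pack $I64X2 x y).)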
-(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 4 5 6 7 12 13 14 15 20 21 22 23 28 29 30 31) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 4 5 6 7 12 13 14 15 20 21 22 23 28 29 30 31) 65535))) (vec_pack $I64X2 x y)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 2 3 6 7 10 11 14 15 18 19 22 23 26 27 30 31) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 2 3 6 7 10 11 14 15 18 19 22 23 26 27 30 31) 65535))) (vec_pack $I32X4 x y)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 1 3 5 7 9 11 13 15 17 19 21 23 25 27 29 31) 65535))) (vec_pack $I16X8 x y)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 20 21 22 23 28 29 30 31 4 5 6 7 12 13 14 15) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 20 21 22 23 28 29 30 31 4 5 6 7 12 13 14 15) 65535))) (vec_pack $I64X2 y x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 18 19 22 23 26 27 30 31 2 3 6 7 10 11 14 15) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 18 19 22 23 26 27 30 31 2 3 6 7 10 11 14 15) 65535))) (vec_pack $I32X4 y x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 17 19 21 23 25 27 29 31 1 3 5 7 9 11 13 15) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 17 19 21 23 25 27 29 31 1 3 5 7 9 11 13 15) 65535))) (vec_pack $I16X8 y x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 4 5 6 7 12 13 14 15 4 5 6 7 12 13 14 15) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 4 5 6 7 12 13 14 15 4 5 6 7 12 13 14 15) 65535))) (vec_pack $I64X2 x x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 2 3 6 7 10 11 14 15 2 3 6 7 10 11 14 15) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 2 3 6 7 10 11 14 15 2 3 6 7 10 11 14 15) 65535))) (vec_pack $I32X4 x x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 1 3 5 7 9 11 13 15 1 3 5 7 9 11 13 15) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 1 3 5 7 9 11 13 15 1 3 5 7 9 11 13 15) 65535))) (vec_pack $I16X8 x x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 20 21 22 23 28 29 30 31 20 21 22 23 28 29 30 31) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 20 21 22 23 28 29 30 31 20 21 22 23 28 29 30 31) 65535))) (vec_pack $I64X2 y y)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 18 19 22 23 26 27 30 31 18 19 22 23 26 27 30 31) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 18 19 22 23 26 27 30 31 18 19 22 23 26 27 30 31) 65535))) (vec_pack $I32X4 y y)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 17 19 21 23 25 27 29 31 17 19 21 23 25 27 29 31) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 17 19 21 23 25 27 29 31 17 19 21 23 25 27 29 31) 65535))) (vec_pack $I16X8 y y)) ;; Special patterns that can be implemented via UNPACK HIGH. 
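;; (For these, the and_mask matters as well: e.g. 3855 = 0x0f0f keeps only
;; bytes 4..7 and 12..15 and zeroes the others, matching the zero
;; extension that the logical unpack instructions perform.)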
-(rule (lower (shuffle x y (shuffle_mask permute_mask 3855))) - (if-let (imm8x16 _ _ _ _ 0 1 2 3 _ _ _ _ 4 5 6 7) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 _ _ _ _ 0 1 2 3 _ _ _ _ 4 5 6 7) 3855))) (vec_unpacku_high $I32X4 x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 13107))) - (if-let (imm8x16 _ _ 0 1 _ _ 2 3 _ _ 4 5 _ _ 6 7) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 _ _ 0 1 _ _ 2 3 _ _ 4 5 _ _ 6 7) 13107))) (vec_unpacku_high $I16X8 x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 21845))) - (if-let (imm8x16 _ 0 _ 1 _ 2 _ 3 _ 4 _ 5 _ 6 _ 7) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 _ 0 _ 1 _ 2 _ 3 _ 4 _ 5 _ 6 _ 7) 21845))) (vec_unpacku_high $I8X16 x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 3855))) - (if-let (imm8x16 _ _ _ _ 16 17 18 19 _ _ _ _ 20 21 22 23) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 _ _ _ _ 16 17 18 19 _ _ _ _ 20 21 22 23) 3855))) (vec_unpacku_high $I32X4 y)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 13107))) - (if-let (imm8x16 _ _ 16 17 _ _ 18 19 _ _ 20 21 _ _ 22 23) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 _ _ 16 17 _ _ 18 19 _ _ 20 21 _ _ 22 23) 13107))) (vec_unpacku_high $I16X8 y)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 21845))) - (if-let (imm8x16 _ 16 _ 17 _ 18 _ 19 _ 20 _ 21 _ 22 _ 23) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 _ 16 _ 17 _ 18 _ 19 _ 20 _ 21 _ 22 _ 23) 21845))) (vec_unpacku_high $I8X16 y)) ;; Special patterns that can be implemented via UNPACK LOW. -(rule (lower (shuffle x y (shuffle_mask permute_mask 3855))) - (if-let (imm8x16 _ _ _ _ 8 9 10 11 _ _ _ _ 12 13 14 15) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 _ _ _ _ 8 9 10 11 _ _ _ _ 12 13 14 15) 3855))) (vec_unpacku_low $I32X4 x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 13107))) - (if-let (imm8x16 _ _ 8 9 _ _ 10 11 _ _ 12 13 _ _ 14 15) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 _ _ 8 9 _ _ 10 11 _ _ 12 13 _ _ 14 15) 13107))) (vec_unpacku_low $I16X8 x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 21845))) - (if-let (imm8x16 _ 8 _ 9 _ 10 _ 11 _ 12 _ 13 _ 14 _ 15) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 _ 8 _ 9 _ 10 _ 11 _ 12 _ 13 _ 14 _ 15) 21845))) (vec_unpacku_low $I8X16 x)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 3855))) - (if-let (imm8x16 _ _ _ _ 24 25 26 27 _ _ _ _ 28 29 30 31) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 _ _ _ _ 24 25 26 27 _ _ _ _ 28 29 30 31) 3855))) (vec_unpacku_low $I32X4 y)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 13107))) - (if-let (imm8x16 _ _ 24 25 _ _ 26 27 _ _ 28 29 _ _ 30 31) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 _ _ 24 25 _ _ 26 27 _ _ 28 29 _ _ 30 31) 13107))) (vec_unpacku_low $I16X8 y)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 21845))) - (if-let (imm8x16 _ 24 _ 25 _ 26 _ 27 _ 28 _ 29 _ 30 _ 31) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 _ 24 _ 25 _ 26 _ 27 _ 28 _ 29 _ 30 _ 31) 21845))) (vec_unpacku_low $I8X16 y)) ;; Special patterns that can be implemented via PERMUTE DOUBLEWORD IMMEDIATE. 
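;; (VPDI concatenates one doubleword selected from each operand; e.g.
;; bytes 0..7 of x followed by bytes 24..31 -- the low doubleword of y --
;; is (vec_permute_dw_imm $I8X16 x 0 y 1) in the first rule below.)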
-(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 0 1 2 3 4 5 6 7 24 25 26 27 28 29 30 31) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 0 1 2 3 4 5 6 7 24 25 26 27 28 29 30 31) 65535))) (vec_permute_dw_imm $I8X16 x 0 y 1)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23) 65535))) (vec_permute_dw_imm $I8X16 x 1 y 0)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 16 17 18 19 20 21 22 23 8 9 10 11 12 13 14 15) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 16 17 18 19 20 21 22 23 8 9 10 11 12 13 14 15) 65535))) (vec_permute_dw_imm $I8X16 y 0 x 1)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 24 25 26 27 28 29 30 31 0 1 2 3 4 5 6 7) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 24 25 26 27 28 29 30 31 0 1 2 3 4 5 6 7) 65535))) (vec_permute_dw_imm $I8X16 y 1 x 0)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15) 65535))) (vec_permute_dw_imm $I8X16 x 0 x 1)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 8 9 10 11 12 13 14 15 0 1 2 3 4 5 6 7) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 8 9 10 11 12 13 14 15 0 1 2 3 4 5 6 7) 65535))) (vec_permute_dw_imm $I8X16 x 1 x 0)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31) 65535))) (vec_permute_dw_imm $I8X16 y 0 y 1)) -(rule (lower (shuffle x y (shuffle_mask permute_mask 65535))) - (if-let (imm8x16 24 25 26 27 28 29 30 31 16 17 18 19 20 21 22 23) permute_mask) +(rule (lower (shuffle x y (shuffle_mask (imm8x16 24 25 26 27 28 29 30 31 16 17 18 19 20 21 22 23) 65535))) (vec_permute_dw_imm $I8X16 y 1 y 0)) @@ -2368,7 +2306,7 @@ ;; permute-lane-element := umin (16, swizzle-lane-element) ;; and pass a zero vector as second operand to the permute instruction. -(rule (lower (has_type (ty_vec128 ty) (swizzle x y))) +(rule 1 (lower (has_type (ty_vec128 ty) (swizzle x y))) (if-let (LaneOrder.BigEndian) (lane_order)) (vec_permute ty x (vec_imm ty 0) (vec_umin $I8X16 (vec_imm_splat $I8X16 16) y))) @@ -2404,7 +2342,7 @@ ;;;; Rules for `func_addr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Load the address of a function, target reachable via PC-relative instruction. -(rule (lower (func_addr (func_ref_data _ name (reloc_distance_near)))) +(rule 1 (lower (func_addr (func_ref_data _ name (reloc_distance_near)))) (load_addr (memarg_symbol name 0 (memflags_trusted)))) ;; Load the address of a function, general case. @@ -2415,7 +2353,7 @@ ;;;; Rules for `symbol_value` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Load the address of a symbol, target reachable via PC-relative instruction. 
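;; (LARL reaches roughly +/-4 GB from the PC and encodes its offset in
;; halfwords, so the if-let below should reject combined offsets that are
;; odd or out of encodable range.)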
-(rule (lower (symbol_value (symbol_value_data name (reloc_distance_near) +(rule 1 (lower (symbol_value (symbol_value_data name (reloc_distance_near) off))) (if-let offset (memarg_symbol_offset off)) (load_addr (memarg_symbol name offset (memflags_trusted)))) @@ -2463,7 +2401,7 @@ (zext32_mem $I16 (lower_address flags addr offset))) ;; Load 16-bit little-endian integers. -(rule (lower (has_type $I16 (load flags @ (littleendian) addr offset))) +(rule -1 (lower (has_type $I16 (load flags @ (littleendian) addr offset))) (loadrev16 (lower_address flags addr offset))) ;; Load 32-bit big-endian integers. @@ -2471,7 +2409,7 @@ (load32 (lower_address flags addr offset))) ;; Load 32-bit little-endian integers. -(rule (lower (has_type $I32 (load flags @ (littleendian) addr offset))) +(rule -1 (lower (has_type $I32 (load flags @ (littleendian) addr offset))) (loadrev32 (lower_address flags addr offset))) ;; Load 64-bit big-endian integers. @@ -2479,7 +2417,7 @@ (load64 (lower_address flags addr offset))) ;; Load 64-bit little-endian integers. -(rule (lower (has_type $I64 (load flags @ (littleendian) addr offset))) +(rule -1 (lower (has_type $I64 (load flags @ (littleendian) addr offset))) (loadrev64 (lower_address flags addr offset))) ;; Load 64-bit big-endian references. @@ -2487,7 +2425,7 @@ (load64 (lower_address flags addr offset))) ;; Load 64-bit little-endian references. -(rule (lower (has_type $R64 (load flags @ (littleendian) addr offset))) +(rule -1 (lower (has_type $R64 (load flags @ (littleendian) addr offset))) (loadrev64 (lower_address flags addr offset))) ;; Load 32-bit big-endian floating-point values (as vector lane). @@ -2495,7 +2433,7 @@ (vec_load_lane_undef $F32X4 (lower_address flags addr offset) 0)) ;; Load 32-bit little-endian floating-point values (as vector lane). -(rule (lower (has_type $F32 (load flags @ (littleendian) addr offset))) +(rule -1 (lower (has_type $F32 (load flags @ (littleendian) addr offset))) (vec_load_lane_little_undef $F32X4 (lower_address flags addr offset) 0)) ;; Load 64-bit big-endian floating-point values (as vector lane). @@ -2503,26 +2441,26 @@ (vec_load_lane_undef $F64X2 (lower_address flags addr offset) 0)) ;; Load 64-bit little-endian floating-point values (as vector lane). -(rule (lower (has_type $F64 (load flags @ (littleendian) addr offset))) +(rule -1 (lower (has_type $F64 (load flags @ (littleendian) addr offset))) (vec_load_lane_little_undef $F64X2 (lower_address flags addr offset) 0)) ;; Load 128-bit big-endian vector values, BE lane order - direct load. -(rule (lower (has_type (vr128_ty ty) (load flags @ (bigendian) addr offset))) +(rule 4 (lower (has_type (vr128_ty ty) (load flags @ (bigendian) addr offset))) (if-let (LaneOrder.BigEndian) (lane_order)) (vec_load ty (lower_address flags addr offset))) ;; Load 128-bit little-endian vector values, BE lane order - byte-reversed load. -(rule (lower (has_type (vr128_ty ty) (load flags @ (littleendian) addr offset))) +(rule 3 (lower (has_type (vr128_ty ty) (load flags @ (littleendian) addr offset))) (if-let (LaneOrder.BigEndian) (lane_order)) (vec_load_byte_rev ty flags addr offset)) ;; Load 128-bit big-endian vector values, LE lane order - element-reversed load. -(rule (lower (has_type (vr128_ty ty) (load flags @ (bigendian) addr offset))) +(rule 2 (lower (has_type (vr128_ty ty) (load flags @ (bigendian) addr offset))) (if-let (LaneOrder.LittleEndian) (lane_order)) (vec_load_elt_rev ty flags addr offset)) ;; Load 128-bit little-endian vector values, LE lane order - fully-reversed load. 
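;; (This completes the four combinations of memory byte order and
;; in-register lane order: direct, byte-reversed per element,
;; element-reversed, and -- here -- both reversals at once, which
;; collapses to reversing all 16 bytes.)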
-(rule (lower (has_type (vr128_ty ty) (load flags @ (littleendian) addr offset)))
+(rule 1 (lower (has_type (vr128_ty ty) (load flags @ (littleendian) addr offset)))
      (if-let (LaneOrder.LittleEndian) (lane_order))
      (vec_load_full_rev ty flags addr offset))

@@ -2531,7 +2469,7 @@
(decl vec_load_full_rev (Type MemFlags Value Offset32) Reg)

;; Full-vector byte-reversed load via single instruction on z15.
-(rule (vec_load_full_rev (and (vxrs_ext2_enabled) (vr128_ty ty)) flags addr offset)
+(rule 1 (vec_load_full_rev (and (vxrs_ext2_enabled) (vr128_ty ty)) flags addr offset)
      (vec_loadrev ty (lower_address flags addr offset)))

;; Full-vector byte-reversed load via GPRs on z14.
@@ -2547,7 +2485,7 @@
(decl vec_load_byte_rev (Type MemFlags Value Offset32) Reg)

;; Element-wise byte-reversed 1x128-bit load is a full byte-reversed load.
-(rule (vec_load_byte_rev $I128 flags addr offset)
+(rule -1 (vec_load_byte_rev $I128 flags addr offset)
      (vec_load_full_rev $I128 flags addr offset))

;; Element-wise byte-reversed 16x8-bit load is a direct load.
@@ -2555,13 +2493,13 @@
      (vec_load ty (lower_address flags addr offset)))

;; Element-wise byte-reversed load via single instruction on z15.
-(rule (vec_load_byte_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 64 2))
+(rule 1 (vec_load_byte_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 64 2))
      flags addr offset)
      (vec_load_byte64rev ty (lower_address flags addr offset)))
-(rule (vec_load_byte_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 32 4))
+(rule 1 (vec_load_byte_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 32 4))
      flags addr offset)
      (vec_load_byte32rev ty (lower_address flags addr offset)))
-(rule (vec_load_byte_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 16 8))
+(rule 1 (vec_load_byte_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 16 8))
      flags addr offset)
      (vec_load_byte16rev ty (lower_address flags addr offset)))

@@ -2582,7 +2520,7 @@
;; Element-reversed 1x128-bit load is a direct load.
-(rule (vec_load_elt_rev $I128 flags addr offset)
+(rule -1 (vec_load_elt_rev $I128 flags addr offset)
      (vec_load $I128 (lower_address flags addr offset)))

;; Element-reversed 16x8-bit load is a full byte-reversed load.
@@ -2590,13 +2528,13 @@
      (vec_load_full_rev ty flags addr offset))

;; Element-reversed load via single instruction on z15.
-(rule (vec_load_elt_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 64 2))
+(rule 1 (vec_load_elt_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 64 2))
      flags addr offset)
      (vec_load_elt64rev ty (lower_address flags addr offset)))
-(rule (vec_load_elt_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 32 4))
+(rule 1 (vec_load_elt_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 32 4))
      flags addr offset)
      (vec_load_elt32rev ty (lower_address flags addr offset)))
-(rule (vec_load_elt_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 16 8))
+(rule 1 (vec_load_elt_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 16 8))
      flags addr offset)
      (vec_load_elt16rev ty (lower_address flags addr offset)))

@@ -2619,7 +2557,7 @@
      (zext32_mem $I8 (lower_address flags addr offset)))

;; 64-bit target types.
-(rule (lower (has_type (gpr64_ty _ty) (uload8 flags addr offset)))
+(rule 1 (lower (has_type (gpr64_ty _ty) (uload8 flags addr offset)))
      (zext64_mem $I8 (lower_address flags addr offset)))

@@ -2630,30 +2568,30 @@
      (sext32_mem $I8 (lower_address flags addr offset)))

;; 64-bit target types.
-(rule (lower (has_type (gpr64_ty _ty) (sload8 flags addr offset))) +(rule 1 (lower (has_type (gpr64_ty _ty) (sload8 flags addr offset))) (sext64_mem $I8 (lower_address flags addr offset))) ;;;; Rules for `uload16` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; 32-bit target type, big-endian source value. -(rule (lower (has_type (gpr32_ty _ty) +(rule 3 (lower (has_type (gpr32_ty _ty) (uload16 flags @ (bigendian) addr offset))) (zext32_mem $I16 (lower_address flags addr offset))) ;; 32-bit target type, little-endian source value (via explicit extension). -(rule (lower (has_type (gpr32_ty _ty) +(rule 1 (lower (has_type (gpr32_ty _ty) (uload16 flags @ (littleendian) addr offset))) (let ((reg16 Reg (loadrev16 (lower_address flags addr offset)))) (zext32_reg $I16 reg16))) ;; 64-bit target type, big-endian source value. -(rule (lower (has_type (gpr64_ty _ty) +(rule 4 (lower (has_type (gpr64_ty _ty) (uload16 flags @ (bigendian) addr offset))) (zext64_mem $I16 (lower_address flags addr offset))) ;; 64-bit target type, little-endian source value (via explicit extension). -(rule (lower (has_type (gpr64_ty _ty) +(rule 2 (lower (has_type (gpr64_ty _ty) (uload16 flags @ (littleendian) addr offset))) (let ((reg16 Reg (loadrev16 (lower_address flags addr offset)))) (zext64_reg $I16 reg16))) @@ -2662,23 +2600,23 @@ ;;;; Rules for `sload16` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; 32-bit target type, big-endian source value. -(rule (lower (has_type (gpr32_ty _ty) +(rule 2 (lower (has_type (gpr32_ty _ty) (sload16 flags @ (bigendian) addr offset))) (sext32_mem $I16 (lower_address flags addr offset))) ;; 32-bit target type, little-endian source value (via explicit extension). -(rule (lower (has_type (gpr32_ty _ty) +(rule 0 (lower (has_type (gpr32_ty _ty) (sload16 flags @ (littleendian) addr offset))) (let ((reg16 Reg (loadrev16 (lower_address flags addr offset)))) (sext32_reg $I16 reg16))) ;; 64-bit target type, big-endian source value. -(rule (lower (has_type (gpr64_ty _ty) +(rule 3 (lower (has_type (gpr64_ty _ty) (sload16 flags @ (bigendian) addr offset))) (sext64_mem $I16 (lower_address flags addr offset))) ;; 64-bit target type, little-endian source value (via explicit extension). -(rule (lower (has_type (gpr64_ty _ty) +(rule 1 (lower (has_type (gpr64_ty _ty) (sload16 flags @ (littleendian) addr offset))) (let ((reg16 Reg (loadrev16 (lower_address flags addr offset)))) (sext64_reg $I16 reg16))) @@ -2687,7 +2625,7 @@ ;;;; Rules for `uload32` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; 64-bit target type, big-endian source value. -(rule (lower (has_type (gpr64_ty _ty) +(rule 1 (lower (has_type (gpr64_ty _ty) (uload32 flags @ (bigendian) addr offset))) (zext64_mem $I32 (lower_address flags addr offset))) @@ -2701,7 +2639,7 @@ ;;;; Rules for `sload32` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; 64-bit target type, big-endian source value. -(rule (lower (has_type (gpr64_ty _ty) +(rule 1 (lower (has_type (gpr64_ty _ty) (sload32 flags @ (bigendian) addr offset))) (sext64_mem $I32 (lower_address flags addr offset))) @@ -2743,12 +2681,12 @@ (decl load_v64 (Type MemFlags Value Offset32) Reg) ;; Any big-endian source value, BE lane order. -(rule (load_v64 _ flags @ (bigendian) addr offset) +(rule -1 (load_v64 _ flags @ (bigendian) addr offset) (if-let (LaneOrder.BigEndian) (lane_order)) (vec_load_lane_undef $I64X2 (lower_address flags addr offset) 0)) ;; Any little-endian source value, LE lane order. 
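;; (When the memory byte order matches the lane order, a plain 64-bit lane
;; load suffices; the lane-type-specific rules below handle the mismatched
;; cases with an extra in-register rotate.)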
-(rule (load_v64 _ flags @ (littleendian) addr offset) +(rule -2 (load_v64 _ flags @ (littleendian) addr offset) (if-let (LaneOrder.LittleEndian) (lane_order)) (vec_load_lane_little_undef $I64X2 (lower_address flags addr offset) 0)) @@ -2758,7 +2696,7 @@ (vec_load_lane_undef $I64X2 (lower_address flags addr offset) 0)) ;; Big-endian or little-endian 8x8-bit source value, LE lane order. -(rule (load_v64 (multi_lane 8 16) flags addr offset) +(rule 1 (load_v64 (multi_lane 8 16) flags addr offset) (if-let (LaneOrder.LittleEndian) (lane_order)) (vec_load_lane_little_undef $I64X2 (lower_address flags addr offset) 0)) @@ -2769,7 +2707,7 @@ (vec_load_lane_undef $I64X2 (lower_address flags addr offset) 0) 8)) ;; Big-endian 4x16-bit source value, LE lane order. -(rule (load_v64 (multi_lane 16 8) flags @ (bigendian) addr offset) +(rule 1 (load_v64 (multi_lane 16 8) flags @ (bigendian) addr offset) (if-let (LaneOrder.LittleEndian) (lane_order)) (vec_rot_imm $I16X8 (vec_load_lane_little_undef $I64X2 (lower_address flags addr offset) 0) 8)) @@ -2781,7 +2719,7 @@ (vec_load_lane_little_undef $I64X2 (lower_address flags addr offset) 0) 32)) ;; Big-endian 2x32-bit source value, LE lane order. -(rule (load_v64 (multi_lane 32 4) flags @ (bigendian) addr offset) +(rule 1 (load_v64 (multi_lane 32 4) flags @ (bigendian) addr offset) (if-let (LaneOrder.LittleEndian) (lane_order)) (vec_rot_imm $I64X2 (vec_load_lane_undef $I64X2 (lower_address flags addr offset) 0) 32)) @@ -2813,7 +2751,7 @@ (side_effect (istore64_impl flags val addr offset))) ;; Store 32-bit big-endian floating-point type (as vector lane). -(rule (lower (store flags @ (bigendian) +(rule -1 (lower (store flags @ (bigendian) val @ (value_type $F32) addr offset)) (side_effect (vec_store_lane $F32X4 val (lower_address flags addr offset) 0))) @@ -2825,7 +2763,7 @@ (lower_address flags addr offset) 0))) ;; Store 64-bit big-endian floating-point type (as vector lane). -(rule (lower (store flags @ (bigendian) +(rule -1 (lower (store flags @ (bigendian) val @ (value_type $F64) addr offset)) (side_effect (vec_store_lane $F64X2 val (lower_address flags addr offset) 0))) @@ -2837,25 +2775,25 @@ (lower_address flags addr offset) 0))) ;; Store 128-bit big-endian vector type, BE lane order - direct store. -(rule (lower (store flags @ (bigendian) +(rule 4 (lower (store flags @ (bigendian) val @ (value_type (vr128_ty ty)) addr offset)) (if-let (LaneOrder.BigEndian) (lane_order)) (side_effect (vec_store val (lower_address flags addr offset)))) ;; Store 128-bit little-endian vector type, BE lane order - byte-reversed store. -(rule (lower (store flags @ (littleendian) +(rule 3 (lower (store flags @ (littleendian) val @ (value_type (vr128_ty ty)) addr offset)) (if-let (LaneOrder.BigEndian) (lane_order)) (side_effect (vec_store_byte_rev ty val flags addr offset))) ;; Store 128-bit big-endian vector type, LE lane order - element-reversed store. -(rule (lower (store flags @ (bigendian) +(rule 2 (lower (store flags @ (bigendian) val @ (value_type (vr128_ty ty)) addr offset)) (if-let (LaneOrder.LittleEndian) (lane_order)) (side_effect (vec_store_elt_rev ty val flags addr offset))) ;; Store 128-bit little-endian vector type, LE lane order - fully-reversed store. 
-(rule (lower (store flags @ (littleendian) +(rule 1 (lower (store flags @ (littleendian) val @ (value_type (vr128_ty ty)) addr offset)) (if-let (LaneOrder.LittleEndian) (lane_order)) (side_effect (vec_store_full_rev ty val flags addr offset))) @@ -2865,7 +2803,7 @@ (decl vec_store_full_rev (Type Reg MemFlags Value Offset32) SideEffectNoResult) ;; Full-vector byte-reversed store via single instruction on z15. -(rule (vec_store_full_rev (vxrs_ext2_enabled) val flags addr offset) +(rule 1 (vec_store_full_rev (vxrs_ext2_enabled) val flags addr offset) (vec_storerev val (lower_address flags addr offset))) ;; Full-vector byte-reversed store via GPRs on z14. @@ -2882,7 +2820,7 @@ (decl vec_store_byte_rev (Type Reg MemFlags Value Offset32) SideEffectNoResult) ;; Element-wise byte-reversed 1x128-bit store is a full byte-reversed store. -(rule (vec_store_byte_rev $I128 val flags addr offset) +(rule -1 (vec_store_byte_rev $I128 val flags addr offset) (vec_store_full_rev $I128 val flags addr offset)) ;; Element-wise byte-reversed 16x8-bit store is a direct store. @@ -2890,13 +2828,13 @@ (vec_store val (lower_address flags addr offset))) ;; Element-wise byte-reversed store via single instruction on z15. -(rule (vec_store_byte_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 64 2)) +(rule 1 (vec_store_byte_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 64 2)) val flags addr offset) (vec_store_byte64rev val (lower_address flags addr offset))) -(rule (vec_store_byte_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 32 4)) +(rule 1 (vec_store_byte_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 32 4)) val flags addr offset) (vec_store_byte32rev val (lower_address flags addr offset))) -(rule (vec_store_byte_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 16 8)) +(rule 1 (vec_store_byte_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 16 8)) val flags addr offset) (vec_store_byte16rev val (lower_address flags addr offset))) @@ -2916,7 +2854,7 @@ (decl vec_store_elt_rev (Type Reg MemFlags Value Offset32) SideEffectNoResult) ;; Element-reversed 1x128-bit store is a direct store. -(rule (vec_store_elt_rev $I128 val flags addr offset) +(rule -1 (vec_store_elt_rev $I128 val flags addr offset) (vec_store val (lower_address flags addr offset))) ;; Element-reversed 16x8-bit store is a full byte-reversed store. @@ -2924,13 +2862,13 @@ (vec_store_full_rev ty val flags addr offset)) ;; Element-reversed store via single instruction on z15. -(rule (vec_store_elt_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 64 2)) +(rule 1 (vec_store_elt_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 64 2)) val flags addr offset) (vec_store_elt64rev val (lower_address flags addr offset))) -(rule (vec_store_elt_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 32 4)) +(rule 1 (vec_store_elt_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 32 4)) val flags addr offset) (vec_store_elt32rev val (lower_address flags addr offset))) -(rule (vec_store_elt_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 16 8)) +(rule 1 (vec_store_elt_rev (and (vxrs_ext2_enabled) ty @ (multi_lane 16 8)) val flags addr offset) (vec_store_elt16rev val (lower_address flags addr offset))) @@ -2960,7 +2898,7 @@ (store8 (put_in_reg val) (lower_address flags addr offset))) ;; Store 8-bit integer types, immediate input. 
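;; A single byte has no byte order, which is why istore8_impl does not
;; pattern-match on endianness at all; priority 1 simply prefers the
;; immediate form over the generic register form above.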
-(rule (istore8_impl flags (u8_from_value imm) addr offset) +(rule 1 (istore8_impl flags (u8_from_value imm) addr offset) (store8_imm imm (lower_address flags addr offset))) @@ -2974,19 +2912,19 @@ (decl istore16_impl (MemFlags Value Value Offset32) SideEffectNoResult) ;; Store 16-bit big-endian integer types, register input. -(rule (istore16_impl flags @ (bigendian) val addr offset) +(rule 2 (istore16_impl flags @ (bigendian) val addr offset) (store16 (put_in_reg val) (lower_address flags addr offset))) ;; Store 16-bit little-endian integer types, register input. -(rule (istore16_impl flags @ (littleendian) val addr offset) +(rule 0 (istore16_impl flags @ (littleendian) val addr offset) (storerev16 (put_in_reg val) (lower_address flags addr offset))) ;; Store 16-bit big-endian integer types, immediate input. -(rule (istore16_impl flags @ (bigendian) (i16_from_value imm) addr offset) +(rule 3 (istore16_impl flags @ (bigendian) (i16_from_value imm) addr offset) (store16_imm imm (lower_address flags addr offset))) ;; Store 16-bit little-endian integer types, immediate input. -(rule (istore16_impl flags @ (littleendian) (i16_from_swapped_value imm) addr offset) +(rule 1 (istore16_impl flags @ (littleendian) (i16_from_swapped_value imm) addr offset) (store16_imm imm (lower_address flags addr offset))) @@ -3000,15 +2938,15 @@ (decl istore32_impl (MemFlags Value Value Offset32) SideEffectNoResult) ;; Store 32-bit big-endian integer types, register input. -(rule (istore32_impl flags @ (bigendian) val addr offset) +(rule 1 (istore32_impl flags @ (bigendian) val addr offset) (store32 (put_in_reg val) (lower_address flags addr offset))) ;; Store 32-bit big-endian integer types, immediate input. -(rule (istore32_impl flags @ (bigendian) (i16_from_value imm) addr offset) +(rule 2 (istore32_impl flags @ (bigendian) (i16_from_value imm) addr offset) (store32_simm16 imm (lower_address flags addr offset))) ;; Store 32-bit little-endian integer types. -(rule (istore32_impl flags @ (littleendian) val addr offset) +(rule 0 (istore32_impl flags @ (littleendian) val addr offset) (storerev32 (put_in_reg val) (lower_address flags addr offset))) @@ -3018,15 +2956,15 @@ (decl istore64_impl (MemFlags Value Value Offset32) SideEffectNoResult) ;; Store 64-bit big-endian integer types, register input. -(rule (istore64_impl flags @ (bigendian) val addr offset) +(rule 1 (istore64_impl flags @ (bigendian) val addr offset) (store64 (put_in_reg val) (lower_address flags addr offset))) ;; Store 64-bit big-endian integer types, immediate input. -(rule (istore64_impl flags @ (bigendian) (i16_from_value imm) addr offset) +(rule 2 (istore64_impl flags @ (bigendian) (i16_from_value imm) addr offset) (store64_simm16 imm (lower_address flags addr offset))) ;; Store 64-bit little-endian integer types. -(rule (istore64_impl flags @ (littleendian) val addr offset) +(rule 0 (istore64_impl flags @ (littleendian) val addr offset) (storerev64 (put_in_reg val) (lower_address flags addr offset))) @@ -3035,7 +2973,7 @@ ;; Atomic operations that do not require a compare-and-swap loop. ;; Atomic AND for 32/64-bit big-endian types, using a single instruction. -(rule (lower (has_type (ty_32_or_64 ty) +(rule 1 (lower (has_type (ty_32_or_64 ty) (atomic_rmw flags @ (bigendian) (AtomicRmwOp.And) addr src))) (atomic_rmw_and ty (put_in_reg src) (lower_address flags addr (zero_offset)))) @@ -3047,7 +2985,7 @@ (lower_address flags addr (zero_offset))))) ;; Atomic OR for 32/64-bit big-endian types, using a single instruction. 
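;; (The single instruction is the interlocked-access LAO/LAOG form; as
;; with atomic AND, priority 1 lets it win over the generic
;; compare-and-swap loop fallback below, which is demoted to priority -1.)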
-(rule (lower (has_type (ty_32_or_64 ty) +(rule 1 (lower (has_type (ty_32_or_64 ty) (atomic_rmw flags @ (bigendian) (AtomicRmwOp.Or) addr src))) (atomic_rmw_or ty (put_in_reg src) (lower_address flags addr (zero_offset)))) @@ -3059,7 +2997,7 @@ (lower_address flags addr (zero_offset))))) ;; Atomic XOR for 32/64-bit big-endian types, using a single instruction. -(rule (lower (has_type (ty_32_or_64 ty) +(rule 1 (lower (has_type (ty_32_or_64 ty) (atomic_rmw flags @ (bigendian) (AtomicRmwOp.Xor) addr src))) (atomic_rmw_xor ty (put_in_reg src) (lower_address flags addr (zero_offset)))) @@ -3086,7 +3024,7 @@ ;; Atomic operations that require a compare-and-swap loop. ;; Operations for 32/64-bit types can use a fullword compare-and-swap loop. -(rule (lower (has_type (ty_32_or_64 ty) (atomic_rmw flags op addr src))) +(rule -1 (lower (has_type (ty_32_or_64 ty) (atomic_rmw flags op addr src))) (let ((src_reg Reg (put_in_reg src)) (addr_reg Reg (put_in_reg addr)) ;; Create body of compare-and-swap loop. @@ -3098,7 +3036,7 @@ (casloop ib ty flags addr_reg val1))) ;; Operations for 8/16-bit types must operate on the surrounding aligned word. -(rule (lower (has_type (ty_8_or_16 ty) (atomic_rmw flags op addr src))) +(rule -2 (lower (has_type (ty_8_or_16 ty) (atomic_rmw flags op addr src))) (let ((src_reg Reg (put_in_reg src)) (addr_reg Reg (put_in_reg addr)) ;; Prepare access to surrounding aligned word. @@ -3120,10 +3058,10 @@ ;; Loop bodies for 32-/64-bit atomic XCHG operations. ;; Simply use the source (possibly byte-swapped) as new target value. -(rule (atomic_rmw_body ib (ty_32_or_64 ty) (bigendian) +(rule 2 (atomic_rmw_body ib (ty_32_or_64 ty) (bigendian) (AtomicRmwOp.Xchg) tmp val src) src) -(rule (atomic_rmw_body ib (ty_32_or_64 ty) (littleendian) +(rule 1 (atomic_rmw_body ib (ty_32_or_64 ty) (littleendian) (AtomicRmwOp.Xchg) tmp val src) (bswap_reg ty src)) @@ -3131,17 +3069,17 @@ ;; On z15 this can use the NN(G)RK instruction. On z14, perform an And ;; operation and invert the result. In the little-endian case, we can ;; simply byte-swap the source operand. -(rule (atomic_rmw_body ib (and (mie2_enabled) (ty_32_or_64 ty)) (bigendian) +(rule 4 (atomic_rmw_body ib (and (mie2_enabled) (ty_32_or_64 ty)) (bigendian) (AtomicRmwOp.Nand) tmp val src) (push_alu_reg ib (aluop_not_and ty) tmp val src)) -(rule (atomic_rmw_body ib (and (mie2_enabled) (ty_32_or_64 ty)) (littleendian) +(rule 3 (atomic_rmw_body ib (and (mie2_enabled) (ty_32_or_64 ty)) (littleendian) (AtomicRmwOp.Nand) tmp val src) (push_alu_reg ib (aluop_not_and ty) tmp val (bswap_reg ty src))) -(rule (atomic_rmw_body ib (and (mie2_disabled) (ty_32_or_64 ty)) (bigendian) +(rule 2 (atomic_rmw_body ib (and (mie2_disabled) (ty_32_or_64 ty)) (bigendian) (AtomicRmwOp.Nand) tmp val src) (push_not_reg ib ty tmp (push_alu_reg ib (aluop_and ty) tmp val src))) -(rule (atomic_rmw_body ib (and (mie2_disabled) (ty_32_or_64 ty)) (littleendian) +(rule 1 (atomic_rmw_body ib (and (mie2_disabled) (ty_32_or_64 ty)) (littleendian) (AtomicRmwOp.Nand) tmp val src) (push_not_reg ib ty tmp (push_alu_reg ib (aluop_and ty) tmp val (bswap_reg ty src)))) @@ -3169,7 +3107,7 @@ (push_rxsbg ib op tmp val src 32 40 24)) ;; 16-bit big-endian case: use the low two bytes of "src" and the ;; high two bytes of "val". 
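;; Concretely, the RxSBG operation below rotates "src" left by 16 bits,
;; moving its low halfword (bits 48..63) into bit positions 32..47, and
;; then combines exactly those bits of "val" with it, leaving all other
;; bits of the word untouched.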
-(rule (atomic_rmw_body_rxsbg ib $I16 (bigendian) op tmp val src) +(rule 1 (atomic_rmw_body_rxsbg ib $I16 (bigendian) op tmp val src) (push_rxsbg ib op tmp val src 32 48 16)) ;; 16-bit little-endian case: use the low two bytes of "src", byte-swapped ;; so they end up in the high two bytes, and the low two bytes of "val". @@ -3182,7 +3120,7 @@ (rule (atomic_rmw_body_invert ib $I8 _ tmp val) (push_xor_uimm32shifted ib $I32 tmp val (uimm32shifted 0xff000000 0))) ;; 16-bit big-endian case: invert the two high bytes. -(rule (atomic_rmw_body_invert ib $I16 (bigendian) tmp val) +(rule 1 (atomic_rmw_body_invert ib $I16 (bigendian) tmp val) (push_xor_uimm32shifted ib $I32 tmp val (uimm32shifted 0xffff0000 0))) ;; 16-bit little-endian case: invert the two low bytes. (rule (atomic_rmw_body_invert ib $I16 (littleendian) tmp val) @@ -3198,11 +3136,11 @@ (decl atomic_rmw_body_addsub (VecMInstBuilder Type MemFlags ALUOp WritableReg Reg Reg) Reg) ;; 32/64-bit big-endian case: just a regular add/sub operation. -(rule (atomic_rmw_body_addsub ib (ty_32_or_64 ty) (bigendian) op tmp val src) +(rule 2 (atomic_rmw_body_addsub ib (ty_32_or_64 ty) (bigendian) op tmp val src) (push_alu_reg ib op tmp val src)) ;; 32/64-bit little-endian case: byte-swap the value loaded from memory before ;; and after performing the operation in native endianness. -(rule (atomic_rmw_body_addsub ib (ty_32_or_64 ty) (littleendian) op tmp val src) +(rule 1 (atomic_rmw_body_addsub ib (ty_32_or_64 ty) (littleendian) op tmp val src) (let ((val_swapped Reg (push_bswap_reg ib ty tmp val)) (res_swapped Reg (push_alu_reg ib op tmp val_swapped src))) (push_bswap_reg ib ty tmp res_swapped))) @@ -3212,7 +3150,7 @@ (let ((src_shifted Reg (lshl_imm $I32 src 24))) (push_alu_reg ib op tmp val src_shifted))) ;; 16-bit big-endian case: similar, just shift the source by 16 bits. -(rule (atomic_rmw_body_addsub ib $I16 (bigendian) op tmp val src) +(rule 3 (atomic_rmw_body_addsub ib $I16 (bigendian) op tmp val src) (let ((src_shifted Reg (lshl_imm $I32 src 16))) (push_alu_reg ib op tmp val src_shifted))) ;; 16-bit little-endian case: the same, but in addition we need to byte-swap @@ -3246,14 +3184,14 @@ ;; 32/64-bit big-endian case: just a comparison followed by a conditional ;; break out of the loop if the memory value does not need to change. ;; If it does need to change, the new value is simply the source operand. -(rule (atomic_rmw_body_minmax ib (ty_32_or_64 ty) (bigendian) +(rule 2 (atomic_rmw_body_minmax ib (ty_32_or_64 ty) (bigendian) op cond tmp val src) (let ((_ Reg (push_break_if ib (cmp_rr op src val) (invert_cond cond)))) src)) ;; 32/64-bit little-endian case: similar, but we need to byte-swap the ;; memory value before the comparison. If we need to store the new value, ;; it also needs to be byte-swapped. -(rule (atomic_rmw_body_minmax ib (ty_32_or_64 ty) (littleendian) +(rule 1 (atomic_rmw_body_minmax ib (ty_32_or_64 ty) (littleendian) op cond tmp val src) (let ((val_swapped Reg (push_bswap_reg ib ty tmp val)) (_ Reg (push_break_if ib (cmp_rr op src val_swapped) @@ -3271,7 +3209,7 @@ (invert_cond cond)))) (push_rxsbg ib (RxSBGOp.Insert) tmp val src_shifted 32 40 0))) ;; 16-bit big-endian case: similar, just shift the source by 16 bits. 
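;; Shifting the operand into the high halfword keeps the low halfword of
;; the surrounding word intact: the shifted source has all-zero low bits,
;; so no carry or borrow can enter from below, and any carry out of the
;; halfword falls off the top of the 32-bit operation.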
-(rule (atomic_rmw_body_minmax ib $I16 (bigendian) op cond tmp val src) +(rule 3 (atomic_rmw_body_minmax ib $I16 (bigendian) op cond tmp val src) (let ((src_shifted Reg (lshl_imm $I32 src 16)) (_ Reg (push_break_if ib (cmp_rr op src_shifted val) (invert_cond cond)))) @@ -3291,14 +3229,14 @@ ;;;; Rules for `atomic_cas` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; 32/64-bit big-endian atomic compare-and-swap instruction. -(rule (lower (has_type (ty_32_or_64 ty) +(rule 2 (lower (has_type (ty_32_or_64 ty) (atomic_cas flags @ (bigendian) addr src1 src2))) (atomic_cas_impl ty (put_in_reg src1) (put_in_reg src2) (lower_address flags addr (zero_offset)))) ;; 32/64-bit little-endian atomic compare-and-swap instruction. ;; Implemented by byte-swapping old/new inputs and the output. -(rule (lower (has_type (ty_32_or_64 ty) +(rule 1 (lower (has_type (ty_32_or_64 ty) (atomic_cas flags @ (littleendian) addr src1 src2))) (bswap_reg ty (atomic_cas_impl ty (bswap_reg ty (put_in_reg src1)) (bswap_reg ty (put_in_reg src2)) @@ -3337,7 +3275,7 @@ ;; 16-bit big-endian case: Same as above, except with values in the high ;; two bytes of "val" and low two bytes of "src1" and "src2". -(rule (atomic_cas_body ib $I16 (bigendian) tmp val src1 src2) +(rule 1 (atomic_cas_body ib $I16 (bigendian) tmp val src1 src2) (let ((_ Reg (push_break_if ib (rxsbg_test (RxSBGOp.Xor) val src1 32 48 16) (intcc_as_cond (IntCC.NotEqual))))) (push_rxsbg ib (RxSBGOp.Insert) tmp val src2 32 48 16))) @@ -3366,7 +3304,7 @@ (zext32_mem $I8 (lower_address flags addr (zero_offset)))) ;; 16-bit big-endian atomic load. -(rule (lower (has_type $I16 (atomic_load flags @ (bigendian) addr))) +(rule 1 (lower (has_type $I16 (atomic_load flags @ (bigendian) addr))) (zext32_mem $I16 (lower_address flags addr (zero_offset)))) ;; 16-bit little-endian atomic load. @@ -3374,7 +3312,7 @@ (loadrev16 (lower_address flags addr (zero_offset)))) ;; 32-bit big-endian atomic load. -(rule (lower (has_type $I32 (atomic_load flags @ (bigendian) addr))) +(rule 1 (lower (has_type $I32 (atomic_load flags @ (bigendian) addr))) (load32 (lower_address flags addr (zero_offset)))) ;; 32-bit little-endian atomic load. @@ -3382,7 +3320,7 @@ (loadrev32 (lower_address flags addr (zero_offset)))) ;; 64-bit big-endian atomic load. -(rule (lower (has_type $I64 (atomic_load flags @ (bigendian) addr))) +(rule 1 (lower (has_type $I64 (atomic_load flags @ (bigendian) addr))) (load64 (lower_address flags addr (zero_offset)))) ;; 64-bit little-endian atomic load. @@ -3451,7 +3389,7 @@ ;; Main `icmp` entry point. Generate a `ProducesBool` capturing the ;; integer comparison and immediately lower it to a 0/1 integer result. ;; In this case, it is safe to sink memory loads. -(rule (lower (has_type (fits_in_64 ty) (icmp int_cc x y))) +(rule -1 (lower (has_type (fits_in_64 ty) (icmp int_cc x y))) (lower_bool ty (icmp_val $true int_cc x y))) @@ -3461,10 +3399,10 @@ (decl icmp_val (bool IntCC Value Value) ProducesBool) ;; Dispatch for signed comparisons. -(rule (icmp_val allow_mem int_cc @ (signed) x @ (value_type (fits_in_64 _)) y) +(rule -1 (icmp_val allow_mem int_cc @ (signed) x @ (value_type (fits_in_64 _)) y) (bool (icmps_val allow_mem x y) (intcc_as_cond int_cc))) ;; Dispatch for unsigned comparisons. 
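;; As with (bigendian)/(littleendian) above, (signed) and (unsigned) are
;; external extractors, so the overlap check has to assume the two
;; dispatch rules could match the same input; the distinct -1/-2
;; priorities resolve that apparent overlap.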
-(rule (icmp_val allow_mem int_cc @ (unsigned) x @ (value_type (fits_in_64 _)) y) +(rule -2 (icmp_val allow_mem int_cc @ (unsigned) x @ (value_type (fits_in_64 _)) y) (bool (icmpu_val allow_mem x y) (intcc_as_cond int_cc))) @@ -3472,31 +3410,31 @@ (decl icmps_val (bool Value Value) ProducesFlags) ;; Compare (signed) two registers. -(rule (icmps_val _ x @ (value_type (fits_in_64 ty)) y) +(rule 0 (icmps_val _ x @ (value_type (fits_in_64 ty)) y) (icmps_reg (ty_ext32 ty) (put_in_reg_sext32 x) (put_in_reg_sext32 y))) ;; Compare (signed) a register and a sign-extended register. -(rule (icmps_val _ x @ (value_type (fits_in_64 ty)) (sext32_value y)) +(rule 3 (icmps_val _ x @ (value_type (fits_in_64 ty)) (sext32_value y)) (icmps_reg_sext32 ty x y)) ;; Compare (signed) a register and an immediate. -(rule (icmps_val _ x @ (value_type (fits_in_64 ty)) (i16_from_value y)) +(rule 2 (icmps_val _ x @ (value_type (fits_in_64 ty)) (i16_from_value y)) (icmps_simm16 (ty_ext32 ty) (put_in_reg_sext32 x) y)) -(rule (icmps_val _ x @ (value_type (fits_in_64 ty)) (i32_from_value y)) +(rule 1 (icmps_val _ x @ (value_type (fits_in_64 ty)) (i32_from_value y)) (icmps_simm32 (ty_ext32 ty) (put_in_reg_sext32 x) y)) ;; Compare (signed) a register and memory (32/64-bit types). -(rule (icmps_val $true x @ (value_type (fits_in_64 ty)) (sinkable_load_32_64 y)) +(rule 4 (icmps_val $true x @ (value_type (fits_in_64 ty)) (sinkable_load_32_64 y)) (icmps_mem ty x (sink_load y))) ;; Compare (signed) a register and memory (16-bit types). -(rule (icmps_val $true x @ (value_type (fits_in_64 ty)) (sinkable_load_16 y)) +(rule 5 (icmps_val $true x @ (value_type (fits_in_64 ty)) (sinkable_load_16 y)) (icmps_mem_sext16 (ty_ext32 ty) (put_in_reg_sext32 x) (sink_load y))) ;; Compare (signed) a register and sign-extended memory. -(rule (icmps_val $true x @ (value_type (fits_in_64 ty)) (sinkable_sload16 y)) +(rule 4 (icmps_val $true x @ (value_type (fits_in_64 ty)) (sinkable_sload16 y)) (icmps_mem_sext16 ty x (sink_sload16 y))) -(rule (icmps_val $true x @ (value_type (fits_in_64 ty)) (sinkable_sload32 y)) +(rule 4 (icmps_val $true x @ (value_type (fits_in_64 ty)) (sinkable_sload32 y)) (icmps_mem_sext32 ty x (sink_sload32 y))) @@ -3508,21 +3446,21 @@ (icmpu_reg (ty_ext32 ty) (put_in_reg_zext32 x) (put_in_reg_zext32 y))) ;; Compare (unsigned) a register and a sign-extended register. -(rule (icmpu_val _ x @ (value_type (fits_in_64 ty)) (zext32_value y)) +(rule 1 (icmpu_val _ x @ (value_type (fits_in_64 ty)) (zext32_value y)) (icmpu_reg_zext32 ty x y)) ;; Compare (unsigned) a register and an immediate. -(rule (icmpu_val _ x @ (value_type (fits_in_64 ty)) (u32_from_value y)) +(rule 2 (icmpu_val _ x @ (value_type (fits_in_64 ty)) (u32_from_value y)) (icmpu_uimm32 (ty_ext32 ty) (put_in_reg_zext32 x) y)) ;; Compare (unsigned) a register and memory (32/64-bit types). -(rule (icmpu_val $true x @ (value_type (fits_in_64 ty)) (sinkable_load_32_64 y)) +(rule 4 (icmpu_val $true x @ (value_type (fits_in_64 ty)) (sinkable_load_32_64 y)) (icmpu_mem ty x (sink_load y))) ;; Compare (unsigned) a register and memory (16-bit types). ;; Note that the ISA only provides instructions with a PC-relative memory ;; address here, so we need to check whether the sinkable load matches this. 
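;; (Unlike the signed case, the unsigned halfword comparisons exist only
;; in the relative-long forms CLHRL/CLGHRL, with no base-plus-displacement
;; encoding, hence the extra load_sym check below.)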
-(rule (icmpu_val $true x @ (value_type (fits_in_64 ty)) +(rule 3 (icmpu_val $true x @ (value_type (fits_in_64 ty)) (sinkable_load_16 ld)) (if-let y (load_sym ld)) (icmpu_mem_zext16 (ty_ext32 ty) (put_in_reg_zext32 x) (sink_load y))) @@ -3530,11 +3468,11 @@ ;; Compare (unsigned) a register and zero-extended memory. ;; Note that the ISA only provides instructions with a PC-relative memory ;; address here, so we need to check whether the sinkable load matches this. -(rule (icmpu_val $true x @ (value_type (fits_in_64 ty)) +(rule 3 (icmpu_val $true x @ (value_type (fits_in_64 ty)) (sinkable_uload16 ld)) (if-let y (uload16_sym ld)) (icmpu_mem_zext16 ty x (sink_uload16 y))) -(rule (icmpu_val $true x @ (value_type (fits_in_64 ty)) (sinkable_uload32 y)) +(rule 3 (icmpu_val $true x @ (value_type (fits_in_64 ty)) (sinkable_uload32 y)) (icmpu_mem_zext32 ty x (sink_uload32 y))) @@ -3599,7 +3537,7 @@ ;; Main `fcmp` entry point. Generate a `ProducesBool` capturing the ;; integer comparison and immediately lower it to a 0/1 integer result. -(rule (lower (has_type (fits_in_64 ty) (fcmp float_cc x y))) +(rule -1 (lower (has_type (fits_in_64 ty) (fcmp float_cc x y))) (lower_bool ty (fcmp_val float_cc x y))) ;; Return a `ProducesBool` to implement any floating-point comparison. @@ -3650,7 +3588,7 @@ ;; Return a `ProducesBool` to implement `vall_true`. (decl vall_true_val (Value) ProducesBool) -(rule (vall_true_val x @ (value_type ty)) +(rule -1 (vall_true_val x @ (value_type ty)) (bool (vec_cmpeqs ty x (vec_imm ty 0)) (floatcc_as_cond (FloatCC.Unordered)))) @@ -3728,7 +3666,7 @@ ;; Return a `ProducesBool` to implement `vany_true`. (decl vany_true_val (Value) ProducesBool) -(rule (vany_true_val x @ (value_type ty)) +(rule -1 (vany_true_val x @ (value_type ty)) (bool (vec_cmpeqs ty x (vec_imm ty 0)) (floatcc_as_cond (FloatCC.NotEqual)))) @@ -3804,7 +3742,7 @@ (let ((mask Reg (vec_imm $I8X16 (imm8x16 0 8 16 24 32 40 48 56 64 72 80 88 96 104 112 120)))) (vec_extract_lane $I64X2 (vec_bitpermute x mask) 0 (zero_reg)))) -(rule (lower (vhigh_bits x @ (value_type (multi_lane 8 16)))) +(rule 1 (lower (vhigh_bits x @ (value_type (multi_lane 8 16)))) (if-let (LaneOrder.BigEndian) (lane_order)) (let ((mask Reg (vec_imm $I8X16 (imm8x16 120 112 104 96 88 80 72 64 56 48 40 32 24 16 8 0)))) @@ -3815,7 +3753,7 @@ (let ((mask Reg (vec_imm $I8X16 (imm8x16 128 128 128 128 128 128 128 128 0 16 32 48 64 80 96 112)))) (vec_extract_lane $I64X2 (vec_bitpermute x mask) 0 (zero_reg)))) -(rule (lower (vhigh_bits x @ (value_type (multi_lane 16 8)))) +(rule 1 (lower (vhigh_bits x @ (value_type (multi_lane 16 8)))) (if-let (LaneOrder.BigEndian) (lane_order)) (let ((mask Reg (vec_imm $I8X16 (imm8x16 128 128 128 128 128 128 128 128 112 96 80 64 48 32 16 0)))) @@ -3826,7 +3764,7 @@ (let ((mask Reg (vec_imm $I8X16 (imm8x16 128 128 128 128 128 128 128 128 128 128 128 128 0 32 64 96)))) (vec_extract_lane $I64X2 (vec_bitpermute x mask) 0 (zero_reg)))) -(rule (lower (vhigh_bits x @ (value_type (multi_lane 32 4)))) +(rule 1 (lower (vhigh_bits x @ (value_type (multi_lane 32 4)))) (if-let (LaneOrder.BigEndian) (lane_order)) (let ((mask Reg (vec_imm $I8X16 (imm8x16 128 128 128 128 128 128 128 128 128 128 128 128 96 64 32 0)))) @@ -3837,7 +3775,7 @@ (let ((mask Reg (vec_imm $I8X16 (imm8x16 128 128 128 128 128 128 128 128 128 128 128 128 128 128 0 64)))) (vec_extract_lane $I64X2 (vec_bitpermute x mask) 0 (zero_reg)))) -(rule (lower (vhigh_bits x @ (value_type (multi_lane 64 2)))) +(rule 1 (lower (vhigh_bits x @ (value_type (multi_lane 64 2)))) (if-let 
(LaneOrder.BigEndian) (lane_order)) (let ((mask Reg (vec_imm $I8X16 (imm8x16 128 128 128 128 128 128 128 128 128 128 128 128 128 128 64 0)))) @@ -3868,13 +3806,13 @@ (rule (value_nonzero (bint val)) (value_nonzero val)) (rule (value_nonzero (icmp int_cc x y)) (icmp_val $false int_cc x y)) (rule (value_nonzero (fcmp float_cc x y)) (fcmp_val float_cc x y)) -(rule (value_nonzero val @ (value_type (gpr32_ty ty))) +(rule -1 (value_nonzero val @ (value_type (gpr32_ty ty))) (bool (icmps_simm16 $I32 (put_in_reg_sext32 val) 0) (intcc_as_cond (IntCC.NotEqual)))) -(rule (value_nonzero val @ (value_type (gpr64_ty ty))) +(rule -2 (value_nonzero val @ (value_type (gpr64_ty ty))) (bool (icmps_simm16 $I64 (put_in_reg val) 0) (intcc_as_cond (IntCC.NotEqual)))) -(rule (value_nonzero val @ (value_type (vr128_ty ty))) +(rule -3 (value_nonzero val @ (value_type (vr128_ty ty))) (bool (vec_cmpeqs $I64X2 val (vec_imm $I64X2 0)) (floatcc_as_cond (FloatCC.NotEqual)))) @@ -4059,7 +3997,7 @@ ;;;; Rules for `call` and `call_indirect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Direct call to an in-range function. -(rule (lower (call (func_ref_data sig_ref name (reloc_distance_near)) args)) +(rule 1 (lower (call (func_ref_data sig_ref name (reloc_distance_near)) args)) (let ((abi Sig (abi_sig sig_ref)) (_ Unit (abi_accumulate_outgoing_args_size abi)) (_ InstOutput (lower_call_args abi (range 0 (abi_num_args abi)) args)) @@ -4111,7 +4049,7 @@ ;; Lower function arguments (part 3): implicit return-area pointer. (decl lower_call_ret_arg (Sig) InstOutput) (rule (lower_call_ret_arg (abi_no_ret_arg)) (output_none)) -(rule (lower_call_ret_arg abi @ (abi_ret_arg (abi_arg_only_slot slot))) +(rule 1 (lower_call_ret_arg abi @ (abi_ret_arg (abi_arg_only_slot slot))) (let ((ret_arg Reg (load_addr (memarg_stack_off (abi_sized_stack_arg_space abi) 0))) (_ Unit (copy_reg_to_arg_slot 0 slot ret_arg))) (output_none)))
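;; (Here, too, (abi_no_ret_arg) and (abi_ret_arg ...) are external
;; extractors that the overlap check must treat as potentially
;; overlapping, so the second rule carries an explicit priority even
;; though at most one of the two can match any given signature.)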