diff --git a/crates/polkavm-assembler/src/amd64.rs b/crates/polkavm-assembler/src/amd64.rs index 99a8a16e..b40a6766 100644 --- a/crates/polkavm-assembler/src/amd64.rs +++ b/crates/polkavm-assembler/src/amd64.rs @@ -9,6 +9,7 @@ const REX_EXT_MODRM_REG: u8 = REX | (1 << 2); const REX_EXT_MODRM_SIB_INDEX: u8 = REX | (1 << 1); const REX_EXT_MODRM_RM: u8 = REX | (1 << 0); +const PREFIX_REP: u8 = 0xf3; const PREFIX_OVERRIDE_SEGMENT_FS: u8 = 0x64; const PREFIX_OVERRIDE_SEGMENT_GS: u8 = 0x65; const PREFIX_OVERRIDE_OP_SIZE: u8 = 0x66; @@ -443,6 +444,7 @@ impl From for RegMem { } struct Inst { + op_rep_prefix: bool, override_op_size: bool, override_addr_size: bool, op_alt: bool, @@ -463,6 +465,7 @@ impl Inst { #[inline] const fn new(opcode: u8) -> Self { Inst { + op_rep_prefix: false, override_op_size: false, override_addr_size: false, op_alt: false, @@ -484,6 +487,12 @@ impl Inst { Inst::new(opcode | reg.modrm_rm_bits()).rex_from_reg(reg) } + #[inline] + const fn op_rep_prefix(mut self) -> Self { + self.op_rep_prefix = true; + self + } + #[inline] const fn override_op_size(mut self) -> Self { self.override_op_size = true; @@ -696,6 +705,10 @@ impl Inst { #[inline(always)] fn encode_into(self, buf: &mut InstBuf) { + if self.op_rep_prefix { + buf.append(PREFIX_REP); + } + match self.override_segment { Some(SegReg::fs) => buf.append(PREFIX_OVERRIDE_SEGMENT_FS), Some(SegReg::gs) => buf.append(PREFIX_OVERRIDE_SEGMENT_GS), @@ -1282,6 +1295,21 @@ pub mod inst { None, (fmt.write_fmt(core::format_args!("mov {}, {}", self.1.name_from(self.0), self.2.name_from(self.0)))), + movsx_8_to_64(Reg, Reg) => + Inst::new(0xbe).op_alt().rex_64b().modrm_rm_direct(self.1).modrm_reg(self.0).encode(), + None, + (fmt.write_fmt(core::format_args!("movsx {}, {}", self.0.name(), self.1.name32()))), + + movsx_16_to_64(Reg, Reg) => + Inst::new(0xbf).op_alt().rex_64b().modrm_rm_direct(self.1).modrm_reg(self.0).encode(), + None, + (fmt.write_fmt(core::format_args!("movsx {}, {}", self.0.name(), self.1.name32()))), + + movzx_16_to_64(Reg, Reg) => + Inst::new(0xb7).op_alt().rex_64b().modrm_rm_direct(self.1).modrm_reg(self.0).encode(), + None, + (fmt.write_fmt(core::format_args!("movzx {}, {}", self.0.name(), self.1.name32()))), + movsxd_32_to_64(Reg, Reg) => Inst::new(0x63).rex_64b().modrm_rm_direct(self.1).modrm_reg(self.0).encode(), None, @@ -1517,6 +1545,58 @@ pub mod inst { None, (fmt.write_fmt(core::format_args!("ror {}, 0x{:x}", self.1.display(Size::from(self.0)), self.2))), + rol_cl(RegSize, RegMem) => + Inst::new(0xd3).rex_64b_if(matches!(self.0, RegSize::R64)).regmem(self.1).modrm_opext(0b000).encode(), + None, + (fmt.write_fmt(core::format_args!("rol {}, cl", self.1.display(Size::from(self.0))))), + + ror_cl(RegSize, RegMem) => + Inst::new(0xd3).rex_64b_if(matches!(self.0, RegSize::R64)).regmem(self.1).modrm_opext(0b001).encode(), + None, + (fmt.write_fmt(core::format_args!("ror {}, cl", self.1.display(Size::from(self.0))))), + + // https://www.felixcloutier.com/x86/popcnt + popcnt(RegSize, Reg, RegMem) => + { + Inst::new(0xb8) + .op_rep_prefix() + .op_alt() + .rex_64b_if(matches!(self.0, RegSize::R64)).modrm_reg(self.1).regmem(self.2).encode() + }, + None, + (fmt.write_fmt(core::format_args!("popcnt {}, {}", self.1.name_from(self.0), self.2.display(Size::from(self.0))))), + + // https://www.felixcloutier.com/x86/lzcnt + lzcnt(RegSize, Reg, RegMem) => + { + Inst::new(0xbd) + .op_rep_prefix() + .op_alt() + .rex_64b_if(matches!(self.0, RegSize::R64)).modrm_reg(self.1).regmem(self.2).encode() + }, + None, + (fmt.write_fmt(core::format_args!("lzcnt {}, {}", self.1.name_from(self.0), self.2.display(Size::from(self.0))))), + + // https://www.felixcloutier.com/x86/tzcnt + tzcnt(RegSize, Reg, RegMem) => + { + Inst::new(0xbc) + .op_rep_prefix() + .op_alt() + .rex_64b_if(matches!(self.0, RegSize::R64)).modrm_reg(self.1).regmem(self.2).encode() + }, + None, + (fmt.write_fmt(core::format_args!("tzcnt {}, {}", self.1.name_from(self.0), self.2.display(Size::from(self.0))))), + + // https://www.felixcloutier.com/x86/bswap + bswap(RegSize, Reg) => + { + Inst::with_reg_in_op(0xc8, self.1) + .op_alt().encode() + }, + None, + (fmt.write_fmt(core::format_args!("bswap {}", self.1.name_from(self.0)))), + // https://www.felixcloutier.com/x86/test test(Operands) => { @@ -2282,6 +2362,8 @@ mod tests { push_imm, ret, ror_imm, + rol_cl, + ror_cl, sar_cl, sar_imm, setcc, diff --git a/crates/polkavm/src/compiler.rs b/crates/polkavm/src/compiler.rs index fb4af69d..dbef6d19 100644 --- a/crates/polkavm/src/compiler.rs +++ b/crates/polkavm/src/compiler.rs @@ -112,6 +112,7 @@ where step_label: Label, trap_label: Label, invalid_jump_label: Label, + or_combine_label: Label, instruction_set: RuntimeInstructionSet, _phantom: PhantomData<(S, B)>, @@ -218,6 +219,7 @@ where let step_label = asm.forward_declare_label(); let jump_table_label = asm.forward_declare_label(); let sbrk_label = asm.forward_declare_label(); + let or_combine_label = asm.forward_declare_label(); polkavm_common::static_assert!(polkavm_common::zygote::VM_SANDBOX_MAXIMUM_NATIVE_CODE_SIZE < u32::MAX); @@ -244,6 +246,7 @@ where step_label, jump_table_label, sbrk_label, + or_combine_label, gas_metering: config.gas_metering, step_tracing, program_counter_to_machine_code_offset_list, @@ -258,6 +261,7 @@ where ArchVisitor(&mut visitor).emit_trap_trampoline(); ArchVisitor(&mut visitor).emit_ecall_trampoline(); ArchVisitor(&mut visitor).emit_sbrk_trampoline(); + ArchVisitor(&mut visitor).emit_or_combine_trampoline(); if step_tracing { ArchVisitor(&mut visitor).emit_step_trampoline(); @@ -544,48 +548,81 @@ where { type ReturnTy = (); - fn and_inverted(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg, _: RawReg) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn and_inverted(&mut self, code_offset: u32, args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.and_inverted(d, s1, s2); + ArchVisitor(self).and_inverted(d, s1, s2); + self.after_instruction::(code_offset, args_length); } - fn or_inverted(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg, _: RawReg) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn or_inverted(&mut self, code_offset: u32, args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.or_inverted(d, s1, s2); + ArchVisitor(self).or_inverted(d, s1, s2); + self.after_instruction::(code_offset, args_length); } - fn xnor(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg, _: RawReg) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn xnor(&mut self, code_offset: u32, args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.xnor(d, s1, s2); + ArchVisitor(self).xnor(d, s1, s2); + self.after_instruction::(code_offset, args_length); } - fn maximum(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg, _: RawReg) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn maximum(&mut self, code_offset: u32, args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.maximum(d, s1, s2); + ArchVisitor(self).maximum(d, s1, s2); + self.after_instruction::(code_offset, args_length); } - fn maximum_unsigned(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg, _: RawReg) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn maximum_unsigned(&mut self, code_offset: u32, args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.maximum_unsigned(d, s1, s2); + ArchVisitor(self).maximum_unsigned(d, s1, s2); + self.after_instruction::(code_offset, args_length); } - fn minimum(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg, _: RawReg) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn minimum(&mut self, code_offset: u32, args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.minimum(d, s1, s2); + ArchVisitor(self).minimum(d, s1, s2); + self.after_instruction::(code_offset, args_length); } - fn minimum_unsigned(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg, _: RawReg) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn minimum_unsigned(&mut self, code_offset: u32, args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.minimum_unsigned(d, s1, s2); + ArchVisitor(self).minimum_unsigned(d, s1, s2); + self.after_instruction::(code_offset, args_length); } - fn rotate_left(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg, _: RawReg) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn rotate_left(&mut self, code_offset: u32, args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.rotate_left(d, s1, s2); + ArchVisitor(self).rotate_left(d, s1, s2); + self.after_instruction::(code_offset, args_length); } - fn rotate_left_word(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg, _: RawReg) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn rotate_left_word(&mut self, code_offset: u32, args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.rotate_left_word(d, s1, s2); + ArchVisitor(self).rotate_left_word(d, s1, s2); + self.after_instruction::(code_offset, args_length); } - fn rotate_right(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg, _: RawReg) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn rotate_right(&mut self, code_offset: u32, args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.rotate_right(d, s1, s2); + ArchVisitor(self).rotate_right(d, s1, s2); + self.after_instruction::(code_offset, args_length); } - fn rotate_right_word(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg, _: RawReg) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn rotate_right_word(&mut self, code_offset: u32, args_length: u32, d: RawReg, s1: RawReg, s2: RawReg) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.rotate_right_word(d, s1, s2); + ArchVisitor(self).rotate_right_word(d, s1, s2); + self.after_instruction::(code_offset, args_length); } #[inline(always)] @@ -1045,48 +1082,81 @@ where self.after_instruction::(code_offset, args_length); } - fn count_leading_zero_bits(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn count_leading_zero_bits(&mut self, code_offset: u32, args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.count_leading_zero_bits(d, s); + ArchVisitor(self).count_leading_zero_bits(d, s); + self.after_instruction::(code_offset, args_length); } - fn count_leading_zero_bits_word(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn count_leading_zero_bits_word(&mut self, code_offset: u32, args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.count_leading_zero_bits_word(d, s); + ArchVisitor(self).count_leading_zero_bits_word(d, s); + self.after_instruction::(code_offset, args_length); } - fn count_trailing_zero_bits(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn count_trailing_zero_bits(&mut self, code_offset: u32, args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.count_trailing_zero_bits(d, s); + ArchVisitor(self).count_trailing_zero_bits(d, s); + self.after_instruction::(code_offset, args_length); } - fn count_trailing_zero_bits_word(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn count_trailing_zero_bits_word(&mut self, code_offset: u32, args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.count_trailing_zero_bits_word(d, s); + ArchVisitor(self).count_trailing_zero_bits_word(d, s); + self.after_instruction::(code_offset, args_length); } - fn count_set_bits(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn count_set_bits(&mut self, code_offset: u32, args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.count_set_bits(d, s); + ArchVisitor(self).count_set_bits(d, s); + self.after_instruction::(code_offset, args_length); } - fn count_set_bits_word(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn count_set_bits_word(&mut self, code_offset: u32, args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.count_set_bits_word(d, s); + ArchVisitor(self).count_set_bits_word(d, s); + self.after_instruction::(code_offset, args_length); } - fn sign_extend_byte(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn sign_extend_byte(&mut self, code_offset: u32, args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.sign_extend_byte(d, s); + ArchVisitor(self).sign_extend_byte(d, s); + self.after_instruction::(code_offset, args_length); } - fn sign_extend_half_word(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn sign_extend_half_word(&mut self, code_offset: u32, args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.sign_extend_half_word(d, s); + ArchVisitor(self).sign_extend_half_word(d, s); + self.after_instruction::(code_offset, args_length); } - fn zero_extend_half_word(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn zero_extend_half_word(&mut self, code_offset: u32, args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.zero_extend_half_word(d, s); + ArchVisitor(self).zero_extend_half_word(d, s); + self.after_instruction::(code_offset, args_length); } - fn or_combine_byte(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn or_combine_byte(&mut self, code_offset: u32, args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.or_combine_byte(d, s); + ArchVisitor(self).or_combine_byte(d, s); + self.after_instruction::(code_offset, args_length); } - fn reverse_byte(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn reverse_byte(&mut self, code_offset: u32, args_length: u32, d: RawReg, s: RawReg) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.reverse_byte(d, s); + ArchVisitor(self).reverse_byte(d, s); + self.after_instruction::(code_offset, args_length); } #[inline(always)] @@ -1122,23 +1192,35 @@ where } #[inline(always)] - fn rotate_right_imm(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg, _: u32) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn rotate_right_imm(&mut self, code_offset: u32, args_length: u32, d: RawReg, s: RawReg, c: u32) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.rotate_right_imm(d, s, c); + ArchVisitor(self).rotate_right_imm(d, s, c); + self.after_instruction::(code_offset, args_length); } #[inline(always)] - fn rotate_right_imm_alt(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg, _: u32) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn rotate_right_imm_alt(&mut self, code_offset: u32, args_length: u32, d: RawReg, s: RawReg, c: u32) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.rotate_right_imm_alt(d, s, c); + ArchVisitor(self).rotate_right_imm_alt(d, s, c); + self.after_instruction::(code_offset, args_length); } #[inline(always)] - fn rotate_right_word_imm(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg, _: u32) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn rotate_right_word_imm(&mut self, code_offset: u32, args_length: u32, d: RawReg, s: RawReg, c: u32) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.rotate_right_word_imm(d, s, c); + ArchVisitor(self).rotate_right_word_imm(d, s, c); + self.after_instruction::(code_offset, args_length); } #[inline(always)] - fn rotate_right_word_imm_alt(&mut self, code_offset: u32, args_length: u32, _: RawReg, _: RawReg, _: u32) -> Self::ReturnTy { - self.trap(code_offset, args_length) + fn rotate_right_word_imm_alt(&mut self, code_offset: u32, args_length: u32, d: RawReg, s: RawReg, c: u32) -> Self::ReturnTy { + self.before_instruction(code_offset); + self.gas_visitor.rotate_right_word_imm_alt(d, s, c); + ArchVisitor(self).rotate_right_word_imm_alt(d, s, c); + self.after_instruction::(code_offset, args_length); } #[inline(always)] diff --git a/crates/polkavm/src/compiler/amd64.rs b/crates/polkavm/src/compiler/amd64.rs index 29b54d34..316d0785 100644 --- a/crates/polkavm/src/compiler/amd64.rs +++ b/crates/polkavm/src/compiler/amd64.rs @@ -701,6 +701,41 @@ where self.push(ret()); } + pub(crate) fn emit_or_combine_trampoline(&mut self) { + log::trace!("Emitting trampoline: or_combine"); + let label = self.or_combine_label; + let reg_size = RegSize::R64; + + self.define_label(label); + self.push(push(TMP_REG)); + self.save_registers_to_vmctx(); + + self.push(pop(rdi)); + self.push(or((rdi, imm32(0xff)))); + self.push(test((TMP_REG, imm32(0xff)))); + self.push(cmov(Condition::NotEqual, reg_size, TMP_REG, rdi)); + + self.push(mov(reg_size, rdi, TMP_REG)); + self.push(or((rdi, imm32(0xff00)))); + self.push(test((TMP_REG, imm32(0xff00)))); + self.push(cmov(Condition::NotEqual, reg_size, TMP_REG, rdi)); + + self.push(mov(reg_size, rdi, TMP_REG)); + self.push(or((rdi, imm32(0xff0000)))); + self.push(test((TMP_REG, imm32(0xff0000)))); + self.push(cmov(Condition::NotEqual, reg_size, TMP_REG, rdi)); + + self.push(mov(reg_size, rdi, TMP_REG)); + self.push(or((rdi, imm32(0xff000000)))); + self.push(test((TMP_REG, imm32(0xff000000)))); + self.push(cmov(Condition::NotEqual, reg_size, TMP_REG, rdi)); + self.push(push(TMP_REG)); + + self.restore_registers_from_vmctx(); + self.push(pop(TMP_REG)); + self.push(ret()); + } + pub(crate) fn trace_execution(&mut self, code_offset: u32) { let step_label = self.step_label; let asm = self.asm.reserve::(); @@ -817,6 +852,196 @@ where asm.assert_reserved_exactly_as_needed(); } + #[inline(always)] + pub fn and_inverted(&mut self, d: RawReg, s1: RawReg, s2: RawReg) { + let reg_size = self.reg_size(); + let d = conv_reg(d); + let s1 = conv_reg(s1); + let s2 = conv_reg(s2); + + // todo: change this with ANDN instruction + + let asm = self.asm.reserve::(); + if d == s1 { + // d = d & ~s2 + let asm = asm.push(mov(reg_size, TMP_REG, s2)); + let asm = asm.push(not(reg_size, TMP_REG)); + asm.push(and((reg_size, d, TMP_REG))) + } else if d == s2 { + // d = s1 & ~d + let asm = asm.push(not(reg_size, s2)); + let asm = asm.push(and((reg_size, d, s1))); + asm.push_none() + } else { + // d = s1 & ~s2 + let asm = asm.push(mov(reg_size, d, s2)); + let asm = asm.push(not(reg_size, d)); + asm.push(and((reg_size, d, s1))) + } + .assert_reserved_exactly_as_needed(); + } + + #[inline(always)] + pub fn or_inverted(&mut self, d: RawReg, s1: RawReg, s2: RawReg) { + let reg_size = self.reg_size(); + let d = conv_reg(d); + let s1 = conv_reg(s1); + let s2 = conv_reg(s2); + + let asm = self.asm.reserve::(); + if d == s1 { + // d = d & ~s2 + let asm = asm.push(mov(reg_size, TMP_REG, s2)); + let asm = asm.push(not(reg_size, TMP_REG)); + asm.push(or((reg_size, d, TMP_REG))) + } else if d == s2 { + // d = s1 & ~d + let asm = asm.push(not(reg_size, s2)); + let asm = asm.push(or((reg_size, d, s1))); + asm.push_none() + } else { + // d = s1 & ~s2 + let asm = asm.push(mov(reg_size, d, s2)); + let asm = asm.push(not(reg_size, d)); + asm.push(or((reg_size, d, s1))) + } + .assert_reserved_exactly_as_needed(); + } + + #[inline(always)] + pub fn xnor(&mut self, d: RawReg, s1: RawReg, s2: RawReg) { + let reg_size = self.reg_size(); + self.xor(d, s1, s2); + let asm = self.asm.reserve::(); + asm.push(not(reg_size, conv_reg(d))).assert_reserved_exactly_as_needed(); + } + + #[inline(always)] + pub fn maximum(&mut self, d: RawReg, s1: RawReg, s2: RawReg) { + let reg_size = self.reg_size(); + let d = conv_reg(d); + let s1 = conv_reg(s1); + let s2 = conv_reg(s2); + + let asm = self.asm.reserve::(); + if d == s1 { + let asm = asm.push(cmp((reg_size, s2, s1))); + asm.push(cmov(Condition::Greater, reg_size, d, s2)).push_none() + } else if d == s2 { + let asm = asm.push(cmp((reg_size, s1, s2))); + asm.push(cmov(Condition::Greater, reg_size, d, s1)).push_none() + } else { + let asm = asm.push(mov(reg_size, d, s1)); + let asm = asm.push(cmp((reg_size, s2, s1))); + asm.push(cmov(Condition::Greater, reg_size, d, s2)) + } + .assert_reserved_exactly_as_needed(); + } + + #[inline(always)] + pub fn maximum_unsigned(&mut self, d: RawReg, s1: RawReg, s2: RawReg) { + let reg_size = self.reg_size(); + let d = conv_reg(d); + let s1 = conv_reg(s1); + let s2 = conv_reg(s2); + + let asm = self.asm.reserve::(); + if d == s1 { + let asm = asm.push(cmp((reg_size, s2, s1))); + asm.push(cmov(Condition::Above, reg_size, d, s2)).push_none() + } else if d == s2 { + let asm = asm.push(cmp((reg_size, s1, s2))); + asm.push(cmov(Condition::Above, reg_size, d, s1)).push_none() + } else { + let asm = asm.push(mov(reg_size, d, s1)); + let asm = asm.push(cmp((reg_size, s2, s1))); + asm.push(cmov(Condition::Above, reg_size, d, s2)) + } + .assert_reserved_exactly_as_needed(); + } + + #[inline(always)] + pub fn minimum(&mut self, d: RawReg, s1: RawReg, s2: RawReg) { + let reg_size = self.reg_size(); + let d = conv_reg(d); + let s1 = conv_reg(s1); + let s2 = conv_reg(s2); + + let asm = self.asm.reserve::(); + if d == s1 { + let asm = asm.push(cmp((reg_size, s2, s1))); + asm.push(cmov(Condition::Less, reg_size, d, s2)).push_none() + } else if d == s2 { + let asm = asm.push(cmp((reg_size, s1, s2))); + asm.push(cmov(Condition::Less, reg_size, d, s1)).push_none() + } else { + let asm = asm.push(mov(reg_size, d, s1)); + let asm = asm.push(cmp((reg_size, s2, s1))); + asm.push(cmov(Condition::Less, reg_size, d, s2)) + } + .assert_reserved_exactly_as_needed(); + } + + #[inline(always)] + pub fn minimum_unsigned(&mut self, d: RawReg, s1: RawReg, s2: RawReg) { + let reg_size = self.reg_size(); + let d = conv_reg(d); + let s1 = conv_reg(s1); + let s2 = conv_reg(s2); + + let asm = self.asm.reserve::(); + if d == s1 { + let asm = asm.push(cmp((reg_size, s2, s1))); + asm.push(cmov(Condition::Below, reg_size, d, s2)).push_none() + } else if d == s2 { + let asm = asm.push(cmp((reg_size, s1, s2))); + asm.push(cmov(Condition::Below, reg_size, d, s1)).push_none() + } else { + let asm = asm.push(mov(reg_size, d, s1)); + let asm = asm.push(cmp((reg_size, s2, s1))); + asm.push(cmov(Condition::Below, reg_size, d, s2)) + } + .assert_reserved_exactly_as_needed(); + } + + #[inline(always)] + pub fn rotate_left(&mut self, d: RawReg, s1: RawReg, s2: RawReg) { + self.rotate_left_word(d, s1, s2); + } + + #[inline(always)] + pub fn rotate_left_word(&mut self, d: RawReg, s1: RawReg, s2: RawReg) { + let reg_size = self.reg_size(); + let d = conv_reg(d); + let s1 = conv_reg(s1); + let s2 = conv_reg(s2); + + let asm = self.asm.reserve::(); + let asm = asm.push(mov(reg_size, rcx, s2)); + let asm = asm.push_if(d != s1, mov(reg_size, d, s1)); + let asm = asm.push(rol_cl(reg_size, d)); + asm.assert_reserved_exactly_as_needed(); + } + + #[inline(always)] + pub fn rotate_right(&mut self, d: RawReg, s1: RawReg, s2: RawReg) { + self.rotate_right_word(d, s1, s2); + } + + #[inline(always)] + pub fn rotate_right_word(&mut self, d: RawReg, s1: RawReg, s2: RawReg) { + let reg_size = self.reg_size(); + let d = conv_reg(d); + let s1 = conv_reg(s1); + let s2 = conv_reg(s2); + + let asm = self.asm.reserve::(); + let asm = asm.push(mov(reg_size, rcx, s2)); + let asm = asm.push_if(d != s1, mov(reg_size, d, s1)); + let asm = asm.push(ror_cl(reg_size, d)); + asm.assert_reserved_exactly_as_needed(); + } + #[inline(always)] #[cold] pub fn invalid(&mut self, code_offset: u32) { @@ -1527,6 +1752,72 @@ where self.mov(d, s); } + #[inline(always)] + pub fn count_leading_zero_bits(&mut self, d: RawReg, s: RawReg) { + self.count_leading_zero_bits_word(d, s); + } + + #[inline(always)] + pub fn count_leading_zero_bits_word(&mut self, d: RawReg, s: RawReg) { + self.push(lzcnt(self.reg_size(), conv_reg(d), conv_reg(s))) + } + + #[inline(always)] + pub fn count_trailing_zero_bits(&mut self, d: RawReg, s: RawReg) { + self.count_trailing_zero_bits_word(d, s); + } + + #[inline(always)] + pub fn count_trailing_zero_bits_word(&mut self, d: RawReg, s: RawReg) { + self.push(tzcnt(self.reg_size(), conv_reg(d), conv_reg(s))) + } + + #[inline(always)] + pub fn count_set_bits(&mut self, d: RawReg, s: RawReg) { + self.count_set_bits_word(d, s); + } + + #[inline(always)] + pub fn count_set_bits_word(&mut self, d: RawReg, s: RawReg) { + self.push(popcnt(self.reg_size(), conv_reg(d), conv_reg(s))) + } + + #[inline(always)] + pub fn sign_extend_byte(&mut self, d: RawReg, s: RawReg) { + self.push(movsx_8_to_64(conv_reg(d), conv_reg(s))) + } + + #[inline(always)] + pub fn sign_extend_half_word(&mut self, d: RawReg, s: RawReg) { + self.push(movsx_16_to_64(conv_reg(d), conv_reg(s))) + } + + #[inline(always)] + pub fn zero_extend_half_word(&mut self, d: RawReg, s: RawReg) { + self.push(movzx_16_to_64(conv_reg(d), conv_reg(s))) + } + + #[inline(always)] + pub fn or_combine_byte(&mut self, d: RawReg, s: RawReg) { + let reg_size = self.reg_size(); + let d = conv_reg(d); + let s = conv_reg(s); + let or_combine_label = self.or_combine_label; + + self.push(mov(reg_size, TMP_REG, s)); + self.call_to_label(or_combine_label); + self.push(mov(reg_size, d, TMP_REG)); + } + + #[inline(always)] + pub fn reverse_byte(&mut self, d: RawReg, s: RawReg) { + let reg_size = self.reg_size(); + let asm = self.asm.reserve::(); + let asm = asm.push_if(d != s, mov(reg_size, conv_reg(d), conv_reg(s))); + let asm = asm.push(bswap(reg_size, conv_reg(d))); + asm.assert_reserved_exactly_as_needed(); + } + #[inline(always)] pub fn cmov_if_zero(&mut self, d: RawReg, s: RawReg, c: RawReg) { self.cmov(d, s, c, Condition::Equal); @@ -1547,6 +1838,42 @@ where self.cmov_imm(d, s, c, Condition::NotEqual); } + #[inline(always)] + pub fn rotate_right_imm(&mut self, d: RawReg, s: RawReg, c: u32) { + self.rotate_right_word_imm(d, s, c); + } + + #[inline(always)] + pub fn rotate_right_imm_alt(&mut self, d: RawReg, s: RawReg, c: u32) { + self.rotate_right_word_imm_alt(d, s, c); + } + + #[inline(always)] + pub fn rotate_right_word_imm(&mut self, d: RawReg, s: RawReg, c: u32) { + let reg_size = self.reg_size(); + let d = conv_reg(d); + let s = conv_reg(s); + + let asm = self.asm.reserve::(); + let asm = asm.push(mov_imm(rcx, imm32(c))); + let asm = asm.push_if(d != s, mov(reg_size, d, s)); + let asm = asm.push(ror_cl(reg_size, d)); + asm.assert_reserved_exactly_as_needed(); + } + + #[inline(always)] + pub fn rotate_right_word_imm_alt(&mut self, d: RawReg, s: RawReg, c: u32) { + let reg_size = self.reg_size(); + let d = conv_reg(d); + let s = conv_reg(s); + + let asm = self.asm.reserve::(); + let asm = asm.push(mov(reg_size, rcx, s)); + let asm = asm.push(mov_imm(d, imm32(c))); + let asm = asm.push(ror_cl(reg_size, d)); + asm.assert_reserved_exactly_as_needed(); + } + #[inline(always)] fn add_imm_generic(&mut self, reg_size: RegSize, d: RawReg, s1: RawReg, s2: u32) { let d = conv_reg(d);