Skip to content

Commit

Permalink
x64: Fix codegen for the i8x16.swizzle instruction (#4318)
Browse files Browse the repository at this point in the history
This commit fixes a mistake in the `Swizzle` opcode implementation in
the x64 backend of Cranelift. Previously an input register was cast to
a writable register and then modified, which I believe instructions are
not supposed to do. This was discovered as part of my investigation
into #4315.
  • Loading branch information
alexcrichton authored Jun 27, 2022
1 parent 4543a07 commit dc2fe0a
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions cranelift/codegen/src/isa/x64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2554,17 +2554,18 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::xmm_load_const(constant, zero_mask, ty));

// Use the `zero_mask` on a writable `swizzle_mask`.
let swizzle_mask = Writable::from_reg(swizzle_mask);
let swizzle_mask_tmp = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
ctx.emit(Inst::gen_move(swizzle_mask_tmp, swizzle_mask, ty));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Paddusb,
RegMem::from(zero_mask),
swizzle_mask,
swizzle_mask_tmp,
));

// Shuffle `dst` using the fixed-up `swizzle_mask`.
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pshufb,
RegMem::from(swizzle_mask),
RegMem::from(swizzle_mask_tmp),
dst,
));
}
Expand Down

0 comments on commit dc2fe0a

Please sign in to comment.