From 210036a22eefa2e33d1a76a62d4ec6f5bc66a92b Mon Sep 17 00:00:00 2001 From: Brox Chen Date: Wed, 19 Feb 2025 11:37:24 -0500 Subject: [PATCH] [AMDGPU][True16][CodeGen] true16 codegen pattern for fma (#127240) The previous PR https://github.com/llvm/llvm-project/pull/122950 was reverted because it hit a buildbot failure: another patch was merged while that PR was under review, which left one test out of date. This reopens the PR with that test fixed. --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 2 + llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 125 +++++-- llvm/lib/Target/AMDGPU/SIInstructions.td | 8 + .../Target/AMDGPU/SIShrinkInstructions.cpp | 17 +- llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll | 68 ++-- .../CodeGen/AMDGPU/fix-sgpr-copies-f16.mir | 3 +- llvm/test/CodeGen/AMDGPU/fma.f16.ll | 328 +++++++++++++----- llvm/test/CodeGen/AMDGPU/preserve-hi16.ll | 3 +- .../CodeGen/AMDGPU/shrink-mad-fma-fake16.mir | 242 +++++++++++++ .../CodeGen/AMDGPU/shrink-mad-fma-gfx10.mir | 258 ++++++++++++++ llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir | 115 +----- 11 files changed, 913 insertions(+), 256 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/shrink-mad-fma-fake16.mir create mode 100644 llvm/test/CodeGen/AMDGPU/shrink-mad-fma-gfx10.mir diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index ab396929162d..fa15e73bc31d 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -203,6 +203,8 @@ static unsigned macToMad(unsigned Opc) { return AMDGPU::V_FMA_F32_e64; case AMDGPU::V_FMAC_F16_e64: return AMDGPU::V_FMA_F16_gfx9_e64; + case AMDGPU::V_FMAC_F16_t16_e64: + return AMDGPU::V_FMA_F16_gfx9_t16_e64; case AMDGPU::V_FMAC_F16_fake16_e64: return AMDGPU::V_FMA_F16_gfx9_fake16_e64; case AMDGPU::V_FMAC_LEGACY_F32_e64: diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 7dace11d208a..2691a4135b6f 100644 --- 
a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3461,6 +3461,62 @@ std::optional SIInstrInfo::extractSubregFromImm(int64_t Imm, llvm_unreachable("covered subregister switch"); } +static unsigned getNewFMAAKInst(const GCNSubtarget &ST, unsigned Opc) { + switch (Opc) { + case AMDGPU::V_MAC_F16_e32: + case AMDGPU::V_MAC_F16_e64: + case AMDGPU::V_MAD_F16_e64: + return AMDGPU::V_MADAK_F16; + case AMDGPU::V_MAC_F32_e32: + case AMDGPU::V_MAC_F32_e64: + case AMDGPU::V_MAD_F32_e64: + return AMDGPU::V_MADAK_F32; + case AMDGPU::V_FMAC_F32_e32: + case AMDGPU::V_FMAC_F32_e64: + case AMDGPU::V_FMA_F32_e64: + return AMDGPU::V_FMAAK_F32; + case AMDGPU::V_FMAC_F16_e32: + case AMDGPU::V_FMAC_F16_e64: + case AMDGPU::V_FMAC_F16_t16_e64: + case AMDGPU::V_FMAC_F16_fake16_e64: + case AMDGPU::V_FMA_F16_e64: + return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts() + ? AMDGPU::V_FMAAK_F16_t16 + : AMDGPU::V_FMAAK_F16_fake16 + : AMDGPU::V_FMAAK_F16; + default: + llvm_unreachable("invalid instruction"); + } +} + +static unsigned getNewFMAMKInst(const GCNSubtarget &ST, unsigned Opc) { + switch (Opc) { + case AMDGPU::V_MAC_F16_e32: + case AMDGPU::V_MAC_F16_e64: + case AMDGPU::V_MAD_F16_e64: + return AMDGPU::V_MADMK_F16; + case AMDGPU::V_MAC_F32_e32: + case AMDGPU::V_MAC_F32_e64: + case AMDGPU::V_MAD_F32_e64: + return AMDGPU::V_MADMK_F32; + case AMDGPU::V_FMAC_F32_e32: + case AMDGPU::V_FMAC_F32_e64: + case AMDGPU::V_FMA_F32_e64: + return AMDGPU::V_FMAMK_F32; + case AMDGPU::V_FMAC_F16_e32: + case AMDGPU::V_FMAC_F16_e64: + case AMDGPU::V_FMAC_F16_t16_e64: + case AMDGPU::V_FMAC_F16_fake16_e64: + case AMDGPU::V_FMA_F16_e64: + return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts() + ? 
AMDGPU::V_FMAMK_F16_t16 + : AMDGPU::V_FMAMK_F16_fake16 + : AMDGPU::V_FMAMK_F16; + default: + llvm_unreachable("invalid instruction"); + } +} + bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Register Reg, MachineRegisterInfo *MRI) const { if (!MRI->hasOneNonDBGUse(Reg)) @@ -3533,6 +3589,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 || Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 || + Opc == AMDGPU::V_FMAC_F16_t16_e64 || Opc == AMDGPU::V_FMAC_F16_fake16_e64) { // Don't fold if we are using source or output modifiers. The new VOP2 // instructions don't have them. @@ -3555,6 +3612,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, bool IsFMA = Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 || + Opc == AMDGPU::V_FMAC_F16_t16_e64 || Opc == AMDGPU::V_FMAC_F16_fake16_e64; MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1); MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2); @@ -3586,18 +3644,15 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, !isInlineConstant(Def->getOperand(1))) return false; - unsigned NewOpc = - IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32 - : ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16 - : AMDGPU::V_FMAMK_F16) - : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16); + unsigned NewOpc = getNewFMAMKInst(ST, Opc); if (pseudoToMCOpcode(NewOpc) == -1) return false; - // V_FMAMK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite - // would also require restricting their register classes. For now - // just bail out. 
- if (NewOpc == AMDGPU::V_FMAMK_F16_fake16) + // V_FMAMK_F16_t16 takes VGPR_16_Lo128 operands while V_FMAMK_F16_fake16 + // takes VGPR_32_Lo128 operands, so the rewrite would also require + // restricting their register classes. For now just bail out. + if (NewOpc == AMDGPU::V_FMAMK_F16_t16 || + NewOpc == AMDGPU::V_FMAMK_F16_fake16) return false; const std::optional SubRegImm = extractSubregFromImm( @@ -3613,7 +3668,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, Src0->setIsKill(RegSrc->isKill()); if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 || - Opc == AMDGPU::V_FMAC_F32_e64 || + Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 || Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64) UseMI.untieRegOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); @@ -3666,25 +3721,22 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, } } - unsigned NewOpc = - IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32 - : ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16 - : AMDGPU::V_FMAAK_F16) - : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16); + unsigned NewOpc = getNewFMAAKInst(ST, Opc); if (pseudoToMCOpcode(NewOpc) == -1) return false; - // V_FMAAK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite - // would also require restricting their register classes. For now - // just bail out. - if (NewOpc == AMDGPU::V_FMAAK_F16_fake16) + // V_FMAAK_F16_t16 takes VGPR_16_Lo128 operands while V_FMAAK_F16_fake16 + // takes VGPR_32_Lo128 operands, so the rewrite would also require + // restricting their register classes. For now just bail out. + if (NewOpc == AMDGPU::V_FMAAK_F16_t16 || + NewOpc == AMDGPU::V_FMAAK_F16_fake16) return false; // FIXME: This would be a lot easier if we could return a new instruction // instead of having to modify in place. 
if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 || - Opc == AMDGPU::V_FMAC_F32_e64 || + Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 || Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64) UseMI.untieRegOperand( AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)); @@ -3874,8 +3926,11 @@ static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc) { return AMDGPU::V_FMA_LEGACY_F32_e64; case AMDGPU::V_FMAC_F16_e32: case AMDGPU::V_FMAC_F16_e64: + case AMDGPU::V_FMAC_F16_t16_e64: case AMDGPU::V_FMAC_F16_fake16_e64: - return ST.hasTrue16BitInsts() ? AMDGPU::V_FMA_F16_gfx9_fake16_e64 + return ST.hasTrue16BitInsts() ? ST.useRealTrue16Insts() + ? AMDGPU::V_FMA_F16_gfx9_t16_e64 + : AMDGPU::V_FMA_F16_gfx9_fake16_e64 : AMDGPU::V_FMA_F16_gfx9_e64; case AMDGPU::V_FMAC_F32_e32: case AMDGPU::V_FMAC_F32_e64: @@ -3941,19 +3996,21 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, return MIB; } - assert( - Opc != AMDGPU::V_FMAC_F16_fake16_e32 && - "V_FMAC_F16_fake16_e32 is not supported and not expected to be present " - "pre-RA"); + assert(Opc != AMDGPU::V_FMAC_F16_t16_e32 && + Opc != AMDGPU::V_FMAC_F16_fake16_e32 && + "V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be " + "present pre-RA"); // Handle MAC/FMAC. 
bool IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 || + Opc == AMDGPU::V_FMAC_F16_t16_e64 || Opc == AMDGPU::V_FMAC_F16_fake16_e64; bool IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 || Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 || Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 || + Opc == AMDGPU::V_FMAC_F16_t16_e64 || Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64; bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64; @@ -3968,6 +4025,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, return nullptr; case AMDGPU::V_MAC_F16_e64: case AMDGPU::V_FMAC_F16_e64: + case AMDGPU::V_FMAC_F16_t16_e64: case AMDGPU::V_FMAC_F16_fake16_e64: case AMDGPU::V_MAC_F32_e64: case AMDGPU::V_MAC_LEGACY_F32_e64: @@ -4052,11 +4110,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, int64_t Imm; if (!Src0Literal && getFoldableImm(Src2, Imm, &DefMI)) { - unsigned NewOpc = - IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16 - : AMDGPU::V_FMAAK_F16) - : AMDGPU::V_FMAAK_F32) - : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32); + unsigned NewOpc = getNewFMAAKInst(ST, Opc); if (pseudoToMCOpcode(NewOpc) != -1) { MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) .add(*Dst) @@ -4071,11 +4125,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI, return MIB; } } - unsigned NewOpc = - IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16 - : AMDGPU::V_FMAMK_F16) - : AMDGPU::V_FMAMK_F32) - : (IsF16 ? 
AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32); + unsigned NewOpc = getNewFMAMKInst(ST, Opc); if (!Src0Literal && getFoldableImm(Src1, Imm, &DefMI)) { if (pseudoToMCOpcode(NewOpc) != -1) { MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) @@ -4513,6 +4563,7 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI, case AMDGPU::V_MAC_F32_e64: case AMDGPU::V_MAC_LEGACY_F32_e64: case AMDGPU::V_FMAC_F16_e64: + case AMDGPU::V_FMAC_F16_t16_e64: case AMDGPU::V_FMAC_F16_fake16_e64: case AMDGPU::V_FMAC_F32_e64: case AMDGPU::V_FMAC_F64_e64: @@ -5569,7 +5620,9 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64; case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64; case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64; - case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_fake16_e64; + case AMDGPU::S_FMAC_F16: + return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64 + : AMDGPU::V_FMAC_F16_fake16_e64; case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32; case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32; case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 6e08aff24ec2..3faf0795157d 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -3287,6 +3287,14 @@ def : GCNPat < (V_FMAC_F16_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2) >; +let True16Predicate = UseRealTrue16Insts in +def : GCNPat < + (fma (f16 (VOP3NoMods f16:$src0)), + (f16 (VOP3NoMods f16:$src1)), + (f16 (VOP3NoMods f16:$src2))), + (V_FMAC_F16_t16_e64 SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, + SRCMODS.NONE, $src2) +>; let True16Predicate = UseFakeTrue16Insts in def : GCNPat < (fma (f16 (VOP3NoMods f16:$src0)), diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp index 
979812e07fc3..f03cde455f29 100644 --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -455,9 +455,13 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const { break; case AMDGPU::V_FMA_F16_e64: case AMDGPU::V_FMA_F16_gfx9_e64: + NewOpcode = AMDGPU::V_FMAAK_F16; + break; + case AMDGPU::V_FMA_F16_gfx9_t16_e64: + NewOpcode = AMDGPU::V_FMAAK_F16_t16; + break; case AMDGPU::V_FMA_F16_gfx9_fake16_e64: - NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16 - : AMDGPU::V_FMAAK_F16; + NewOpcode = AMDGPU::V_FMAAK_F16_fake16; break; } } @@ -485,9 +489,13 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const { break; case AMDGPU::V_FMA_F16_e64: case AMDGPU::V_FMA_F16_gfx9_e64: + NewOpcode = AMDGPU::V_FMAMK_F16; + break; + case AMDGPU::V_FMA_F16_gfx9_t16_e64: + NewOpcode = AMDGPU::V_FMAMK_F16_t16; + break; case AMDGPU::V_FMA_F16_gfx9_fake16_e64: - NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16 - : AMDGPU::V_FMAMK_F16; + NewOpcode = AMDGPU::V_FMAMK_F16_fake16; break; } } @@ -959,6 +967,7 @@ bool SIShrinkInstructions::run(MachineFunction &MF) { MI.getOpcode() == AMDGPU::V_MAD_F16_e64 || MI.getOpcode() == AMDGPU::V_FMA_F16_e64 || MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 || + MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_t16_e64 || MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64) { shrinkMadFma(MI); continue; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll index 99e6c5d06a0e..0b09cabf25a1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fma.ll @@ -3,7 +3,8 @@ ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s -; RUN: llc -global-isel 
-mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s +; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s define float @v_fma_f32(float %x, float %y, float %z) { ; GFX6-LABEL: v_fma_f32: @@ -107,11 +108,18 @@ define half @v_fma_f16(half %x, half %y, half %z) { ; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_fma_f16: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_fma_f16 v0, v0, v1, v2 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: v_fma_f16: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_fmac_f16_e32 v2.l, v0.l, v1.l +; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, v2 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: v_fma_f16: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %fma = call half @llvm.fma.f16(half %x, half %y, half %z) ret half %fma } @@ -145,11 +153,17 @@ define half @v_fma_f16_fneg_lhs(half %x, half %y, half %z) { ; GFX10-NEXT: v_fma_f16 v0, -v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_fma_f16_fneg_lhs: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_fma_f16 v0, -v0, v1, v2 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: v_fma_f16_fneg_lhs: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, -v0.l, v1.l, v2.l +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: v_fma_f16_fneg_lhs: +; 
GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_fma_f16 v0, -v0, v1, v2 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %neg.x = fneg half %x %fma = call half @llvm.fma.f16(half %neg.x, half %y, half %z) ret half %fma @@ -184,11 +198,17 @@ define half @v_fma_f16_fneg_rhs(half %x, half %y, half %z) { ; GFX10-NEXT: v_fma_f16 v0, v0, -v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_fma_f16_fneg_rhs: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_fma_f16 v0, v0, -v1, v2 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: v_fma_f16_fneg_rhs: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, -v1.l, v2.l +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: v_fma_f16_fneg_rhs: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_fma_f16 v0, v0, -v1, v2 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %neg.y = fneg half %y %fma = call half @llvm.fma.f16(half %x, half %neg.y, half %z) ret half %fma @@ -223,11 +243,17 @@ define half @v_fma_f16_fneg_add(half %x, half %y, half %z) { ; GFX10-NEXT: v_fma_f16 v0, v0, v1, -v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: v_fma_f16_fneg_add: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_fma_f16 v0, v0, v1, -v2 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: v_fma_f16_fneg_add: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, v1.l, -v2.l +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: v_fma_f16_fneg_add: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_fma_f16 v0, v0, v1, -v2 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %neg.z = fneg half %z 
%fma = call half @llvm.fma.f16(half %x, half %y, half %neg.z) ret half %fma diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir index ac7944f25fe3..23e4b80b61f6 100644 --- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir +++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-f16.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 -# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,REAL16 %s +# FIXME-TRUE16. reenable after fix-sgpr-copies is fixed for true16 flow +# XUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,REAL16 %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN,FAKE16 %s --- diff --git a/llvm/test/CodeGen/AMDGPU/fma.f16.ll b/llvm/test/CodeGen/AMDGPU/fma.f16.ll index 52a23690dcf5..a33fd03e0ce0 100644 --- a/llvm/test/CodeGen/AMDGPU/fma.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fma.f16.ll @@ -3,8 +3,10 @@ ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX9,GFX9-GISEL ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX10,GFX10-SDAG ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX10,GFX10-GISEL -; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11,GFX11-SDAG -; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11,GFX11-GISEL +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 
-verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11-SDAG-TRUE16 +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11-SDAG-FAKE16 +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11-GISEL-TRUE16 +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX11-GISEL-FAKE16 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX12,GFX12-SDAG ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck %s -check-prefixes=GFX12,GFX12-GISEL @@ -24,11 +26,34 @@ define half @test_fma(half %x, half %y, half %z) { ; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: test_fma: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_fma_f16 v0, v0, v1, v2 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-TRUE16-LABEL: test_fma: +; GFX11-SDAG-TRUE16: ; %bb.0: +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, v0.h, v1.l +; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-FAKE16-LABEL: test_fma: +; GFX11-SDAG-FAKE16: ; %bb.0: +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-FAKE16-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-TRUE16-LABEL: test_fma: +; GFX11-GISEL-TRUE16: ; %bb.0: +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-TRUE16-NEXT: v_fmac_f16_e32 v2.l, v0.l, v1.l +; GFX11-GISEL-TRUE16-NEXT: 
s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, v2 +; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-FAKE16-LABEL: test_fma: +; GFX11-GISEL-FAKE16: ; %bb.0: +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-FAKE16-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_fma: ; GFX12: ; %bb.0: @@ -57,11 +82,31 @@ define half @test_fmac(half %x, half %y, half %z) { ; GFX10-NEXT: v_fmac_f16_e32 v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: test_fmac: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_fmac_f16_e32 v0, v1, v2 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-TRUE16-LABEL: test_fmac: +; GFX11-SDAG-TRUE16: ; %bb.0: +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v1.h, v2.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-TRUE16-NEXT: v_fmac_f16_e32 v0.l, v1.l, v1.h +; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-FAKE16-LABEL: test_fmac: +; GFX11-SDAG-FAKE16: ; %bb.0: +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-FAKE16-NEXT: v_fmac_f16_e32 v0, v1, v2 +; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-TRUE16-LABEL: test_fmac: +; GFX11-GISEL-TRUE16: ; %bb.0: +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-TRUE16-NEXT: v_fmac_f16_e32 v0.l, v1.l, v2.l +; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-FAKE16-LABEL: test_fmac: +; GFX11-GISEL-FAKE16: ; %bb.0: +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-FAKE16-NEXT: v_fmac_f16_e32 v0, v1, v2 +; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_fmac: ; GFX12: ; %bb.0: @@ -98,11 +143,31 @@ define half @test_fmaak(half %x, half %y, half %z) { ; GFX10-NEXT: v_fmaak_f16 v0, v0, v1, 
0x4200 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: test_fmaak: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_fmaak_f16 v0, v0, v1, 0x4200 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-TRUE16-LABEL: test_fmaak: +; GFX11-SDAG-TRUE16: ; %bb.0: +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-TRUE16-NEXT: v_fmaak_f16 v0.l, v0.l, v0.h, 0x4200 +; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-FAKE16-LABEL: test_fmaak: +; GFX11-SDAG-FAKE16: ; %bb.0: +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-FAKE16-NEXT: v_fmaak_f16 v0, v0, v1, 0x4200 +; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-TRUE16-LABEL: test_fmaak: +; GFX11-GISEL-TRUE16: ; %bb.0: +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-TRUE16-NEXT: v_fmaak_f16 v0.l, v0.l, v1.l, 0x4200 +; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-FAKE16-LABEL: test_fmaak: +; GFX11-GISEL-FAKE16: ; %bb.0: +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-FAKE16-NEXT: v_fmaak_f16 v0, v0, v1, 0x4200 +; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_fmaak: ; GFX12: ; %bb.0: @@ -139,11 +204,33 @@ define half @test_fmamk(half %x, half %y, half %z) { ; GFX10-NEXT: v_fmamk_f16 v0, v0, 0x4200, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: test_fmamk: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_fmamk_f16 v0, v0, 0x4200, v2 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-TRUE16-LABEL: test_fmamk: +; GFX11-SDAG-TRUE16: ; %bb.0: +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; 
GFX11-SDAG-TRUE16-NEXT: v_fmamk_f16 v0.l, v0.l, 0x4200, v0.h +; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-FAKE16-LABEL: test_fmamk: +; GFX11-SDAG-FAKE16: ; %bb.0: +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-FAKE16-NEXT: v_fmamk_f16 v0, v0, 0x4200, v2 +; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-TRUE16-LABEL: test_fmamk: +; GFX11-GISEL-TRUE16: ; %bb.0: +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-TRUE16-NEXT: v_fmac_f16_e32 v2.l, 0x4200, v0.l +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, v2 +; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-FAKE16-LABEL: test_fmamk: +; GFX11-GISEL-FAKE16: ; %bb.0: +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-FAKE16-NEXT: v_fmamk_f16 v0, v0, 0x4200, v2 +; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-LABEL: test_fmamk: ; GFX12: ; %bb.0: @@ -208,33 +295,61 @@ define i32 @test_D139469_f16(half %arg) { ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-LABEL: test_D139469_f16: -; GFX11-SDAG: ; %bb.0: ; %bb -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, 0x211e -; GFX11-SDAG-NEXT: v_mul_f16_e32 v2, 0x291e, v0 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SDAG-NEXT: v_fmac_f16_e32 v1, 0x291e, v0 -; GFX11-SDAG-NEXT: v_min_f16_e32 v0, v2, v1 -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: test_D139469_f16: -; GFX11-GISEL: ; %bb.0: ; %bb -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0x211e -; GFX11-GISEL-NEXT: 
v_mul_f16_e32 v2, 0x291e, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-GISEL-NEXT: v_fmac_f16_e32 v1, 0x291e, v0 -; GFX11-GISEL-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v2 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-GISEL-NEXT: v_cmp_gt_f16_e64 s0, 0, v1 -; GFX11-GISEL-NEXT: s_or_b32 s0, vcc_lo, s0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; GFX11-SDAG-TRUE16-LABEL: test_D139469_f16: +; GFX11-SDAG-TRUE16: ; %bb.0: ; %bb +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0x211e +; GFX11-SDAG-TRUE16-NEXT: v_mul_f16_e32 v1.l, 0x291e, v0.l +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-TRUE16-NEXT: v_fmac_f16_e32 v0.h, 0x291e, v0.l +; GFX11-SDAG-TRUE16-NEXT: v_min_f16_e32 v0.l, v1.l, v0.h +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-TRUE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v0.l +; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-FAKE16-LABEL: test_D139469_f16: +; GFX11-SDAG-FAKE16: ; %bb.0: ; %bb +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v1, 0x211e +; GFX11-SDAG-FAKE16-NEXT: v_mul_f16_e32 v2, 0x291e, v0 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-FAKE16-NEXT: v_fmac_f16_e32 v1, 0x291e, v0 +; GFX11-SDAG-FAKE16-NEXT: v_min_f16_e32 v0, v2, v1 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SDAG-FAKE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v0 +; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-TRUE16-LABEL: 
test_D139469_f16: +; GFX11-GISEL-TRUE16: ; %bb.0: ; %bb +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0x211e +; GFX11-GISEL-TRUE16-NEXT: v_mul_f16_e32 v1.l, 0x291e, v0.l +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-TRUE16-NEXT: v_fmac_f16_e32 v0.h, 0x291e, v0.l +; GFX11-GISEL-TRUE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v1.l +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-TRUE16-NEXT: v_cmp_gt_f16_e64 s0, 0, v0.h +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-FAKE16-LABEL: test_D139469_f16: +; GFX11-GISEL-FAKE16: ; %bb.0: ; %bb +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0x211e +; GFX11-GISEL-FAKE16-NEXT: v_mul_f16_e32 v2, 0x291e, v0 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-FAKE16-NEXT: v_fmac_f16_e32 v1, 0x291e, v0 +; GFX11-GISEL-FAKE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v2 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-GISEL-FAKE16-NEXT: v_cmp_gt_f16_e64 s0, 0, v1 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-LABEL: test_D139469_f16: ; GFX12-SDAG: ; %bb.0: ; %bb @@ -347,44 +462,83 @@ define <2 x i32> @test_D139469_v2f16(<2 x half> %arg) { ; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-SDAG-LABEL: test_D139469_v2f16: -; GFX11-SDAG: ; 
%bb.0: ; %bb -; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-SDAG-NEXT: s_movk_i32 s0, 0x211e -; GFX11-SDAG-NEXT: v_pk_mul_f16 v1, 0x291e, v0 op_sel_hi:[0,1] -; GFX11-SDAG-NEXT: v_pk_fma_f16 v0, 0x291e, v0, s0 op_sel_hi:[0,1,0] -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-SDAG-NEXT: v_pk_min_f16 v0, v1, v0 -; GFX11-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; GFX11-SDAG-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v0 -; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo -; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) -; GFX11-SDAG-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v1 -; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo -; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-GISEL-LABEL: test_D139469_v2f16: -; GFX11-GISEL: ; %bb.0: ; %bb -; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0x211e211e -; GFX11-GISEL-NEXT: v_pk_mul_f16 v2, 0x291e291e, v0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-GISEL-NEXT: v_pk_fma_f16 v0, 0x291e291e, v0, v1 -; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX11-GISEL-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v2 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) -; GFX11-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX11-GISEL-NEXT: v_cmp_gt_f16_e64 s0, 0, v0 -; GFX11-GISEL-NEXT: v_cmp_gt_f16_e64 s1, 0, v1 -; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-GISEL-NEXT: v_cmp_gt_f16_e64 s2, 0, v3 -; GFX11-GISEL-NEXT: s_or_b32 s0, vcc_lo, s0 -; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 -; GFX11-GISEL-NEXT: s_or_b32 s0, s1, s2 -; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 -; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] +; 
GFX11-SDAG-TRUE16-LABEL: test_D139469_v2f16: +; GFX11-SDAG-TRUE16: ; %bb.0: ; %bb +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-TRUE16-NEXT: s_movk_i32 s0, 0x211e +; GFX11-SDAG-TRUE16-NEXT: v_pk_mul_f16 v1, 0x291e, v0 op_sel_hi:[0,1] +; GFX11-SDAG-TRUE16-NEXT: v_pk_fma_f16 v0, 0x291e, v0, s0 op_sel_hi:[0,1,0] +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-TRUE16-NEXT: v_pk_min_f16 v0, v1, v0 +; GFX11-SDAG-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX11-SDAG-TRUE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v0.l +; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-SDAG-TRUE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v1.l +; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX11-SDAG-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-FAKE16-LABEL: test_D139469_v2f16: +; GFX11-SDAG-FAKE16: ; %bb.0: ; %bb +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-FAKE16-NEXT: s_movk_i32 s0, 0x211e +; GFX11-SDAG-FAKE16-NEXT: v_pk_mul_f16 v1, 0x291e, v0 op_sel_hi:[0,1] +; GFX11-SDAG-FAKE16-NEXT: v_pk_fma_f16 v0, 0x291e, v0, s0 op_sel_hi:[0,1,0] +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-SDAG-FAKE16-NEXT: v_pk_min_f16 v0, v1, v0 +; GFX11-SDAG-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX11-SDAG-FAKE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v0 +; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-SDAG-FAKE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v1 +; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo +; GFX11-SDAG-FAKE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-TRUE16-LABEL: test_D139469_v2f16: +; GFX11-GISEL-TRUE16: ; %bb.0: ; %bb +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-TRUE16-NEXT: 
v_mov_b32_e32 v1, 0x211e211e +; GFX11-GISEL-TRUE16-NEXT: v_pk_mul_f16 v2, 0x291e291e, v0 +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-TRUE16-NEXT: v_pk_fma_f16 v0, 0x291e291e, v0, v1 +; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX11-GISEL-TRUE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v2.l +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-GISEL-TRUE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX11-GISEL-TRUE16-NEXT: v_cmp_gt_f16_e64 s0, 0, v0.l +; GFX11-GISEL-TRUE16-NEXT: v_cmp_gt_f16_e64 s1, 0, v1.l +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-GISEL-TRUE16-NEXT: v_cmp_gt_f16_e64 s2, 0, v3.l +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s0, s1, s2 +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-GISEL-TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11-GISEL-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-FAKE16-LABEL: test_D139469_v2f16: +; GFX11-GISEL-FAKE16: ; %bb.0: ; %bb +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v1, 0x211e211e +; GFX11-GISEL-FAKE16-NEXT: v_pk_mul_f16 v2, 0x291e291e, v0 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-FAKE16-NEXT: v_pk_fma_f16 v0, 0x291e291e, v0, v1 +; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v2 +; GFX11-GISEL-FAKE16-NEXT: v_cmp_gt_f16_e32 vcc_lo, 0, v2 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) +; GFX11-GISEL-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v0 +; GFX11-GISEL-FAKE16-NEXT: v_cmp_gt_f16_e64 s0, 0, v0 +; 
GFX11-GISEL-FAKE16-NEXT: v_cmp_gt_f16_e64 s1, 0, v1 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-GISEL-FAKE16-NEXT: v_cmp_gt_f16_e64 s2, 0, v3 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s0, vcc_lo, s0 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s0, s1, s2 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-GISEL-FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11-GISEL-FAKE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX12-SDAG-LABEL: test_D139469_v2f16: ; GFX12-SDAG: ; %bb.0: ; %bb diff --git a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll index 0ad1c30b5b5a..1f36101c7b53 100644 --- a/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll +++ b/llvm/test/CodeGen/AMDGPU/preserve-hi16.ll @@ -814,7 +814,8 @@ define i32 @zext_fma_f16(half %x, half %y, half %z) { ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l -; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, v0.h, v1.l +; GFX11-TRUE16-NEXT: v_fmac_f16_e32 v1.l, v0.l, v0.h +; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v1.l ; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/shrink-mad-fma-fake16.mir b/llvm/test/CodeGen/AMDGPU/shrink-mad-fma-fake16.mir new file mode 100644 index 000000000000..d551ad88f56b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/shrink-mad-fma-fake16.mir @@ -0,0 +1,242 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -run-pass si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GFX11 + +--- +name: mad_cvv_f32 +body: | + bb.0: + ; GFX11-LABEL: name: mad_cvv_f32 + 
; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_MADMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_MAD_F32_e64 0, 1092616192, 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... + +--- +name: mad_vcv_f32 +body: | + bb.0: + ; GFX11-LABEL: name: mad_vcv_f32 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_MADMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_MAD_F32_e64 0, $vgpr0, 0, 1092616192, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... + +--- +name: mad_vvc_f32 +body: | + bb.0: + ; GFX11-LABEL: name: mad_vvc_f32 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_MADAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_MAD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... + +--- +name: mad_vsc_f32 +body: | + bb.0: + ; GFX11-LABEL: name: mad_vsc_f32 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_MADAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 + $vgpr0 = IMPLICIT_DEF + $sgpr1 = IMPLICIT_DEF + $vgpr2 = V_MAD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... 
+ +--- +name: fma_cvv_f32 +body: | + bb.0: + ; GFX11-LABEL: name: fma_cvv_f32 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_FMAMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_FMA_F32_e64 0, 1092616192, 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... + +--- +name: fma_vcv_f32 +body: | + bb.0: + ; GFX11-LABEL: name: fma_vcv_f32 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_FMAMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_FMA_F32_e64 0, $vgpr0, 0, 1092616192, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... + +--- +name: fma_vvc_f32 +body: | + bb.0: + ; GFX11-LABEL: name: fma_vvc_f32 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_FMAAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... + +--- +name: fma_vsc_f32 +body: | + bb.0: + ; GFX11-LABEL: name: fma_vsc_f32 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_FMAAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 + $vgpr0 = IMPLICIT_DEF + $sgpr1 = IMPLICIT_DEF + $vgpr2 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... 
+ +--- +name: mad_cvv_f16 +body: | + bb.0: + ; GFX11-LABEL: name: mad_cvv_f16 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_MADMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_MAD_F16_e64 0, 18688, 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... + +--- +name: mad_vcv_f16 +body: | + bb.0: + ; GFX11-LABEL: name: mad_vcv_f16 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_MADMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_MAD_F16_e64 0, $vgpr0, 0, 18688, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... + +--- +name: mad_vvc_f16 +body: | + bb.0: + ; GFX11-LABEL: name: mad_vvc_f16 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_MADAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_MAD_F16_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... + +--- +name: mad_vsc_f16 +body: | + bb.0: + ; GFX11-LABEL: name: mad_vsc_f16 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_MADAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 + $vgpr0 = IMPLICIT_DEF + $sgpr1 = IMPLICIT_DEF + $vgpr2 = V_MAD_F16_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... 
+ +--- +name: fma_cvv_f16 +body: | + bb.0: + ; GFX11-LABEL: name: fma_cvv_f16 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_FMAMK_F16_fake16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_FMA_F16_gfx9_fake16_e64 0, 18688, 0, $vgpr0, 0, $vgpr1, 0, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... + +--- +name: fma_vcv_f16 +body: | + bb.0: + ; GFX11-LABEL: name: fma_vcv_f16 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_FMAMK_F16_fake16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_FMA_F16_gfx9_fake16_e64 0, $vgpr0, 0, 18688, 0, $vgpr1, 0, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... + +--- +name: fma_vvc_f16 +body: | + bb.0: + ; GFX11-LABEL: name: fma_vvc_f16 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_FMAAK_F16_fake16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_FMA_F16_gfx9_fake16_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... + +--- +name: fma_vsc_f16 +body: | + bb.0: + ; GFX11-LABEL: name: fma_vsc_f16 + ; GFX11: $vgpr0 = IMPLICIT_DEF + ; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF + ; GFX11-NEXT: $vgpr2 = V_FMAAK_F16_fake16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec + ; GFX11-NEXT: SI_RETURN implicit $vgpr2 + $vgpr0 = IMPLICIT_DEF + $sgpr1 = IMPLICIT_DEF + $vgpr2 = V_FMA_F16_gfx9_fake16_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/shrink-mad-fma-gfx10.mir b/llvm/test/CodeGen/AMDGPU/shrink-mad-fma-gfx10.mir new file mode 100644 index 000000000000..89ef5df9beb8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/shrink-mad-fma-gfx10.mir @@ -0,0 +1,258 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GFX10 + +--- +name: mad_cvv_f32 +body: | + bb.0: + ; GFX10-LABEL: name: mad_cvv_f32 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_MADMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_MAD_F32_e64 0, 1092616192, 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... + +--- +name: mad_vcv_f32 +body: | + bb.0: + ; GFX10-LABEL: name: mad_vcv_f32 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_MADMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_MAD_F32_e64 0, $vgpr0, 0, 1092616192, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... + +--- +name: mad_vvc_f32 +body: | + bb.0: + ; GFX10-LABEL: name: mad_vvc_f32 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_MADAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_MAD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... 
+ +--- +name: mad_vsc_f32 +body: | + bb.0: + ; GFX10-LABEL: name: mad_vsc_f32 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_MADAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; + $vgpr0 = IMPLICIT_DEF + $sgpr1 = IMPLICIT_DEF + $vgpr2 = V_MAD_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... + +--- +name: fma_cvv_f32 +body: | + bb.0: + ; GFX10-LABEL: name: fma_cvv_f32 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_FMAMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_FMA_F32_e64 0, 1092616192, 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... + +--- +name: fma_vcv_f32 +body: | + bb.0: + ; GFX10-LABEL: name: fma_vcv_f32 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_FMAMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_FMA_F32_e64 0, $vgpr0, 0, 1092616192, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... + +--- +name: fma_vvc_f32 +body: | + bb.0: + ; GFX10-LABEL: name: fma_vvc_f32 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_FMAAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... 
+ +--- +name: fma_vsc_f32 +body: | + bb.0: + ; GFX10-LABEL: name: fma_vsc_f32 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_FMAAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; + $vgpr0 = IMPLICIT_DEF + $sgpr1 = IMPLICIT_DEF + $vgpr2 = V_FMA_F32_e64 0, $vgpr0, 0, $vgpr1, 0, 1092616192, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... + +--- +name: mad_cvv_f16 +body: | + bb.0: + ; GFX10-LABEL: name: mad_cvv_f16 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_MADMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_MAD_F16_e64 0, 18688, 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... + +--- +name: mad_vcv_f16 +body: | + bb.0: + ; GFX10-LABEL: name: mad_vcv_f16 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_MADMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_MAD_F16_e64 0, $vgpr0, 0, 18688, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... + +--- +name: mad_vvc_f16 +body: | + bb.0: + ; GFX10-LABEL: name: mad_vvc_f16 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_MADAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_MAD_F16_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... 
+ +--- +name: mad_vsc_f16 +body: | + bb.0: + ; GFX10-LABEL: name: mad_vsc_f16 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_MADAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; + $vgpr0 = IMPLICIT_DEF + $sgpr1 = IMPLICIT_DEF + $vgpr2 = V_MAD_F16_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... + +--- +name: fma_cvv_f16 +body: | + bb.0: + ; GFX10-LABEL: name: fma_cvv_f16 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_FMAMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_FMA_F16_gfx9_e64 0, 18688, 0, $vgpr0, 0, $vgpr1, 0, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... + +--- +name: fma_vcv_f16 +body: | + bb.0: + ; GFX10-LABEL: name: fma_vcv_f16 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_FMAMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, 18688, 0, $vgpr1, 0, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... + +--- +name: fma_vvc_f16 +body: | + bb.0: + ; GFX10-LABEL: name: fma_vvc_f16 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_FMAAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... 
+ +--- +name: fma_vsc_f16 +body: | + bb.0: + ; GFX10-LABEL: name: fma_vsc_f16 + ; GFX10: $vgpr0 = IMPLICIT_DEF + ; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF + ; GFX10-NEXT: $vgpr2 = V_FMAAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec + ; GFX10-NEXT: SI_RETURN implicit $vgpr2 + ; + $vgpr0 = IMPLICIT_DEF + $sgpr1 = IMPLICIT_DEF + $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, 0, implicit $mode, implicit $exec + SI_RETURN implicit $vgpr2 +... diff --git a/llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir b/llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir index 26feb8120c75..c9138dda7d1a 100644 --- a/llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir +++ b/llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir @@ -1,17 +1,10 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GFX10 -# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GFX11 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -run-pass si-shrink-instructions -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GFX11 --- name: mad_cvv_f32 body: | bb.0: - ; GFX10-LABEL: name: mad_cvv_f32 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_MADMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; ; GFX11-LABEL: name: mad_cvv_f32 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF @@ -27,12 +20,6 @@ body: | name: mad_vcv_f32 body: | bb.0: - ; GFX10-LABEL: name: mad_vcv_f32 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_MADMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; ; GFX11-LABEL: name: mad_vcv_f32 ; GFX11: $vgpr0 = 
IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF @@ -48,12 +35,6 @@ body: | name: mad_vvc_f32 body: | bb.0: - ; GFX10-LABEL: name: mad_vvc_f32 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_MADAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; ; GFX11-LABEL: name: mad_vvc_f32 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF @@ -69,12 +50,6 @@ body: | name: mad_vsc_f32 body: | bb.0: - ; GFX10-LABEL: name: mad_vsc_f32 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_MADAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; ; GFX11-LABEL: name: mad_vsc_f32 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF @@ -90,12 +65,6 @@ body: | name: fma_cvv_f32 body: | bb.0: - ; GFX10-LABEL: name: fma_cvv_f32 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_FMAMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; ; GFX11-LABEL: name: fma_cvv_f32 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF @@ -111,12 +80,6 @@ body: | name: fma_vcv_f32 body: | bb.0: - ; GFX10-LABEL: name: fma_vcv_f32 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_FMAMK_F32 $vgpr0, 1092616192, $vgpr1, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; ; GFX11-LABEL: name: fma_vcv_f32 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF @@ -132,12 +95,6 @@ body: | name: fma_vvc_f32 body: | bb.0: - ; GFX10-LABEL: name: fma_vvc_f32 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_FMAAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; ; 
GFX11-LABEL: name: fma_vvc_f32 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF @@ -153,12 +110,6 @@ body: | name: fma_vsc_f32 body: | bb.0: - ; GFX10-LABEL: name: fma_vsc_f32 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_FMAAK_F32 $vgpr0, $vgpr1, 1092616192, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; ; GFX11-LABEL: name: fma_vsc_f32 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF @@ -174,12 +125,6 @@ body: | name: mad_cvv_f16 body: | bb.0: - ; GFX10-LABEL: name: mad_cvv_f16 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_MADMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; ; GFX11-LABEL: name: mad_cvv_f16 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF @@ -195,12 +140,6 @@ body: | name: mad_vcv_f16 body: | bb.0: - ; GFX10-LABEL: name: mad_vcv_f16 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_MADMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; ; GFX11-LABEL: name: mad_vcv_f16 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF @@ -216,12 +155,6 @@ body: | name: mad_vvc_f16 body: | bb.0: - ; GFX10-LABEL: name: mad_vvc_f16 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_MADAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; ; GFX11-LABEL: name: mad_vvc_f16 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF @@ -237,12 +170,6 @@ body: | name: mad_vsc_f16 body: | bb.0: - ; GFX10-LABEL: name: mad_vsc_f16 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_MADAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec - ; GFX10-NEXT: 
SI_RETURN implicit $vgpr2 - ; ; GFX11-LABEL: name: mad_vsc_f16 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF @@ -258,20 +185,14 @@ body: | name: fma_cvv_f16 body: | bb.0: - ; GFX10-LABEL: name: fma_cvv_f16 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_FMAMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; ; GFX11-LABEL: name: fma_cvv_f16 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2 = V_FMAMK_F16_fake16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr2_lo16 = V_FMAMK_F16_t16 $vgpr0_lo16, 18688, $vgpr1_lo16, implicit $mode, implicit $exec ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F16_gfx9_e64 0, 18688, 0, $vgpr0, 0, $vgpr1, 0, 0, 0, implicit $mode, implicit $exec + $vgpr2_lo16 = V_FMA_F16_gfx9_t16_e64 0, 18688, 0, $vgpr0_lo16, 0, $vgpr1_lo16, 0, 0, 0, implicit $mode, implicit $exec SI_RETURN implicit $vgpr2 ... 
@@ -279,20 +200,14 @@ body: | name: fma_vcv_f16 body: | bb.0: - ; GFX10-LABEL: name: fma_vcv_f16 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_FMAMK_F16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; ; GFX11-LABEL: name: fma_vcv_f16 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2 = V_FMAMK_F16_fake16 $vgpr0, 18688, $vgpr1, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr2_lo16 = V_FMAMK_F16_t16 $vgpr0_lo16, 18688, $vgpr1_lo16, implicit $mode, implicit $exec ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, 18688, 0, $vgpr1, 0, 0, 0, implicit $mode, implicit $exec + $vgpr2_lo16 = V_FMA_F16_gfx9_t16_e64 0, $vgpr0_lo16, 0, 18688, 0, $vgpr1_lo16, 0, 0, 0, implicit $mode, implicit $exec SI_RETURN implicit $vgpr2 ... @@ -300,20 +215,14 @@ body: | name: fma_vvc_f16 body: | bb.0: - ; GFX10-LABEL: name: fma_vvc_f16 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_FMAAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; ; GFX11-LABEL: name: fma_vvc_f16 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $vgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2 = V_FMAAK_F16_fake16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr2_lo16 = V_FMAAK_F16_t16 $vgpr0_lo16, $vgpr1_lo16, 18688, implicit $mode, implicit $exec ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, 0, implicit $mode, implicit $exec + $vgpr2_lo16 = V_FMA_F16_gfx9_t16_e64 0, $vgpr0_lo16, 0, $vgpr1_lo16, 0, 18688, 0, 0, 0, implicit $mode, implicit $exec SI_RETURN implicit $vgpr2 ... 
@@ -321,19 +230,13 @@ body: | name: fma_vsc_f16 body: | bb.0: - ; GFX10-LABEL: name: fma_vsc_f16 - ; GFX10: $vgpr0 = IMPLICIT_DEF - ; GFX10-NEXT: $sgpr1 = IMPLICIT_DEF - ; GFX10-NEXT: $vgpr2 = V_FMAAK_F16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec - ; GFX10-NEXT: SI_RETURN implicit $vgpr2 - ; ; GFX11-LABEL: name: fma_vsc_f16 ; GFX11: $vgpr0 = IMPLICIT_DEF ; GFX11-NEXT: $sgpr1 = IMPLICIT_DEF - ; GFX11-NEXT: $vgpr2 = V_FMAAK_F16_fake16 $vgpr0, $vgpr1, 18688, implicit $mode, implicit $exec + ; GFX11-NEXT: $vgpr2_lo16 = V_FMAAK_F16_t16 $vgpr0_hi16, $vgpr1_hi16, 18688, implicit $mode, implicit $exec ; GFX11-NEXT: SI_RETURN implicit $vgpr2 $vgpr0 = IMPLICIT_DEF $sgpr1 = IMPLICIT_DEF - $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, 0, implicit $mode, implicit $exec + $vgpr2_lo16 = V_FMA_F16_gfx9_t16_e64 0, $vgpr0_hi16, 0, $vgpr1_hi16, 0, 18688, 0, 0, 0, implicit $mode, implicit $exec SI_RETURN implicit $vgpr2 ...