Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[IA][RISCV] Support VP loads/stores in InterleavedAccessPass #120490

Merged
merged 12 commits into from
Feb 4, 2025
25 changes: 25 additions & 0 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ class TargetRegisterClass;
class TargetRegisterInfo;
class TargetTransformInfo;
class Value;
class VPIntrinsic;

namespace Sched {

Expand Down Expand Up @@ -3152,6 +3153,30 @@ class TargetLoweringBase {
return false;
}

/// Lower an interleaved load to target specific intrinsics. Return
/// true on success.
///
/// \p Load is a vp.load instruction.
/// \p Mask is a mask value
/// \p DeinterleaveRes is a list of deinterleaved results.
virtual bool
lowerDeinterleavedIntrinsicToVPLoad(VPIntrinsic *Load, Value *Mask,
ArrayRef<Value *> DeinterleaveRes) const {
return false;
}

/// Lower an interleaved store to target specific intrinsics. Return
/// true on success.
///
/// \p Store is the vp.store instruction.
/// \p Mask is a mask value
/// \p InterleaveOps is a list of values being interleaved.
virtual bool
lowerInterleavedIntrinsicToVPStore(VPIntrinsic *Store, Value *Mask,
ArrayRef<Value *> InterleaveOps) const {
return false;
}

/// Lower a deinterleave intrinsic to a target specific load intrinsic.
/// Return true on success. Currently only supports
/// llvm.vector.deinterleave2
Expand Down
115 changes: 96 additions & 19 deletions llvm/lib/CodeGen/InterleavedAccessPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -630,11 +630,37 @@ getVectorDeinterleaveFactor(IntrinsicInst *II,
return true;
}

// Return the corresponded deinterleaved mask, or nullptr if there is no valid
// mask.
static Value *getMask(Value *WideMask, unsigned Factor,
VectorType *LeafValueTy) {
using namespace llvm::PatternMatch;
if (auto *IMI = dyn_cast<IntrinsicInst>(WideMask)) {
SmallVector<Value *, 8> Operands;
SmallVector<Instruction *, 8> DeadInsts;
if (getVectorInterleaveFactor(IMI, Operands, DeadInsts)) {
assert(!Operands.empty());
if (Operands.size() == Factor && llvm::all_equal(Operands))
return Operands[0];
}
}

if (match(WideMask, m_AllOnes())) {
// Scale the vector length of all-ones mask.
ElementCount OrigEC =
cast<VectorType>(WideMask->getType())->getElementCount();
assert(OrigEC.getKnownMinValue() % Factor == 0);
return ConstantVector::getSplat(OrigEC.divideCoefficientBy(Factor),
cast<Constant>(WideMask)->getSplatValue());
}

return nullptr;
}

bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
IntrinsicInst *DI, SmallSetVector<Instruction *, 32> &DeadInsts) {
LoadInst *LI = dyn_cast<LoadInst>(DI->getOperand(0));

if (!LI || !LI->hasOneUse() || !LI->isSimple())
Value *LoadedVal = DI->getOperand(0);
if (!LoadedVal->hasOneUse() || !isa<LoadInst, VPIntrinsic>(LoadedVal))
return false;

SmallVector<Value *, 8> DeinterleaveValues;
Expand All @@ -643,43 +669,94 @@ bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
DeinterleaveDeadInsts))
return false;

LLVM_DEBUG(dbgs() << "IA: Found a deinterleave intrinsic: " << *DI
<< " with factor = " << DeinterleaveValues.size() << "\n");
const unsigned Factor = DeinterleaveValues.size();

// Try and match this with target specific intrinsics.
if (!TLI->lowerDeinterleaveIntrinsicToLoad(LI, DeinterleaveValues))
return false;
if (auto *VPLoad = dyn_cast<VPIntrinsic>(LoadedVal)) {
if (VPLoad->getIntrinsicID() != Intrinsic::vp_load)
return false;
// Check mask operand. Handle both all-true and interleaved mask.
Value *WideMask = VPLoad->getOperand(1);
Value *Mask = getMask(WideMask, Factor,
cast<VectorType>(DeinterleaveValues[0]->getType()));
if (!Mask)
return false;

LLVM_DEBUG(dbgs() << "IA: Found a vp.load with deinterleave intrinsic "
<< *DI << " and factor = " << Factor << "\n");

// Since lowerInterleaveLoad expects Shuffles and LoadInst, use special
// TLI function to emit target-specific interleaved instruction.
if (!TLI->lowerDeinterleavedIntrinsicToVPLoad(VPLoad, Mask,
DeinterleaveValues))
return false;

} else {
auto *LI = cast<LoadInst>(LoadedVal);
if (!LI->isSimple())
return false;

LLVM_DEBUG(dbgs() << "IA: Found a load with deinterleave intrinsic " << *DI
<< " and factor = " << Factor << "\n");

// Try and match this with target specific intrinsics.
if (!TLI->lowerDeinterleaveIntrinsicToLoad(LI, DeinterleaveValues))
return false;
}

DeadInsts.insert(DeinterleaveDeadInsts.begin(), DeinterleaveDeadInsts.end());
// We now have a target-specific load, so delete the old one.
DeadInsts.insert(LI);
DeadInsts.insert(cast<Instruction>(LoadedVal));
return true;
}

bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
IntrinsicInst *II, SmallSetVector<Instruction *, 32> &DeadInsts) {
if (!II->hasOneUse())
return false;

StoreInst *SI = dyn_cast<StoreInst>(*(II->users().begin()));

if (!SI || !SI->isSimple())
Value *StoredBy = II->user_back();
if (!isa<StoreInst, VPIntrinsic>(StoredBy))
return false;

SmallVector<Value *, 8> InterleaveValues;
SmallVector<Instruction *, 8> InterleaveDeadInsts;
if (!getVectorInterleaveFactor(II, InterleaveValues, InterleaveDeadInsts))
return false;

LLVM_DEBUG(dbgs() << "IA: Found an interleave intrinsic: " << *II
<< " with factor = " << InterleaveValues.size() << "\n");
const unsigned Factor = InterleaveValues.size();

// Try and match this with target specific intrinsics.
if (!TLI->lowerInterleaveIntrinsicToStore(SI, InterleaveValues))
return false;
if (auto *VPStore = dyn_cast<VPIntrinsic>(StoredBy)) {
if (VPStore->getIntrinsicID() != Intrinsic::vp_store)
return false;

Value *WideMask = VPStore->getOperand(2);
Value *Mask = getMask(WideMask, Factor,
cast<VectorType>(InterleaveValues[0]->getType()));
if (!Mask)
return false;

LLVM_DEBUG(dbgs() << "IA: Found a vp.store with interleave intrinsic "
<< *II << " and factor = " << Factor << "\n");

// Since lowerInterleavedStore expects Shuffle and StoreInst, use special
// TLI function to emit target-specific interleaved instruction.
if (!TLI->lowerInterleavedIntrinsicToVPStore(VPStore, Mask,
InterleaveValues))
return false;
} else {
auto *SI = cast<StoreInst>(StoredBy);
if (!SI->isSimple())
return false;

LLVM_DEBUG(dbgs() << "IA: Found a store with interleave intrinsic " << *II
<< " and factor = " << Factor << "\n");

// Try and match this with target specific intrinsics.
if (!TLI->lowerInterleaveIntrinsicToStore(SI, InterleaveValues))
return false;
}

// We now have a target-specific store, so delete the old one.
DeadInsts.insert(SI);
DeadInsts.insert(cast<Instruction>(StoredBy));
DeadInsts.insert(InterleaveDeadInsts.begin(), InterleaveDeadInsts.end());
return true;
}
Expand Down
Loading
Loading