Skip to content

Commit

Permalink
[ARM] Add a tail-predication loop predicate register
Browse files Browse the repository at this point in the history
The semantics of tail predication loops mean that the value of LR at the time
an instruction is executed determines the predicate. In other words:

mov r3, #3
DLSTP lr, r3        // Start tail predication, lr==3
VADD.s32 q0, q1, q2 // Lanes 0,1 and 2 are updated in q0.
mov lr, #1
VADD.s32 q0, q1, q2 // Only first lane is updated.

This means that the value of lr cannot be spilled and re-used in tail
predication regions without potentially altering the behaviour of the
program. More lanes than required could be stored, for example, and in
the case of a gather those lanes might not have been set up, leading to
alignment exceptions.

This patch adds a new lr predicate operand to MVE instructions in order
to keep a reference to the lr that they use as a tail predicate. It will
usually hold zero_reg, meaning not predicated, and is set to the LR phi
value in the MVETPAndVPTOptimisationsPass. This will prevent lr from
being spilled anywhere that it needs to be used.

A lot of tests needed updating.

Differential Revision: https://reviews.llvm.org/D107638
  • Loading branch information
davemgreen authored and memfrob committed Oct 4, 2022
1 parent 7e4990b commit 5c5f2d4
Show file tree
Hide file tree
Showing 105 changed files with 2,833 additions and 2,753 deletions.
2 changes: 2 additions & 0 deletions llvm/lib/CodeGen/MachineVerifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1653,6 +1653,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
report("Unspillable Terminator does not define a reg", MI);
Register Def = MI->getOperand(0).getReg();
if (Def.isVirtual() &&
!MF->getProperties().hasProperty(
MachineFunctionProperties::Property::NoPHIs) &&
std::distance(MRI->use_nodbg_begin(Def), MRI->use_nodbg_end()) > 1)
report("Unspillable Terminator expected to have at most one use!", MI);
}
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -867,6 +867,7 @@ void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
/// Append the operands of an unpredicated MVE predicate to \p MIB, in order:
/// the ARMVCC::None condition code, a null (register 0) predicate register,
/// and a null (register 0) tail-predication register.
void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
  MIB.addImm(ARMVCC::None)
      .addReg(0)  // no predicate register
      .addReg(0); // tp_reg
}

void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
Expand All @@ -878,6 +879,7 @@ void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
/// Append the operands of a predicated MVE predicate to \p MIB, in order:
/// the condition code \p Cond, an implicit use of ARM::VPR, and a null
/// (register 0) tail-predication register.
void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) {
  MIB.addImm(Cond)
      .addReg(ARM::VPR, RegState::Implicit)
      .addReg(0); // tp_reg
}

void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
Expand Down
11 changes: 9 additions & 2 deletions llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1822,8 +1822,11 @@ bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
else
return false;

SDValue Ops[] = {Base, NewOffset,
CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg,
SDValue Ops[] = {Base,
NewOffset,
CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
PredReg,
CurDAG->getRegister(0, MVT::i32), // tp_reg
Chain};
SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
N->getValueType(0), MVT::Other, Ops);
Expand Down Expand Up @@ -2529,6 +2532,7 @@ void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
SDValue PredicateMask) {
Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
Ops.push_back(PredicateMask);
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
}

template <typename SDValueVector>
Expand All @@ -2537,20 +2541,23 @@ void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
SDValue Inactive) {
Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
Ops.push_back(PredicateMask);
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
Ops.push_back(Inactive);
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
Ops.push_back(CurDAG->getRegister(0, MVT::i32));
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
}

template <typename SDValueVector>
void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
EVT InactiveTy) {
Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
Ops.push_back(CurDAG->getRegister(0, MVT::i32));
Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
Ops.push_back(SDValue(
CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
}
Expand Down
7 changes: 5 additions & 2 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11542,6 +11542,7 @@ static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VCTP8), VccrReg)
.addUse(PredCounterPhiReg)
.addImm(ARMVCC::None)
.addReg(0)
.addReg(0);

BuildMI(TpLoopBody, Dl, TII->get(ARM::t2SUBri), RemainingElementsReg)
Expand All @@ -11560,7 +11561,8 @@ static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
.addReg(SrcPhiReg)
.addImm(16)
.addImm(ARMVCC::Then)
.addUse(VccrReg);
.addUse(VccrReg)
.addReg(0);
} else
SrcValueReg = OpSrcReg;

Expand All @@ -11570,7 +11572,8 @@ static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
.addReg(DestPhiReg)
.addImm(16)
.addImm(ARMVCC::Then)
.addUse(VccrReg);
.addUse(VccrReg)
.addReg(0);

// Add the pseudoInstrs for decrementing the loop counter and marking the
// end:t2DoLoopDec and t2DoLoopEnd
Expand Down
12 changes: 6 additions & 6 deletions llvm/lib/Target/ARM/ARMInstrCDE.td
Original file line number Diff line number Diff line change
Expand Up @@ -612,30 +612,30 @@ multiclass VCXPredicatedPat_m<MVEVectorVTInfo VTI> {
(VTI.Vec MQPR:$inactive), timm:$imm,
(VTI.Pred VCCR:$pred))),
(VTI.Vec (CDE_VCX1_vec p_imm:$coproc, imm_12b:$imm, ARMVCCThen,
(VTI.Pred VCCR:$pred),
(VTI.Pred VCCR:$pred), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
def : Pat<(VTI.Vec (int_arm_cde_vcx1qa_predicated timm:$coproc,
(VTI.Vec MQPR:$acc), timm:$imm,
(VTI.Pred VCCR:$pred))),
(VTI.Vec (CDE_VCX1A_vec p_imm:$coproc, (VTI.Vec MQPR:$acc),
imm_12b:$imm, ARMVCCThen,
(VTI.Pred VCCR:$pred)))>;
(VTI.Pred VCCR:$pred), zero_reg))>;

def : Pat<(VTI.Vec (int_arm_cde_vcx2q_predicated timm:$coproc,
(VTI.Vec MQPR:$inactive),
(v16i8 MQPR:$n), timm:$imm,
(VTI.Pred VCCR:$pred))),
(VTI.Vec (CDE_VCX2_vec p_imm:$coproc, (v16i8 MQPR:$n),
imm_7b:$imm, ARMVCCThen,
(VTI.Pred VCCR:$pred),
(VTI.Pred VCCR:$pred), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
def : Pat<(VTI.Vec (int_arm_cde_vcx2qa_predicated timm:$coproc,
(VTI.Vec MQPR:$acc),
(v16i8 MQPR:$n), timm:$imm,
(VTI.Pred VCCR:$pred))),
(VTI.Vec (CDE_VCX2A_vec p_imm:$coproc, (VTI.Vec MQPR:$acc),
(v16i8 MQPR:$n), timm:$imm, ARMVCCThen,
(VTI.Pred VCCR:$pred)))>;
(VTI.Pred VCCR:$pred), zero_reg))>;

def : Pat<(VTI.Vec (int_arm_cde_vcx3q_predicated timm:$coproc,
(VTI.Vec MQPR:$inactive),
Expand All @@ -645,7 +645,7 @@ multiclass VCXPredicatedPat_m<MVEVectorVTInfo VTI> {
(VTI.Vec (CDE_VCX3_vec p_imm:$coproc, (v16i8 MQPR:$n),
(v16i8 MQPR:$m),
imm_4b:$imm, ARMVCCThen,
(VTI.Pred VCCR:$pred),
(VTI.Pred VCCR:$pred), zero_reg,
(VTI.Vec MQPR:$inactive)))>;
def : Pat<(VTI.Vec (int_arm_cde_vcx3qa_predicated timm:$coproc,
(VTI.Vec MQPR:$acc),
Expand All @@ -654,7 +654,7 @@ multiclass VCXPredicatedPat_m<MVEVectorVTInfo VTI> {
(VTI.Vec (CDE_VCX3A_vec p_imm:$coproc, (VTI.Vec MQPR:$acc),
(v16i8 MQPR:$n), (v16i8 MQPR:$m),
imm_4b:$imm, ARMVCCThen,
(VTI.Pred VCCR:$pred)))>;
(VTI.Pred VCCR:$pred), zero_reg))>;
}

let Predicates = [HasCDE, HasMVEInt] in
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/ARM/ARMInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -249,10 +249,10 @@ def VPTPredROperand : AsmOperandClass {

// Base class for both kinds of vpred.
class vpred_ops<dag extra_op, dag extra_mi> : OperandWithDefaultOps<OtherVT,
!con((ops (i32 0), (i32 zero_reg)), extra_op)> {
!con((ops (i32 0), (i32 zero_reg), (i32 zero_reg)), extra_op)> {
let PrintMethod = "printVPTPredicateOperand";
let OperandNamespace = "ARM";
let MIOperandInfo = !con((ops i32imm:$cond, VCCR:$cond_reg), extra_mi);
let MIOperandInfo = !con((ops i32imm:$cond, VCCR:$cond_reg, GPRlr:$tp_reg), extra_mi);

// For convenience, we provide a string value that can be appended
// to the constraints string. It's empty for vpred_n, and for
Expand Down
Loading

0 comments on commit 5c5f2d4

Please sign in to comment.