Skip to content

Commit 66a0a08

Browse files
authored
[RISCV] Extract spread(2,4,8) shuffle lowering from interleave(2) (llvm#118822)
This is a prep patch for improving spread(4,8) shuffles. I also think it improves the readability of the existing code, but the primary motivation is simply staging work.
1 parent 91354fb commit 66a0a08

File tree

3 files changed

+41
-22
lines changed

3 files changed

+41
-22
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

+37-18
Original file line numberDiff line numberDiff line change
@@ -4824,12 +4824,46 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
48244824
return convertFromScalableVector(VT, Vec, DAG, Subtarget);
48254825
}
48264826

4827+
// Given a vector a, b, c, d return a vector Factor times longer
4828+
// with Factor-1 undef's between elements. Ex:
4829+
// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
4830+
// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
4831+
static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
4832+
const SDLoc &DL, SelectionDAG &DAG) {
4833+
4834+
MVT VT = V.getSimpleValueType();
4835+
unsigned EltBits = VT.getScalarSizeInBits();
4836+
ElementCount EC = VT.getVectorElementCount();
4837+
V = DAG.getBitcast(VT.changeTypeToInteger(), V);
4838+
4839+
MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
4840+
4841+
SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
4842+
// TODO: On rv32, the constant becomes a splat_vector_parts which does not
4843+
// allow the SHL to fold away if Index is 0.
4844+
if (Index != 0)
4845+
Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
4846+
DAG.getConstant(EltBits * Index, DL, WideVT));
4847+
// Make sure to use original element type
4848+
MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
4849+
EC.multiplyCoefficientBy(Factor));
4850+
return DAG.getBitcast(ResultVT, Result);
4851+
}
4852+
48274853
// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
48284854
// to create an interleaved vector of <[vscale x] n*2 x ty>.
48294855
// This requires that the size of ty is less than the subtarget's maximum ELEN.
48304856
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
48314857
const SDLoc &DL, SelectionDAG &DAG,
48324858
const RISCVSubtarget &Subtarget) {
4859+
4860+
// FIXME: Not only does this optimize the code, it fixes some correctness
4861+
// issues because MIR does not have freeze.
4862+
if (EvenV.isUndef())
4863+
return getWideningSpread(OddV, 2, 1, DL, DAG);
4864+
if (OddV.isUndef())
4865+
return getWideningSpread(EvenV, 2, 0, DL, DAG);
4866+
48334867
MVT VecVT = EvenV.getSimpleValueType();
48344868
MVT VecContainerVT = VecVT; // <vscale x n x ty>
48354869
// Convert fixed vectors to scalable if needed
@@ -4861,29 +4895,14 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
48614895
SDValue Passthru = DAG.getUNDEF(WideContainerVT);
48624896

48634897
SDValue Interleaved;
4864-
if (OddV.isUndef()) {
4865-
// If OddV is undef, this is a zero extend.
4866-
// FIXME: Not only does this optimize the code, it fixes some correctness
4867-
// issues because MIR does not have freeze.
4868-
Interleaved =
4869-
DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);
4870-
} else if (Subtarget.hasStdExtZvbb()) {
4898+
if (Subtarget.hasStdExtZvbb()) {
48714899
// Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
48724900
SDValue OffsetVec =
48734901
DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
48744902
Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
48754903
OffsetVec, Passthru, Mask, VL);
4876-
if (!EvenV.isUndef())
4877-
Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4878-
Interleaved, EvenV, Passthru, Mask, VL);
4879-
} else if (EvenV.isUndef()) {
4880-
Interleaved =
4881-
DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);
4882-
4883-
SDValue OffsetVec =
4884-
DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
4885-
Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
4886-
Interleaved, OffsetVec, Passthru, Mask, VL);
4904+
Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4905+
Interleaved, EvenV, Passthru, Mask, VL);
48874906
} else {
48884907
// FIXME: We should freeze the odd vector here. We already handled the case
48894908
// of provably undef/poison above.

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -247,12 +247,12 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) {
247247
; V128-NEXT: vsetivli zero, 16, e32, m8, ta, ma
248248
; V128-NEXT: vslidedown.vi v24, v16, 16
249249
; V128-NEXT: li a0, 32
250-
; V128-NEXT: vslidedown.vi v0, v8, 16
251250
; V128-NEXT: lui a1, 699051
251+
; V128-NEXT: vslidedown.vi v0, v8, 16
252252
; V128-NEXT: vsetivli zero, 16, e64, m8, ta, ma
253253
; V128-NEXT: vzext.vf2 v8, v24
254-
; V128-NEXT: vzext.vf2 v24, v0
255254
; V128-NEXT: addi a1, a1, -1366
255+
; V128-NEXT: vzext.vf2 v24, v0
256256
; V128-NEXT: vmv.s.x v0, a1
257257
; V128-NEXT: vsll.vx v8, v8, a0
258258
; V128-NEXT: vsetvli zero, a0, e32, m8, ta, ma

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -410,12 +410,12 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
410410
; V128-NEXT: vsetivli zero, 16, e32, m8, ta, ma
411411
; V128-NEXT: vslidedown.vi v24, v16, 16
412412
; V128-NEXT: li a0, 32
413-
; V128-NEXT: vslidedown.vi v0, v8, 16
414413
; V128-NEXT: lui a1, 699051
414+
; V128-NEXT: vslidedown.vi v0, v8, 16
415415
; V128-NEXT: vsetivli zero, 16, e64, m8, ta, ma
416416
; V128-NEXT: vzext.vf2 v8, v24
417-
; V128-NEXT: vzext.vf2 v24, v0
418417
; V128-NEXT: addi a1, a1, -1366
418+
; V128-NEXT: vzext.vf2 v24, v0
419419
; V128-NEXT: vmv.s.x v0, a1
420420
; V128-NEXT: vsll.vx v8, v8, a0
421421
; V128-NEXT: vsetvli zero, a0, e32, m8, ta, ma

0 commit comments

Comments
 (0)