@@ -4824,12 +4824,46 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
4824
4824
return convertFromScalableVector(VT, Vec, DAG, Subtarget);
4825
4825
}
4826
4826
4827
+ // Given a vector a, b, c, d return a vector Factor times longer
4828
+ // with Factor-1 undef's between elements. Ex:
4829
+ // a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
4830
+ // undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
4831
+ static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
4832
+ const SDLoc &DL, SelectionDAG &DAG) {
4833
+
4834
+ MVT VT = V.getSimpleValueType();
4835
+ unsigned EltBits = VT.getScalarSizeInBits();
4836
+ ElementCount EC = VT.getVectorElementCount();
4837
+ V = DAG.getBitcast(VT.changeTypeToInteger(), V);
4838
+
4839
+ MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
4840
+
4841
+ SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
4842
+ // TODO: On rv32, the constant becomes a splat_vector_parts which does not
4843
+ // allow the SHL to fold away if Index is 0.
4844
+ if (Index != 0)
4845
+ Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
4846
+ DAG.getConstant(EltBits * Index, DL, WideVT));
4847
+ // Make sure to use original element type
4848
+ MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
4849
+ EC.multiplyCoefficientBy(Factor));
4850
+ return DAG.getBitcast(ResultVT, Result);
4851
+ }
4852
+
4827
4853
// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
4828
4854
// to create an interleaved vector of <[vscale x] n*2 x ty>.
4829
4855
// This requires that the size of ty is less than the subtarget's maximum ELEN.
4830
4856
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4831
4857
const SDLoc &DL, SelectionDAG &DAG,
4832
4858
const RISCVSubtarget &Subtarget) {
4859
+
4860
+ // FIXME: Not only does this optimize the code, it fixes some correctness
4861
+ // issues because MIR does not have freeze.
4862
+ if (EvenV.isUndef())
4863
+ return getWideningSpread(OddV, 2, 1, DL, DAG);
4864
+ if (OddV.isUndef())
4865
+ return getWideningSpread(EvenV, 2, 0, DL, DAG);
4866
+
4833
4867
MVT VecVT = EvenV.getSimpleValueType();
4834
4868
MVT VecContainerVT = VecVT; // <vscale x n x ty>
4835
4869
// Convert fixed vectors to scalable if needed
@@ -4861,29 +4895,14 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
4861
4895
SDValue Passthru = DAG.getUNDEF(WideContainerVT);
4862
4896
4863
4897
SDValue Interleaved;
4864
- if (OddV.isUndef()) {
4865
- // If OddV is undef, this is a zero extend.
4866
- // FIXME: Not only does this optimize the code, it fixes some correctness
4867
- // issues because MIR does not have freeze.
4868
- Interleaved =
4869
- DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);
4870
- } else if (Subtarget.hasStdExtZvbb()) {
4898
+ if (Subtarget.hasStdExtZvbb()) {
4871
4899
// Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
4872
4900
SDValue OffsetVec =
4873
4901
DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
4874
4902
Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
4875
4903
OffsetVec, Passthru, Mask, VL);
4876
- if (!EvenV.isUndef())
4877
- Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4878
- Interleaved, EvenV, Passthru, Mask, VL);
4879
- } else if (EvenV.isUndef()) {
4880
- Interleaved =
4881
- DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);
4882
-
4883
- SDValue OffsetVec =
4884
- DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
4885
- Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
4886
- Interleaved, OffsetVec, Passthru, Mask, VL);
4904
+ Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
4905
+ Interleaved, EvenV, Passthru, Mask, VL);
4887
4906
} else {
4888
4907
// FIXME: We should freeze the odd vector here. We already handled the case
4889
4908
// of provably undef/poison above.
0 commit comments