diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 75fa2d9e8e2a48..bf985f11536890 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3646,6 +3646,10 @@ class Compiler GenTree* gtFoldTypeCompare(GenTree* tree); GenTree* gtFoldTypeEqualityCall(bool isEq, GenTree* op1, GenTree* op2); +#if defined(FEATURE_HW_INTRINSICS) + GenTree* gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree); +#endif // FEATURE_HW_INTRINSICS + // Options to control behavior of gtTryRemoveBoxUpstreamEffects enum BoxRemovalOptions { diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index 98d6bb788d450c..744a36964d2754 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -13599,6 +13599,14 @@ GenTree* Compiler::gtFoldExpr(GenTree* tree) { return gtFoldExprConditional(tree); } + +#if defined(FEATURE_HW_INTRINSICS) + if (tree->OperIsHWIntrinsic()) + { + return gtFoldExprHWIntrinsic(tree->AsHWIntrinsic()); + } +#endif // FEATURE_HW_INTRINSICS + return tree; } @@ -18353,6 +18361,282 @@ bool Compiler::IsValidForShuffle(GenTreeVecCon* vecCon, unsigned simdSize, var_t return true; } + +//------------------------------------------------------------------------ +// GenTreeVecCon::EvaluateUnaryInPlace: Evaluates this constant using the given operation +// +// Arguments: +// oper - the operation to use in the evaluation +// scalar - true if this is a scalar operation; otherwise, false +// simdType - the size of the constant being checked +// baseType - the base type of the constant being checked +// +void GenTreeVecCon::EvaluateUnaryInPlace(genTreeOps oper, bool scalar, var_types simdType, var_types baseType) +{ + switch (simdType) + { + case TYP_SIMD8: + { + simd8_t result = {}; + EvaluateUnarySimd(oper, scalar, baseType, &result, gtSimd8Val); + gtSimd8Val = result; + break; + } + + case TYP_SIMD12: + { + simd12_t result = {}; + EvaluateUnarySimd(oper, scalar, baseType, &result, gtSimd12Val); + gtSimd12Val = result; + break; + } + + case TYP_SIMD16: + { + simd16_t result = {}; + EvaluateUnarySimd(oper, scalar, baseType, &result, gtSimd16Val); + gtSimd16Val = result; + break; + } + +#if defined(TARGET_XARCH) + case TYP_SIMD32: + { + simd32_t result = {}; + EvaluateUnarySimd(oper, scalar, baseType, &result, gtSimd32Val); + gtSimd32Val = result; + break; + } + + case TYP_SIMD64: + { + simd64_t result = {}; + EvaluateUnarySimd(oper, scalar, baseType, &result, gtSimd64Val); + gtSimd64Val = result; + break; + } +#endif // TARGET_XARCH + + default: + { + unreached(); + } + } +} + +//------------------------------------------------------------------------ +// GenTreeVecCon::EvaluateUnaryInPlace: Evaluates this constant using the given operation +// +// Arguments: +// oper - the operation to use in the evaluation +// scalar - true if this is a scalar operation; otherwise, false +// simdType - the size of the constant being checked +// baseType - the base type of the constant being checked +// other - the other vector constant to use in the evaluation +// +void GenTreeVecCon::EvaluateBinaryInPlace( + genTreeOps oper, bool scalar, var_types simdType, var_types baseType, GenTreeVecCon* other) +{ + switch (simdType) + { + case TYP_SIMD8: + { + simd8_t result = {}; + EvaluateBinarySimd(oper, scalar, baseType, &result, gtSimd8Val, other->gtSimd8Val); + gtSimd8Val = result; + break; + } + + case TYP_SIMD12: + { + simd12_t result = {}; + EvaluateBinarySimd(oper, scalar, baseType, &result, gtSimd12Val, other->gtSimd12Val); + gtSimd12Val = result; + break; + } 
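Each of the size cases in EvaluateUnaryInPlace/EvaluateBinaryInPlace above follows the same "evaluate into a temporary, then write back" shape. A minimal standalone sketch of that pattern for a lane-wise add (illustrative only — simplified types, not the JIT's `simd16_t` or `EvaluateBinarySimd` helpers):

```cpp
// Standalone sketch of the in-place evaluation pattern: compute into a local
// temporary, then overwrite the constant's payload. Vec4f and the add loop
// are simplifications, not the JIT's own types or helpers.
#include <cstddef>

struct Vec4f
{
    float f32[4];
};

static void EvaluateAddInPlace(Vec4f* lhs, const Vec4f& rhs)
{
    Vec4f result = {};

    for (size_t i = 0; i < 4; i++)
    {
        // Lane-wise evaluation, analogous to the GT_ADD handling in the SIMD evaluator.
        result.f32[i] = lhs->f32[i] + rhs.f32[i];
    }

    // Write back only after the whole result is computed, mirroring how
    // gtSimdNNVal is assigned from the temporary in the cases above.
    *lhs = result;
}
```
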
+ + case TYP_SIMD16: + { + simd16_t result = {}; + EvaluateBinarySimd(oper, scalar, baseType, &result, gtSimd16Val, other->gtSimd16Val); + gtSimd16Val = result; + break; + } + +#if defined(TARGET_XARCH) + case TYP_SIMD32: + { + simd32_t result = {}; + EvaluateBinarySimd(oper, scalar, baseType, &result, gtSimd32Val, other->gtSimd32Val); + gtSimd32Val = result; + break; + } + + case TYP_SIMD64: + { + simd64_t result = {}; + EvaluateBinarySimd(oper, scalar, baseType, &result, gtSimd64Val, other->gtSimd64Val); + gtSimd64Val = result; + break; + } +#endif // TARGET_XARCH + + default: + { + unreached(); + } + } +} + +//------------------------------------------------------------------------ +// GenTreeVecCon::EvaluateBroadcastInPlace: Evaluates this constant using a broadcast +// +// Arguments: +// simdType - the size of the constant being checked +// baseType - the base type of the constant being checked +// scalar - the value to broadcast as part of the evaluation +// +void GenTreeVecCon::EvaluateBroadcastInPlace(var_types simdType, var_types baseType, double scalar) +{ + switch (baseType) + { + case TYP_FLOAT: + { + EvaluateBroadcastInPlace<float>(simdType, static_cast<float>(scalar)); + break; + } + + case TYP_DOUBLE: + { + EvaluateBroadcastInPlace<double>(simdType, static_cast<double>(scalar)); + break; + } + + default: + { + unreached(); + } + } +} + +//------------------------------------------------------------------------ +// GenTreeVecCon::EvaluateBroadcastInPlace: Evaluates this constant using a broadcast +// +// Arguments: +// simdType - the size of the constant being checked +// baseType - the base type of the constant being checked +// scalar - the value to broadcast as part of the evaluation +// +void GenTreeVecCon::EvaluateBroadcastInPlace(var_types simdType, var_types baseType, int64_t scalar) +{ + switch (baseType) + { + case TYP_BYTE: + { + EvaluateBroadcastInPlace<int8_t>(simdType, static_cast<int8_t>(scalar)); + break; + } + + case TYP_UBYTE: + { + EvaluateBroadcastInPlace<uint8_t>(simdType, static_cast<uint8_t>(scalar)); + break; + } + + case TYP_SHORT: + { + EvaluateBroadcastInPlace<int16_t>(simdType, static_cast<int16_t>(scalar)); + break; + } + + case TYP_USHORT: + { + EvaluateBroadcastInPlace<uint16_t>(simdType, static_cast<uint16_t>(scalar)); + break; + } + + case TYP_INT: + { + EvaluateBroadcastInPlace<int32_t>(simdType, static_cast<int32_t>(scalar)); + break; + } + + case TYP_UINT: + { + EvaluateBroadcastInPlace<uint32_t>(simdType, static_cast<uint32_t>(scalar)); + break; + } + + case TYP_LONG: + { + EvaluateBroadcastInPlace<int64_t>(simdType, static_cast<int64_t>(scalar)); + break; + } + + case TYP_ULONG: + { + EvaluateBroadcastInPlace<uint64_t>(simdType, static_cast<uint64_t>(scalar)); + break; + } + + default: + { + unreached(); + } + } +} + +//------------------------------------------------------------------------ +// GenTreeVecCon::IsBroadcast: Determines if this vector constant is a broadcast +// +// Arguments: +// simdType - the size of the constant being checked +// simdBaseType - the base type of the constant being checked +// +// Returns: +// true if the constant represents a broadcast value; otherwise, false +// +bool GenTreeVecCon::IsBroadcast(var_types simdType, var_types simdBaseType) const +{ + assert(varTypeIsSIMD(simdType)); + assert(varTypeIsArithmetic(simdBaseType)); + + int elementCount = ElementCount(genTypeSize(simdType), simdBaseType); + + switch (simdBaseType) + { + case TYP_BYTE: + case TYP_UBYTE: + { + return ElementsAreSame(&gtSimdVal.u8[0], elementCount); + } + + case TYP_SHORT: + case TYP_USHORT: + { + return ElementsAreSame(&gtSimdVal.u16[0], elementCount); + } + + case TYP_FLOAT: + case TYP_INT: + case TYP_UINT: +
{ + return ElementsAreSame(&gtSimdVal.u32[0], elementCount); + } + + case TYP_DOUBLE: + case TYP_LONG: + case TYP_ULONG: + { + return ElementsAreSame(&gtSimdVal.u64[0], elementCount); + } + + default: + { + return false; + } + } +} #endif // FEATURE_HW_INTRINSICS //------------------------------------------------------------------------ @@ -25369,8 +25653,6 @@ GenTree* Compiler::gtNewSimdShuffleNode( if (simdSize == 16) { lookupIntrinsic = NI_AdvSimd_Arm64_VectorTableLookup; - - op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, NI_Vector64_ToVector128, simdBaseJitType, simdSize); } // VectorTableLookup is only valid on byte/sbyte @@ -27165,8 +27447,9 @@ bool GenTreeHWIntrinsic::OperIsCreateScalarUnsafe() const // bool GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic() const { - genTreeOps Oper = HWOperGet(); - return Oper == GT_AND || Oper == GT_OR || Oper == GT_XOR || Oper == GT_AND_NOT; + bool isScalar = false; + genTreeOps oper = HWOperGet(&isScalar); + return (oper == GT_AND) || (oper == GT_AND_NOT) || (oper == GT_NOT) || (oper == GT_OR) || (oper == GT_XOR); } //------------------------------------------------------------------------ @@ -27516,8 +27799,10 @@ void GenTreeHWIntrinsic::Initialize(NamedIntrinsic intrinsicId) //------------------------------------------------------------------------------ // HWOperGet : Returns Oper based on the HWIntrinsicId // -genTreeOps GenTreeHWIntrinsic::HWOperGet() const +genTreeOps GenTreeHWIntrinsic::HWOperGet(bool* isScalar) const { + *isScalar = false; + switch (GetHWIntrinsicId()) { #if defined(TARGET_XARCH) case NI_AVX512F_AndNot: case NI_AVX512DQ_AndNot: case NI_AVX10v1_V512_AndNot: +#elif defined(TARGET_ARM64) + case NI_AdvSimd_BitwiseClear: +#endif { return GT_AND_NOT; } -#endif - // TODO: Handle other cases - default: +#if defined(TARGET_XARCH) + case NI_SSE_Add: + case NI_SSE2_Add: + case NI_AVX_Add: + case NI_AVX2_Add: + case NI_AVX512F_Add: + case NI_AVX512BW_Add: +#elif defined(TARGET_ARM64) + case NI_AdvSimd_Add: + case NI_AdvSimd_Arm64_Add: +#endif { - return GT_NONE; + return GT_ADD; } - } -} - -#endif // FEATURE_HW_INTRINSICS -//--------------------------------------------------------------------------------------- -// gtNewMustThrowException: -// create a throw node (calling into JIT helper) that must be thrown. -// The result would be a comma node: COMMA(jithelperthrow(void), x) where x's type should be specified.
-// -// Arguments -// helper - JIT helper ID -// type - return type of the node -// -// Return Value -// pointer to the throw node -// -GenTree* Compiler::gtNewMustThrowException(unsigned helper, var_types type, CORINFO_CLASS_HANDLE clsHnd) -{ - GenTreeCall* node = gtNewHelperCallNode(helper, TYP_VOID); - assert(node->IsNoReturn()); - if (type != TYP_VOID) - { - unsigned dummyTemp = lvaGrabTemp(true DEBUGARG("dummy temp of must thrown exception")); - if (type == TYP_STRUCT) +#if defined(TARGET_XARCH) + case NI_SSE_AddScalar: + case NI_SSE2_AddScalar: + case NI_AVX512F_AddScalar: + case NI_AVX10v1_AddScalar: +#elif defined(TARGET_ARM64) + case NI_AdvSimd_AddScalar: +#endif { - lvaSetStruct(dummyTemp, clsHnd, false); - type = lvaTable[dummyTemp].lvType; // struct type is normalized + *isScalar = true; + return GT_ADD; } - else + +#if defined(TARGET_XARCH) + case NI_SSE_Divide: + case NI_SSE2_Divide: + case NI_AVX_Divide: + case NI_AVX512F_Divide: +#elif defined(TARGET_ARM64) + case NI_AdvSimd_Arm64_Divide: +#endif { - lvaTable[dummyTemp].lvType = type; + return GT_DIV; } - GenTree* dummyNode = gtNewLclvNode(dummyTemp, type); - return gtNewOperNode(GT_COMMA, type, node, dummyNode); - } + +#if defined(TARGET_XARCH) + case NI_SSE_DivideScalar: + case NI_SSE2_DivideScalar: + case NI_AVX512F_DivideScalar: + case NI_AVX10v1_DivideScalar: +#elif defined(TARGET_ARM64) + case NI_AdvSimd_DivideScalar: +#endif + { + *isScalar = true; + return GT_DIV; + } + +#if defined(TARGET_XARCH) + case NI_SSE_Multiply: + case NI_SSE2_MultiplyLow: + case NI_SSE41_MultiplyLow: + case NI_AVX_Multiply: + case NI_AVX2_MultiplyLow: + case NI_AVX512F_MultiplyLow: + case NI_AVX512BW_MultiplyLow: + case NI_AVX512DQ_MultiplyLow: + case NI_AVX512DQ_VL_MultiplyLow: + case NI_AVX10v1_MultiplyLow: + case NI_AVX10v1_V512_MultiplyLow: +#elif defined(TARGET_ARM64) + case NI_AdvSimd_Multiply: + case NI_AdvSimd_Arm64_Multiply: +#endif + { + return GT_MUL; + } + +#if defined(TARGET_XARCH) + case NI_SSE2_Multiply: + case NI_AVX512F_Multiply: + { + if (varTypeIsFloating(GetSimdBaseType())) + { + return GT_MUL; + } + return GT_NONE; + } +#endif + +#if defined(TARGET_XARCH) + case NI_SSE_MultiplyScalar: + case NI_SSE2_MultiplyScalar: + case NI_AVX512F_MultiplyScalar: + case NI_AVX10v1_MultiplyScalar: +#elif defined(TARGET_ARM64) + case NI_AdvSimd_MultiplyScalar: +#endif + { + *isScalar = true; + return GT_MUL; + } + +#if defined(TARGET_ARM64) + case NI_AdvSimd_Negate: + case NI_AdvSimd_Arm64_Negate: + { + return GT_NEG; + } + + case NI_AdvSimd_NegateScalar: + case NI_AdvSimd_Arm64_NegateScalar: + { + *isScalar = true; + return GT_NEG; + } +#endif + +#if defined(TARGET_XARCH) + case NI_AVX512F_RotateLeft: + case NI_AVX512F_RotateLeftVariable: + case NI_AVX512F_VL_RotateLeft: + case NI_AVX512F_VL_RotateLeftVariable: + case NI_AVX10v1_RotateLeft: + case NI_AVX10v1_RotateLeftVariable: + { + return GT_ROL; + } + + case NI_AVX512F_RotateRight: + case NI_AVX512F_RotateRightVariable: + case NI_AVX512F_VL_RotateRight: + case NI_AVX512F_VL_RotateRightVariable: + case NI_AVX10v1_RotateRight: + case NI_AVX10v1_RotateRightVariable: + { + return GT_ROR; + } +#endif // TARGET_XARCH + +#ifdef TARGET_ARM64 + case NI_AdvSimd_ShiftLeftLogical: +#else + case NI_SSE2_ShiftLeftLogical: + case NI_AVX2_ShiftLeftLogical: + case NI_AVX2_ShiftLeftLogicalVariable: + case NI_AVX512F_ShiftLeftLogical: + case NI_AVX512F_ShiftLeftLogicalVariable: + case NI_AVX512BW_ShiftLeftLogical: + case NI_AVX512BW_ShiftLeftLogicalVariable: +#endif + { + return GT_LSH; + } + +#ifdef 
TARGET_ARM64 + case NI_AdvSimd_ShiftLeftLogicalScalar: + { + *isScalar = true; + return GT_LSH; + } +#endif + +#ifdef TARGET_ARM64 + case NI_AdvSimd_ShiftRightArithmetic: +#else + case NI_SSE2_ShiftRightArithmetic: + case NI_AVX2_ShiftRightArithmetic: + case NI_AVX2_ShiftRightArithmeticVariable: + case NI_AVX512F_ShiftRightArithmetic: + case NI_AVX512F_ShiftRightArithmeticVariable: + case NI_AVX512F_VL_ShiftRightArithmetic: + case NI_AVX512F_VL_ShiftRightArithmeticVariable: + case NI_AVX512BW_ShiftRightArithmetic: + case NI_AVX512BW_ShiftRightArithmeticVariable: + case NI_AVX10v1_ShiftRightArithmetic: + case NI_AVX10v1_ShiftRightArithmeticVariable: +#endif + { + return GT_RSH; + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_ShiftRightArithmeticScalar: + { + *isScalar = true; + return GT_RSH; + } +#endif + +#ifdef TARGET_ARM64 + case NI_AdvSimd_ShiftRightLogical: +#else + case NI_SSE2_ShiftRightLogical: + case NI_AVX2_ShiftRightLogical: + case NI_AVX2_ShiftRightLogicalVariable: + case NI_AVX512F_ShiftRightLogical: + case NI_AVX512F_ShiftRightLogicalVariable: + case NI_AVX512BW_ShiftRightLogical: + case NI_AVX512BW_ShiftRightLogicalVariable: +#endif + { + return GT_RSZ; + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_ShiftRightLogicalScalar: + { + *isScalar = true; + return GT_RSZ; + } +#endif + +#if defined(TARGET_XARCH) + case NI_SSE_Subtract: + case NI_SSE2_Subtract: + case NI_AVX_Subtract: + case NI_AVX2_Subtract: + case NI_AVX512F_Subtract: + case NI_AVX512BW_Subtract: +#elif defined(TARGET_ARM64) + case NI_AdvSimd_Subtract: + case NI_AdvSimd_Arm64_Subtract: +#endif + { + return GT_SUB; + } + +#if defined(TARGET_XARCH) + case NI_SSE_SubtractScalar: + case NI_SSE2_SubtractScalar: + case NI_AVX512F_SubtractScalar: + case NI_AVX10v1_SubtractScalar: +#elif defined(TARGET_ARM64) + case NI_AdvSimd_SubtractScalar: +#endif + { + *isScalar = true; + return GT_SUB; + } + + default: + { + return GT_NONE; + } + } +} + +#endif // FEATURE_HW_INTRINSICS + +//--------------------------------------------------------------------------------------- +// gtNewMustThrowException: +// create a throw node (calling into JIT helper) that must be thrown. +// The result would be a comma node: COMMA(jithelperthrow(void), x) where x's type should be specified. 
+// +// Arguments +// helper - JIT helper ID +// type - return type of the node +// +// Return Value +// pointer to the throw node +// +GenTree* Compiler::gtNewMustThrowException(unsigned helper, var_types type, CORINFO_CLASS_HANDLE clsHnd) +{ + GenTreeCall* node = gtNewHelperCallNode(helper, TYP_VOID); + assert(node->IsNoReturn()); + if (type != TYP_VOID) + { + unsigned dummyTemp = lvaGrabTemp(true DEBUGARG("dummy temp of must thrown exception")); + if (type == TYP_STRUCT) + { + lvaSetStruct(dummyTemp, clsHnd, false); + type = lvaTable[dummyTemp].lvType; // struct type is normalized + } + else + { + lvaTable[dummyTemp].lvType = type; + } + GenTree* dummyNode = gtNewLclvNode(dummyTemp, type); + return gtNewOperNode(GT_COMMA, type, node, dummyNode); + } return node; } @@ -28410,8 +28932,13 @@ uint8_t GenTreeHWIntrinsic::GetTernaryControlByte(GenTreeHWIntrinsic* second) co const uint8_t B = 0xCC; const uint8_t C = 0xAA; - genTreeOps firstOper = HWOperGet(); - genTreeOps secondOper = second->HWOperGet(); + bool isScalar = false; + + genTreeOps firstOper = HWOperGet(&isScalar); + assert(!isScalar); + + genTreeOps secondOper = second->HWOperGet(&isScalar); + assert(!isScalar); uint8_t AB = 0; uint8_t ABC = 0; @@ -28605,3 +29132,818 @@ bool GenTree::CanDivOrModPossiblyOverflow(Compiler* comp) const // Not enough known information; therefore we might overflow. return true; } + +#if defined(FEATURE_HW_INTRINSICS) +GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) +{ + assert(tree->OperIsHWIntrinsic()); + + // NOTE: MinOpts() is always true for Tier0 so we have to check explicit flags instead. + // To be fixed in https://github.com/dotnet/runtime/pull/77465 + const bool tier0opts = !opts.compDbgCode && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_MIN_OPT); + if (!tier0opts) + { + return tree; + } + + GenTree* op1 = nullptr; + GenTree* op2 = nullptr; + GenTree* op3 = nullptr; + size_t opCount = tree->GetOperandCount(); + + switch (opCount) + { + case 3: + { + op3 = tree->Op(3); + FALLTHROUGH; + } + + case 2: + { + op2 = tree->Op(2); + FALLTHROUGH; + } + + case 1: + { + op1 = tree->Op(1); + break; + } + + default: + { + return tree; + } + } + + bool isScalar = false; + genTreeOps oper = tree->HWOperGet(&isScalar); + +#if defined(TARGET_XARCH) + if (oper == GT_AND_NOT) + { + // xarch does: ~op1 & op2, we need op1 & ~op2 + std::swap(op1, op2); + } +#endif // TARGET_XARCH + + GenTree* cnsNode = nullptr; + GenTree* otherNode = nullptr; + + if (op1->OperIsConst()) + { + cnsNode = op1; + otherNode = op2; + } + else if ((op2 != nullptr) && op2->OperIsConst()) + { + cnsNode = op2; + otherNode = op1; + } + else + { + // No constants, so nothing to fold + return tree; + } + + GenTree* resultNode = tree; + + NamedIntrinsic ni = tree->GetHWIntrinsicId(); + var_types retType = tree->TypeGet(); + var_types simdBaseType = tree->GetSimdBaseType(); + CorInfoType simdBaseJitType = tree->GetSimdBaseJitType(); + unsigned int simdSize = tree->GetSimdSize(); + bool isNewNode = false; + + if (otherNode == nullptr) + { + assert(op2 == nullptr); + assert(op3 == nullptr); + + if (oper != GT_NONE) + { + cnsNode->AsVecCon()->EvaluateUnaryInPlace(oper, isScalar, retType, simdBaseType); + resultNode = cnsNode; + } + else + { + switch (ni) + { +#ifdef TARGET_ARM64 + case NI_ArmBase_LeadingZeroCount: +#else + case NI_LZCNT_LeadingZeroCount: +#endif + { + assert(!varTypeIsSmall(retType) && !varTypeIsLong(retType)); + + int32_t value = static_cast(cnsNode->AsIntConCommon()->IconValue()); + uint32_t result = 
BitOperations::LeadingZeroCount(static_cast<uint32_t>(value)); + + cnsNode->AsIntConCommon()->SetIconValue(static_cast<int32_t>(result)); + resultNode = cnsNode; + break; + } + +#ifdef TARGET_ARM64 + case NI_ArmBase_Arm64_LeadingZeroCount: + { + assert(varTypeIsInt(retType)); + + int64_t value = cnsNode->AsIntConCommon()->IntegralValue(); + uint32_t result = BitOperations::LeadingZeroCount(static_cast<uint64_t>(value)); + + cnsNode->AsIntConCommon()->SetIconValue(static_cast<int32_t>(result)); + resultNode = cnsNode; + break; + } +#else + case NI_LZCNT_X64_LeadingZeroCount: + { + assert(varTypeIsLong(retType)); + + int64_t value = cnsNode->AsIntConCommon()->IntegralValue(); + uint32_t result = BitOperations::LeadingZeroCount(static_cast<uint64_t>(value)); + + cnsNode->AsIntConCommon()->SetIntegralValue(static_cast<int64_t>(result)); + resultNode = cnsNode; + break; + } +#endif + + case NI_Vector128_AsVector3: + case NI_Vector128_AsVector128Unsafe: +#ifdef TARGET_ARM64 + case NI_Vector64_ToVector128Unsafe: + case NI_Vector128_GetLower: +#else + case NI_Vector128_AsVector2: + case NI_Vector128_ToVector256Unsafe: + case NI_Vector256_GetLower: + case NI_Vector256_ToVector512Unsafe: + case NI_Vector512_GetLower: + case NI_Vector512_GetLower128: +#endif + { + // These are all going to a smaller type taking the lowest bits + // or are unsafely going to a larger type, so we just need to retype + // the constant and we're good to go. + + cnsNode->gtType = retType; + resultNode = cnsNode; + break; + } + +#ifdef TARGET_ARM64 + case NI_Vector64_ToVector128: + { + assert(retType == TYP_SIMD16); + assert(cnsNode->gtType == TYP_SIMD8); + cnsNode->AsVecCon()->gtSimd16Val.v64[1] = {}; + + cnsNode->gtType = retType; + resultNode = cnsNode; + break; + } +#else + case NI_Vector128_ToVector256: + { + assert(retType == TYP_SIMD32); + assert(cnsNode->gtType == TYP_SIMD16); + cnsNode->AsVecCon()->gtSimd32Val.v128[1] = {}; + + cnsNode->gtType = retType; + resultNode = cnsNode; + break; + } + + case NI_Vector128_ToVector512: + { + assert(retType == TYP_SIMD64); + assert(cnsNode->gtType == TYP_SIMD16); + cnsNode->AsVecCon()->gtSimd64Val.v128[1] = {}; + cnsNode->AsVecCon()->gtSimd64Val.v256[1] = {}; + + cnsNode->gtType = retType; + resultNode = cnsNode; + break; + } + + case NI_Vector256_ToVector512: + { + assert(retType == TYP_SIMD64); + assert(cnsNode->gtType == TYP_SIMD32); + cnsNode->AsVecCon()->gtSimd64Val.v256[1] = {}; + + cnsNode->gtType = retType; + resultNode = cnsNode; + break; + } +#endif + +#ifdef TARGET_ARM64 + case NI_Vector128_GetUpper: + { + assert(retType == TYP_SIMD8); + assert(cnsNode->gtType == TYP_SIMD16); + cnsNode->AsVecCon()->gtSimd8Val = cnsNode->AsVecCon()->gtSimd16Val.v64[1]; + + cnsNode->gtType = retType; + resultNode = cnsNode; + break; + } +#else + case NI_Vector256_GetUpper: + { + assert(retType == TYP_SIMD16); + assert(cnsNode->gtType == TYP_SIMD32); + cnsNode->AsVecCon()->gtSimd16Val = cnsNode->AsVecCon()->gtSimd32Val.v128[1]; + + cnsNode->gtType = retType; + resultNode = cnsNode; + break; + } + + case NI_Vector512_GetUpper: + { + assert(retType == TYP_SIMD32); + assert(cnsNode->gtType == TYP_SIMD64); + cnsNode->AsVecCon()->gtSimd32Val = cnsNode->AsVecCon()->gtSimd64Val.v256[1]; + + cnsNode->gtType = retType; + resultNode = cnsNode; + break; + } +#endif + + case NI_Vector128_ToScalar: +#ifdef TARGET_ARM64 + case NI_Vector64_ToScalar: +#else + case NI_Vector256_ToScalar: + case NI_Vector512_ToScalar: +#endif + { + var_types simdType = getSIMDTypeForSize(simdSize); + + if (varTypeIsFloating(retType)) + { + double result =
cnsNode->AsVecCon()->ToScalarFloating(simdType, simdBaseType); + + resultNode = gtNewDconNode(result, retType); + isNewNode = true; + } + else + { + assert(varTypeIsIntegral(retType)); + int64_t result = cnsNode->AsVecCon()->ToScalarIntegral(simdType, simdBaseType); + + if (varTypeIsLong(retType)) + { + resultNode = gtNewLconNode(result); + isNewNode = true; + } + else + { + resultNode = gtNewIconNode(static_cast(result), retType); + isNewNode = true; + } + } + break; + } + + default: + { + break; + } + } + } + } + else if (otherNode->OperIsConst()) + { + if (oper != GT_NONE) + { + assert(op3 == nullptr); + +#if defined(TARGET_XARCH) + if ((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ)) + { + if (otherNode->TypeIs(TYP_SIMD16)) + { + if ((ni != NI_AVX2_ShiftLeftLogicalVariable) && (ni != NI_AVX2_ShiftRightArithmeticVariable) && + (ni != NI_AVX512F_VL_ShiftRightArithmeticVariable) && + (ni != NI_AVX10v1_ShiftRightArithmeticVariable) && (ni != NI_AVX2_ShiftRightLogicalVariable)) + { + // The xarch shift instructions support taking the shift amount as + // a simd16, in which case they take the shift amount from the lower + // 64-bits. + + int64_t shiftAmount = otherNode->AsVecCon()->GetElementIntegral(TYP_SIMD16, TYP_LONG, 0); + + if ((genTypeSize(simdBaseType) != 8) && (shiftAmount > INT_MAX)) + { + // Ensure we don't lose track the the amount is an overshift + shiftAmount = -1; + } + otherNode->AsVecCon()->EvaluateBroadcastInPlace(retType, simdBaseType, shiftAmount); + } + } + } +#endif // TARGET_XARCH + + if (otherNode->IsIntegralConst()) + { + int64_t scalar = otherNode->AsIntConCommon()->IntegralValue(); + + otherNode = gtNewVconNode(retType); + isNewNode = true; + + otherNode->AsVecCon()->EvaluateBroadcastInPlace(retType, simdBaseType, scalar); + } + + cnsNode->AsVecCon()->EvaluateBinaryInPlace(oper, isScalar, retType, simdBaseType, otherNode->AsVecCon()); + resultNode = cnsNode; + } + else + { + switch (ni) + { + case NI_Vector128_GetElement: +#ifdef TARGET_ARM64 + case NI_Vector64_GetElement: +#else + case NI_Vector256_GetElement: + case NI_Vector512_GetElement: +#endif + { + assert(op3 == nullptr); + uint32_t index = static_cast(otherNode->AsIntConCommon()->IconValue()); + + if (index >= GenTreeVecCon::ElementCount(simdSize, simdBaseType)) + { + // Nothing to fold for out of range indexes + break; + } + + var_types simdType = getSIMDTypeForSize(simdSize); + + if (varTypeIsFloating(retType)) + { + double result = cnsNode->AsVecCon()->GetElementFloating(simdType, simdBaseType, index); + + resultNode = gtNewDconNode(result, retType); + isNewNode = true; + } + else + { + assert(varTypeIsIntegral(retType)); + int64_t result = cnsNode->AsVecCon()->GetElementIntegral(simdType, simdBaseType, index); + + if (varTypeIsLong(retType)) + { + resultNode = gtNewLconNode(result); + isNewNode = true; + } + else + { + resultNode = gtNewIconNode(static_cast(result), retType); + isNewNode = true; + } + } + break; + } + +#ifdef TARGET_ARM64 + case NI_AdvSimd_MultiplyByScalar: + case NI_AdvSimd_Arm64_MultiplyByScalar: + { + assert(op3 == nullptr); + + // MultiplyByScalar takes a vector as the second operand but only utilizes element 0 + // We need to extract it and then functionally broadcast it up for the evaluation to + // work as expected. 
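A small standalone sketch of the extract-then-broadcast step just described (illustrative only — a simplified 4 x int16_t vector rather than the JIT's simd types and its EvaluateBroadcastInPlace/EvaluateBinaryInPlace helpers):

```cpp
// Sketch of folding MultiplyByScalar on constants: read lane 0 of the scalar
// operand, functionally broadcast it across all lanes, then do a lane-wise multiply.
#include <cstdint>
#include <cstddef>

struct Vec4s
{
    int16_t i16[4];
};

static Vec4s FoldMultiplyByScalar(const Vec4s& vec, const Vec4s& scalarOperand)
{
    // MultiplyByScalar only reads element 0 of its second operand.
    int16_t scalar = scalarOperand.i16[0];

    Vec4s result = {};

    for (size_t i = 0; i < 4; i++)
    {
        // Broadcast the scalar to every lane and multiply lane-wise.
        result.i16[i] = static_cast<int16_t>(vec.i16[i] * scalar);
    }

    return result;
}
```
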
+ + if (varTypeIsFloating(simdBaseType)) + { + double scalar = otherNode->AsVecCon()->ToScalarFloating(retType, simdBaseType); + otherNode->AsVecCon()->EvaluateBroadcastInPlace(retType, simdBaseType, scalar); + } + else + { + assert(varTypeIsIntegral(simdBaseType)); + int64_t scalar = otherNode->AsVecCon()->ToScalarIntegral(retType, simdBaseType); + otherNode->AsVecCon()->EvaluateBroadcastInPlace(retType, simdBaseType, scalar); + } + + cnsNode->AsVecCon()->EvaluateBinaryInPlace(GT_MUL, isScalar, retType, simdBaseType, + otherNode->AsVecCon()); + resultNode = cnsNode; + break; + } +#endif + + case NI_Vector128_WithElement: +#ifdef TARGET_ARM64 + case NI_Vector64_WithElement: +#else + case NI_Vector256_WithElement: + case NI_Vector512_WithElement: +#endif + { + if (!op3->OperIsConst()) + { + break; + } + + uint32_t index = static_cast(op2->AsIntConCommon()->IconValue()); + + if (index >= GenTreeVecCon::ElementCount(simdSize, simdBaseType)) + { + // Nothing to fold for out of range indexes + break; + } + + var_types simdType = getSIMDTypeForSize(simdSize); + + if (varTypeIsFloating(simdBaseType)) + { + double value = op3->AsDblCon()->DconValue(); + cnsNode->AsVecCon()->SetElementFloating(simdType, simdBaseType, index, value); + resultNode = cnsNode; + } + else + { + assert(varTypeIsIntegral(simdBaseType)); + int64_t value = op3->AsIntConCommon()->IntegralValue(); + cnsNode->AsVecCon()->SetElementIntegral(simdType, simdBaseType, index, value); + resultNode = cnsNode; + } + break; + } + +#ifdef TARGET_ARM64 + case NI_Vector128_WithLower: + { + assert(retType == TYP_SIMD16); + assert(cnsNode->gtType == TYP_SIMD16); + assert(otherNode->gtType == TYP_SIMD8); + cnsNode->AsVecCon()->gtSimd16Val.v64[0] = otherNode->AsVecCon()->gtSimd8Val; + + resultNode = cnsNode; + break; + } +#else + case NI_Vector256_WithLower: + { + assert(retType == TYP_SIMD32); + assert(cnsNode->gtType == TYP_SIMD32); + assert(otherNode->gtType == TYP_SIMD16); + cnsNode->AsVecCon()->gtSimd32Val.v128[0] = otherNode->AsVecCon()->gtSimd16Val; + + resultNode = cnsNode; + break; + } + + case NI_Vector512_WithLower: + { + assert(retType == TYP_SIMD64); + assert(cnsNode->gtType == TYP_SIMD64); + assert(otherNode->gtType == TYP_SIMD32); + cnsNode->AsVecCon()->gtSimd64Val.v256[0] = otherNode->AsVecCon()->gtSimd32Val; + + resultNode = cnsNode; + break; + } +#endif + +#ifdef TARGET_ARM64 + case NI_Vector128_WithUpper: + { + assert(retType == TYP_SIMD16); + assert(cnsNode->gtType == TYP_SIMD16); + assert(otherNode->gtType == TYP_SIMD8); + cnsNode->AsVecCon()->gtSimd16Val.v64[1] = otherNode->AsVecCon()->gtSimd8Val; + + resultNode = cnsNode; + break; + } +#else + case NI_Vector256_WithUpper: + { + assert(retType == TYP_SIMD32); + assert(cnsNode->gtType == TYP_SIMD32); + assert(otherNode->gtType == TYP_SIMD16); + cnsNode->AsVecCon()->gtSimd32Val.v128[1] = otherNode->AsVecCon()->gtSimd16Val; + + resultNode = cnsNode; + break; + } + + case NI_Vector512_WithUpper: + { + assert(retType == TYP_SIMD64); + assert(cnsNode->gtType == TYP_SIMD64); + assert(otherNode->gtType == TYP_SIMD32); + cnsNode->AsVecCon()->gtSimd64Val.v256[1] = otherNode->AsVecCon()->gtSimd32Val; + + resultNode = cnsNode; + break; + } +#endif + + default: + { + break; + } + } + } + } + else if (op3 == nullptr) + { + bool otherNodeHasSideEffects = (otherNode->gtFlags & GTF_SIDE_EFFECT) != 0; + + switch (oper) + { + case GT_ADD: + { + if (varTypeIsFloating(simdBaseType)) + { + // Not safe for floating-point when x == -0.0 + break; + } + + // Handle `x + 0 == x` and `0 + x == x` + 
if (cnsNode->IsVectorZero()) + { + resultNode = otherNode; + } + break; + } + + case GT_AND: + { + // Handle `x & 0 == 0` and `0 & x == 0` + if (cnsNode->IsVectorZero()) + { + if (!otherNodeHasSideEffects) + { + // We need to preserve side effects when they exist + resultNode = cnsNode; + } + break; + } + + // Handle `x & AllBitsSet == x` and `AllBitsSet & x == x` + if (cnsNode->IsVectorAllBitsSet()) + { + resultNode = otherNode; + } + break; + } + + case GT_AND_NOT: + { + // Handle `x & ~0 == x` and `0 & ~x == 0` + if (cnsNode->IsVectorZero()) + { + if (cnsNode == op1) + { + if (!otherNodeHasSideEffects) + { + // We need to preserve side effects when they exist + resultNode = cnsNode; + } + break; + } + else + { + resultNode = otherNode; + } + break; + } + + // Handle `x & ~AllBitsSet == 0` + if (cnsNode->IsVectorAllBitsSet() && (cnsNode == op2)) + { + if (!otherNodeHasSideEffects) + { + // We need to preserve side effects when they exist + resultNode = cnsNode; + } + } + break; + } + + case GT_DIV: + { + // Handle `x / 1 == x`. + // This is safe for all floats since we do not fault for sNaN + + if (cnsNode != op2) + { + break; + } + + if (!cnsNode->IsVectorBroadcast(retType, simdBaseType)) + { + break; + } + + if (cnsNode->AsVecCon()->IsScalarOne(retType, simdBaseType)) + { + resultNode = otherNode; + } + break; + } + + case GT_MUL: + { + if (!varTypeIsFloating(simdBaseType)) + { + // Handle `x * 0 == 0` and `0 * x == 0` + // Not safe for floating-point when x == -0.0, NaN, +Inf, -Inf + if (cnsNode->IsVectorZero()) + { + if (!otherNodeHasSideEffects) + { + // We need to preserve side effects when they exist + resultNode = cnsNode; + } + break; + } + } + + // Handle `x * 1 == x` and `1 * x == x` + // This is safe for all floats since we do not fault for sNaN + + if (!cnsNode->IsVectorBroadcast(retType, simdBaseType)) + { + break; + } + + if (cnsNode->AsVecCon()->IsScalarOne(retType, simdBaseType)) + { + resultNode = otherNode; + } + break; + } + + case GT_OR: + { + // Handle `x | 0 == x` and `0 | x == x` + if (cnsNode->IsVectorZero()) + { + resultNode = otherNode; + break; + } + + // Handle `x | AllBitsSet == AllBitsSet` and `AllBitsSet | x == AllBitsSet` + if (cnsNode->IsVectorAllBitsSet()) + { + if (!otherNodeHasSideEffects) + { + // We need to preserve side effects when they exist + resultNode = cnsNode; + } + } + break; + } + + case GT_ROL: + case GT_ROR: + case GT_LSH: + case GT_RSH: + case GT_RSZ: + { + // Handle `x rol 0 == x` and `0 rol x == 0` + // Handle `x ror 0 == x` and `0 ror x == 0` + // Handle `x << 0 == x` and `0 << x == 0` + // Handle `x >> 0 == x` and `0 >> x == 0` + // Handle `x >>> 0 == x` and `0 >>> x == 0` + + if (cnsNode->IsVectorZero()) + { + if (cnsNode == op2) + { + resultNode = otherNode; + } + else if (!otherNodeHasSideEffects) + { + // We need to preserve side effects when they exist + resultNode = cnsNode; + } + } + else if (cnsNode->IsIntegralConst(0)) + { + assert(cnsNode == op2); + resultNode = otherNode; + } + break; + } + + case GT_SUB: + { + if (varTypeIsFloating(simdBaseType)) + { + // Not safe for floating-point when x == -0.0 + break; + } + + // Handle `x - 0 == x` + if ((op2 == cnsNode) && cnsNode->IsVectorZero()) + { + resultNode = otherNode; + } + break; + } + + case GT_XOR: + { + // Handle `x | 0 == x` and `0 | x == x` + if (cnsNode->IsVectorZero()) + { + resultNode = otherNode; + } + break; + } + + default: + { + break; + } + } + + switch (ni) + { +#ifdef TARGET_ARM64 + case NI_AdvSimd_MultiplyByScalar: + case NI_AdvSimd_Arm64_MultiplyByScalar: 
+ { + if (!varTypeIsFloating(simdBaseType)) + { + // Handle `x * 0 == 0` and `0 * x == 0` + // Not safe for floating-point when x == -0.0, NaN, +Inf, -Inf + if (cnsNode->IsVectorZero()) + { + if (!otherNodeHasSideEffects) + { + // We need to preserve side effects when they exist + resultNode = cnsNode; + } + break; + } + else if ((cnsNode == op2) && cnsNode->AsVecCon()->IsScalarZero(TYP_SIMD8, simdBaseType)) + { + if (!otherNodeHasSideEffects) + { + // We need to preserve side effects when they exist + resultNode = cnsNode; + } + break; + } + } + + // Handle x * 1 => x, but only if the scalar RHS is <1, ...>. + if ((cnsNode == op2) && cnsNode->AsVecCon()->IsScalarOne(TYP_SIMD8, simdBaseType)) + { + resultNode = otherNode; + } + break; + } +#endif + + default: + { + break; + } + } + } + + if (isNewNode && fgGlobalMorph) + { + fgMorphTreeDone(resultNode); + } + + if (resultNode != tree) + { + JITDUMP("\nFolding hwintrinsic:\n"); + DISPTREE(tree); + + if (resultNode->IsVectorConst()) + { + if (vnStore != nullptr) + { + fgValueNumberTreeConst(resultNode); + } + + // Make sure no side effect flags are set on this constant node. + resultNode->gtFlags &= ~GTF_ALL_EFFECT; + } + + JITDUMP("Transformed into:\n"); + DISPTREE(resultNode); + } + return resultNode; +} +#endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 942e75bdd49607..5fccb99dc140b6 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1765,6 +1765,7 @@ struct GenTree inline bool IsVectorZero() const; inline bool IsVectorCreate() const; inline bool IsVectorAllBitsSet() const; + inline bool IsVectorBroadcast(var_types simdType, var_types simdBaseType) const; inline bool IsMaskAllBitsSet() const; inline bool IsVectorConst(); @@ -6632,7 +6633,7 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic static bool Equals(GenTreeHWIntrinsic* op1, GenTreeHWIntrinsic* op2); - genTreeOps HWOperGet() const; + genTreeOps HWOperGet(bool* isScalar) const; private: void SetHWIntrinsicId(NamedIntrinsic intrinsicId); @@ -6876,6 +6877,179 @@ struct GenTreeVecCon : public GenTree #endif // FEATURE_HW_INTRINSICS + void EvaluateUnaryInPlace(genTreeOps oper, bool scalar, var_types simdType, var_types baseType); + void EvaluateBinaryInPlace( + genTreeOps oper, bool scalar, var_types simdType, var_types baseType, GenTreeVecCon* other); + + template + void EvaluateBroadcastInPlace(var_types simdType, TBase scalar) + { + switch (simdType) + { +#if defined(FEATURE_SIMD) + case TYP_SIMD8: + { + simd8_t result = {}; + BroadcastConstantToSimd(&result, scalar); + gtSimd8Val = result; + break; + } + + case TYP_SIMD12: + { + simd12_t result = {}; + BroadcastConstantToSimd(&result, scalar); + gtSimd12Val = result; + break; + } + + case TYP_SIMD16: + { + simd16_t result = {}; + BroadcastConstantToSimd(&result, scalar); + gtSimd16Val = result; + break; + } + +#if defined(TARGET_XARCH) + case TYP_SIMD32: + { + simd32_t result = {}; + BroadcastConstantToSimd(&result, scalar); + gtSimd32Val = result; + break; + } + + case TYP_SIMD64: + { + simd64_t result = {}; + BroadcastConstantToSimd(&result, scalar); + gtSimd64Val = result; + break; + } +#endif // TARGET_XARCH +#endif // FEATURE_SIMD + + default: + { + unreached(); + } + } + } + + void EvaluateBroadcastInPlace(var_types simdType, var_types baseType, double scalar); + void EvaluateBroadcastInPlace(var_types simdType, var_types baseType, int64_t scalar); + + void SetElementFloating(var_types simdType, var_types simdBaseType, int32_t index, double 
value) + { + switch (simdType) + { +#if defined(FEATURE_SIMD) + case TYP_SIMD8: + { + simd8_t result = {}; + EvaluateWithElementFloating(simdBaseType, &result, gtSimd8Val, index, value); + gtSimd8Val = result; + break; + } + + case TYP_SIMD12: + { + simd12_t result = {}; + EvaluateWithElementFloating(simdBaseType, &result, gtSimd12Val, index, value); + gtSimd12Val = result; + break; + } + + case TYP_SIMD16: + { + simd16_t result = {}; + EvaluateWithElementFloating(simdBaseType, &result, gtSimd16Val, index, value); + gtSimd16Val = result; + break; + } + +#if defined(TARGET_XARCH) + case TYP_SIMD32: + { + simd32_t result = {}; + EvaluateWithElementFloating(simdBaseType, &result, gtSimd32Val, index, value); + gtSimd32Val = result; + break; + } + + case TYP_SIMD64: + { + simd64_t result = {}; + EvaluateWithElementFloating(simdBaseType, &result, gtSimd64Val, index, value); + gtSimd64Val = result; + break; + } +#endif // TARGET_XARCH +#endif // FEATURE_SIMD + + default: + { + unreached(); + } + } + } + + void SetElementIntegral(var_types simdType, var_types simdBaseType, int32_t index, int64_t value) + { + switch (simdType) + { +#if defined(FEATURE_SIMD) + case TYP_SIMD8: + { + simd8_t result = {}; + EvaluateWithElementIntegral(simdBaseType, &result, gtSimd8Val, index, value); + gtSimd8Val = result; + break; + } + + case TYP_SIMD12: + { + simd12_t result = {}; + EvaluateWithElementIntegral(simdBaseType, &result, gtSimd12Val, index, value); + gtSimd12Val = result; + break; + } + + case TYP_SIMD16: + { + simd16_t result = {}; + EvaluateWithElementIntegral(simdBaseType, &result, gtSimd16Val, index, value); + gtSimd16Val = result; + break; + } + +#if defined(TARGET_XARCH) + case TYP_SIMD32: + { + simd32_t result = {}; + EvaluateWithElementIntegral(simdBaseType, &result, gtSimd32Val, index, value); + gtSimd32Val = result; + break; + } + + case TYP_SIMD64: + { + simd64_t result = {}; + EvaluateWithElementIntegral(simdBaseType, &result, gtSimd64Val, index, value); + gtSimd64Val = result; + break; + } +#endif // TARGET_XARCH +#endif // FEATURE_SIMD + + default: + { + unreached(); + } + } + } + bool IsAllBitsSet() const { switch (gtType) @@ -6921,6 +7095,8 @@ struct GenTreeVecCon : public GenTree } } + bool IsBroadcast(var_types simdType, var_types simdBaseType) const; + static bool Equals(const GenTreeVecCon* left, const GenTreeVecCon* right) { var_types gtType = left->TypeGet(); @@ -7018,6 +7194,144 @@ struct GenTreeVecCon : public GenTree } } + double GetElementFloating(var_types simdType, var_types simdBaseType, int32_t index) const + { + switch (simdType) + { +#if defined(FEATURE_SIMD) + case TYP_SIMD8: + { + return EvaluateGetElementFloating(simdBaseType, gtSimd8Val, index); + } + + case TYP_SIMD12: + { + return EvaluateGetElementFloating(simdBaseType, gtSimd12Val, index); + } + + case TYP_SIMD16: + { + return EvaluateGetElementFloating(simdBaseType, gtSimd16Val, index); + } + +#if defined(TARGET_XARCH) + case TYP_SIMD32: + { + return EvaluateGetElementFloating(simdBaseType, gtSimd32Val, index); + } + + case TYP_SIMD64: + { + return EvaluateGetElementFloating(simdBaseType, gtSimd64Val, index); + } +#endif // TARGET_XARCH +#endif // FEATURE_SIMD + + default: + { + unreached(); + } + } + } + + int64_t GetElementIntegral(var_types simdType, var_types simdBaseType, int32_t index) const + { + switch (simdType) + { +#if defined(FEATURE_SIMD) + case TYP_SIMD8: + { + return EvaluateGetElementIntegral(simdBaseType, gtSimd8Val, index); + } + + case TYP_SIMD12: + { + return 
EvaluateGetElementIntegral(simdBaseType, gtSimd12Val, index); + } + + case TYP_SIMD16: + { + return EvaluateGetElementIntegral(simdBaseType, gtSimd16Val, index); + } + +#if defined(TARGET_XARCH) + case TYP_SIMD32: + { + return EvaluateGetElementIntegral(simdBaseType, gtSimd32Val, index); + } + + case TYP_SIMD64: + { + return EvaluateGetElementIntegral(simdBaseType, gtSimd64Val, index); + } +#endif // TARGET_XARCH +#endif // FEATURE_SIMD + + default: + { + unreached(); + } + } + } + + double ToScalarFloating(var_types simdType, var_types simdBaseType) const + { + return GetElementFloating(simdType, simdBaseType, 0); + } + + int64_t ToScalarIntegral(var_types simdType, var_types simdBaseType) const + { + return GetElementIntegral(simdType, simdBaseType, 0); + } + + bool IsElementZero(var_types simdType, var_types simdBaseType, int32_t index) const + { + switch (simdBaseType) + { + case TYP_FLOAT: + { + return GetElementIntegral(simdType, TYP_INT, index) == 0; + } + + case TYP_DOUBLE: + { + return GetElementIntegral(simdType, TYP_LONG, index) == 0; + } + + default: + { + return GetElementIntegral(simdType, simdBaseType, index) == 0; + } + } + } + + bool IsElementOne(var_types simdType, var_types simdBaseType, int32_t index) const + { + switch (simdBaseType) + { + case TYP_FLOAT: + case TYP_DOUBLE: + { + return GetElementFloating(simdType, simdBaseType, index) == 1; + } + + default: + { + return GetElementIntegral(simdType, simdBaseType, index) == 1; + } + } + } + + bool IsScalarZero(var_types simdType, var_types simdBaseType) const + { + return IsElementZero(simdType, simdBaseType, 0); + } + + bool IsScalarOne(var_types simdType, var_types simdBaseType) const + { + return IsElementOne(simdType, simdBaseType, 0); + } + GenTreeVecCon(var_types type) : GenTree(GT_CNS_VEC, type) { @@ -9286,6 +9600,24 @@ inline bool GenTree::IsVectorAllBitsSet() const return false; } +//------------------------------------------------------------------- +// IsVectorBroadcast: returns true if this node is a vector constant with the same value in all elements. +// +// Returns: +// True if this node is a vector constant with the same value in all elements. 
+// +inline bool GenTree::IsVectorBroadcast(var_types simdType, var_types simdBaseType) const +{ +#ifdef FEATURE_SIMD + if (IsCnsVec()) + { + return AsVecCon()->IsBroadcast(simdType, simdBaseType); + } +#endif // FEATURE_SIMD + + return false; +} + inline bool GenTree::IsMaskAllBitsSet() const { #ifdef TARGET_ARM64 diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 095f31246d0c68..67e0bb5e8e75eb 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -629,6 +629,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, return vecCon; } + op1 = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector64_ToVector128Unsafe, simdBaseJitType, 8); + GenTree* idx = gtNewIconNode(2, TYP_INT); GenTree* zero = gtNewZeroConNode(TYP_FLOAT); op1 = gtNewSimdWithElementNode(retType, op1, idx, zero, simdBaseJitType, 16); @@ -655,6 +657,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, return vecCon; } + op1 = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector128_AsVector128Unsafe, simdBaseJitType, 12); + GenTree* idx = gtNewIconNode(3, TYP_INT); GenTree* zero = gtNewZeroConNode(TYP_FLOAT); retNode = gtNewSimdWithElementNode(retType, op1, idx, zero, simdBaseJitType, 16); diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 23b09c0cef751e..4acc7f2d19ab42 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -1494,6 +1494,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; case NI_Vector64_ToVector128Unsafe: + case NI_Vector128_AsVector128Unsafe: case NI_Vector128_GetLower: GetEmitter()->emitIns_Mov(ins, emitSize, targetReg, op1Reg, /* canSkip */ true); break; diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index 57ee811ef8be73..d96690c4003601 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -1766,6 +1766,7 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) break; } + case NI_Vector128_AsVector128Unsafe: case NI_Vector128_AsVector2: case NI_Vector128_AsVector3: case NI_Vector128_ToScalar: diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index d3c880f38e0fd2..e1edfadeadddcb 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -101,6 +101,7 @@ HARDWARE_INTRINSIC(Vector128, AsVector, HARDWARE_INTRINSIC(Vector128, AsVector2, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, AsVector3, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) HARDWARE_INTRINSIC(Vector128, AsVector128, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsVector128Unsafe, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, 
INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, Ceiling, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, ConditionalSelect, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, ConvertToDouble, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index 4711d19c2234a0..2b835de6a1d265 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -35,6 +35,7 @@ HARDWARE_INTRINSIC(Vector128, AsVector, HARDWARE_INTRINSIC(Vector128, AsVector2, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsd_simd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, AsVector3, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, AsVector128, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsVector128Unsafe, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics|HW_Flag_NoContainment) HARDWARE_INTRINSIC(Vector128, Ceiling, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, ConditionalSelect, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, ConvertToDouble, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index f96a0a1a199d42..b7c7313769112e 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -1161,6 +1161,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, return vecCon; } + op1 = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector128_AsVector128Unsafe, simdBaseJitType, 8); + GenTree* idx = gtNewIconNode(2, TYP_INT); GenTree* zero = gtNewZeroConNode(TYP_FLOAT); op1 = gtNewSimdWithElementNode(retType, op1, 
idx, zero, simdBaseJitType, 16); @@ -1187,6 +1189,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, return vecCon; } + op1 = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector128_AsVector128Unsafe, simdBaseJitType, 12); + GenTree* idx = gtNewIconNode(3, TYP_INT); GenTree* zero = gtNewZeroConNode(TYP_FLOAT); retNode = gtNewSimdWithElementNode(retType, op1, idx, zero, simdBaseJitType, 16); diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp index f52fe739f11c00..c640105ec3a065 100644 --- a/src/coreclr/jit/importercalls.cpp +++ b/src/coreclr/jit/importercalls.cpp @@ -3070,12 +3070,17 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, GenTree* hwintrinsic = impHWIntrinsic(ni, clsHnd, method, sig R2RARG(entryPoint), mustExpand); - if (mustExpand && (hwintrinsic == nullptr)) + if (hwintrinsic == nullptr) { - return impUnsupportedNamedIntrinsic(CORINFO_HELP_THROW_NOT_IMPLEMENTED, method, sig, mustExpand); + if (mustExpand) + { + return impUnsupportedNamedIntrinsic(CORINFO_HELP_THROW_NOT_IMPLEMENTED, method, sig, mustExpand); + } + return nullptr; } - return hwintrinsic; + // Fold result, if possible + return gtFoldExpr(hwintrinsic); } else { @@ -3083,7 +3088,16 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis, if (isIntrinsic) { - return impSimdAsHWIntrinsic(ni, clsHnd, method, sig, newobjThis, mustExpand); + GenTree* hwintrinsic = impSimdAsHWIntrinsic(ni, clsHnd, method, sig, newobjThis, mustExpand); + + if (hwintrinsic == nullptr) + { + assert(!mustExpand); + return nullptr; + } + + // Fold result, if possible + return gtFoldExpr(hwintrinsic); } } } diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 2675303454bebe..99f7b791c66d67 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -2202,7 +2202,8 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) break; } - if (second->AsHWIntrinsic()->HWOperGet() == GT_AND_NOT) + bool isScalar = false; + if ((second->AsHWIntrinsic()->HWOperGet(&isScalar) == GT_AND_NOT) || isScalar) { // currently ANDNOT logic cannot be optimized by the ternary node. 
break; @@ -9266,50 +9267,22 @@ void Lowering::TryFoldCnsVecForEmbeddedBroadcast(GenTreeHWIntrinsic* parentNode, { simdType = Compiler::getSIMDTypeForSize(simdSize); } - int elementCount = GenTreeVecCon::ElementCount(genTypeSize(simdType), simdBaseType); - switch (simdBaseType) + if (varTypeIsSmall(simdBaseType)) { - case TYP_FLOAT: - case TYP_INT: - case TYP_UINT: - { - uint32_t firstElement = static_cast(childNode->gtSimdVal.u32[0]); - for (int i = 1; i < elementCount; i++) - { - uint32_t elementToCheck = static_cast(childNode->gtSimdVal.u32[i]); - if (firstElement != elementToCheck) - { - isCreatedFromScalar = false; - break; - } - } - break; - } - - case TYP_DOUBLE: -#if defined(TARGET_AMD64) - case TYP_LONG: - case TYP_ULONG: -#endif // TARGET_AMD64 - { - uint64_t firstElement = static_cast(childNode->gtSimdVal.u64[0]); - for (int i = 1; i < elementCount; i++) - { - uint64_t elementToCheck = static_cast(childNode->gtSimdVal.u64[i]); - if (firstElement != elementToCheck) - { - isCreatedFromScalar = false; - break; - } - } - break; - } - - default: - isCreatedFromScalar = false; - break; + isCreatedFromScalar = false; + } +#ifndef TARGET_64BIT + else if (varTypeIsLong(simdBaseType)) + { + isCreatedFromScalar = false; } +#endif // TARGET_64BIT + else + { + isCreatedFromScalar = childNode->IsBroadcast(simdType, simdBaseType); + } + if (isCreatedFromScalar) { NamedIntrinsic broadcastName = NI_AVX2_BroadcastScalarToVector128; diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 53c543291671ec..63b93fc74dbe4b 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1538,6 +1538,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } case NI_Vector64_ToVector128Unsafe: + case NI_Vector128_AsVector128Unsafe: case NI_Vector128_AsVector3: case NI_Vector128_GetLower: { diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index dff58fba4dd2a9..5e1238334cf1f2 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2272,6 +2272,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou break; } + case NI_Vector128_AsVector128Unsafe: case NI_Vector128_AsVector2: case NI_Vector128_AsVector3: case NI_Vector128_ToVector256: diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 8f0d1009d0f634..bd1a954bd54b93 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -10894,11 +10894,19 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) } } - // Transforms: - // 1.(~v1 & v2) to VectorXxx.AndNot(v1, v2) - // 2.(v1 & (~v2)) to VectorXxx.AndNot(v2, v1) - switch (node->HWOperGet()) + bool isScalar = false; + genTreeOps oper = node->HWOperGet(&isScalar); + + if (isScalar) + { + return node; + } + + switch (oper) { + // Transforms: + // 1.(~v1 & v2) to VectorXxx.AndNot(v1, v2) + // 2.(v1 & (~v2)) to VectorXxx.AndNot(v2, v1) case GT_AND: { GenTree* op1 = node->Op(1); @@ -10910,7 +10918,12 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) { // Try handle: ~op1 & op2 GenTreeHWIntrinsic* hw = op1->AsHWIntrinsic(); - genTreeOps hwOper = hw->HWOperGet(); + genTreeOps hwOper = hw->HWOperGet(&isScalar); + + if (isScalar) + { + return node; + } if (hwOper == GT_NOT) { @@ -10939,7 +10952,12 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) { // Try handle: op1 & ~op2 GenTreeHWIntrinsic* hw = op2->AsHWIntrinsic(); - genTreeOps hwOper = hw->HWOperGet(); + genTreeOps hwOper 
= hw->HWOperGet(&isScalar); + + if (isScalar) + { + return node; + } if (hwOper == GT_NOT) { @@ -11962,8 +11980,6 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree, bool* optAssertionPropD // GenTree* Compiler::fgMorphMultiOp(GenTreeMultiOp* multiOp) { - gtUpdateNodeOperSideEffects(multiOp); - bool dontCseConstArguments = false; #if defined(FEATURE_HW_INTRINSICS) // Opportunistically, avoid unexpected CSE for hw intrinsics with IMM arguments @@ -11986,12 +12002,10 @@ GenTree* Compiler::fgMorphMultiOp(GenTreeMultiOp* multiOp) for (GenTree** use : multiOp->UseEdges()) { - *use = fgMorphTree(*use); - + *use = fgMorphTree(*use); GenTree* operand = *use; - multiOp->gtFlags |= (operand->gtFlags & GTF_ALL_EFFECT); - if (dontCseConstArguments && operand->OperIsConst()) + if (dontCseConstArguments && operand->IsCnsIntOrI()) { operand->SetDoNotCSE(); } @@ -12010,10 +12024,33 @@ GenTree* Compiler::fgMorphMultiOp(GenTreeMultiOp* multiOp) } } + gtUpdateNodeOperSideEffects(multiOp); + + for (GenTree** use : multiOp->UseEdges()) + { + GenTree* operand = *use; + multiOp->AddAllEffectsFlags(operand); + } + #if defined(FEATURE_HW_INTRINSICS) - if (opts.OptimizationEnabled() && multiOp->OperIs(GT_HWINTRINSIC)) + if (opts.OptimizationEnabled() && multiOp->OperIsHWIntrinsic()) { - GenTreeHWIntrinsic* hw = multiOp->AsHWIntrinsic(); + // Try to fold it, maybe we get lucky, + GenTree* foldedTree = gtFoldExpr(multiOp); + + if (foldedTree != multiOp) + { + assert(!fgIsCommaThrow(foldedTree)); + INDEBUG(foldedTree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + return foldedTree; + } + else if (!foldedTree->OperIsHWIntrinsic()) + { + INDEBUG(foldedTree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED); + return foldedTree; + } + + GenTreeHWIntrinsic* hw = foldedTree->AsHWIntrinsic(); // Move constant vectors from op1 to op2 for commutative and compare operations if ((hw->GetOperandCount() == 2) && hw->Op(1)->IsVectorConst() && diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index a388eecebdf3c1..eb2afbfa08d265 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -532,7 +532,8 @@ TBase EvaluateBinaryScalarRSZ(TBase arg0, TBase arg1) } #else // Other platforms enforce masking in their encoding - assert((arg1 >= 0) && (arg1 < (sizeof(TBase) * 8))); + unsigned shiftCountMask = (sizeof(TBase) * 8) - 1; + arg1 &= shiftCountMask; #endif return arg0 >> arg1; @@ -608,7 +609,8 @@ TBase EvaluateBinaryScalarSpecialized(genTreeOps oper, TBase arg0, TBase arg1) } #else // Other platforms enforce masking in their encoding - assert((arg1 >= 0) && (arg1 < (sizeof(TBase) * 8))); + unsigned shiftCountMask = (sizeof(TBase) * 8) - 1; + arg1 &= shiftCountMask; #endif return arg0 << arg1; } @@ -647,7 +649,8 @@ TBase EvaluateBinaryScalarSpecialized(genTreeOps oper, TBase arg0, TBase arg1) } #else // Other platforms enforce masking in their encoding - assert((arg1 >= 0) && (arg1 < (sizeof(TBase) * 8))); + unsigned shiftCountMask = (sizeof(TBase) * 8) - 1; + arg1 &= shiftCountMask; #endif return arg0 >> arg1; } @@ -826,6 +829,168 @@ void EvaluateBinarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd* } } +template +double EvaluateGetElementFloating(var_types simdBaseType, TSimd arg0, int32_t arg1) +{ + switch (simdBaseType) + { + case TYP_FLOAT: + { + return arg0.f32[arg1]; + } + + case TYP_DOUBLE: + { + return arg0.f64[arg1]; + } + + default: + { + unreached(); + } + } +} + +template +int64_t EvaluateGetElementIntegral(var_types simdBaseType, TSimd arg0, int32_t arg1) +{ + switch (simdBaseType) + { + case 
TYP_BYTE: + { + return arg0.i8[arg1]; + } + + case TYP_UBYTE: + { + return arg0.u8[arg1]; + } + + case TYP_SHORT: + { + return arg0.i16[arg1]; + } + + case TYP_USHORT: + { + return arg0.u16[arg1]; + } + + case TYP_INT: + { + return arg0.i32[arg1]; + } + + case TYP_UINT: + { + return arg0.u32[arg1]; + } + + case TYP_LONG: + { + return arg0.i64[arg1]; + } + + case TYP_ULONG: + { + return static_cast<int64_t>(arg0.u64[arg1]); + } + + default: + { + unreached(); + } + } +} + +template <typename TSimd> +void EvaluateWithElementFloating(var_types simdBaseType, TSimd* result, TSimd arg0, int32_t arg1, double arg2) +{ + *result = arg0; + + switch (simdBaseType) + { + case TYP_FLOAT: + { + result->f32[arg1] = static_cast<float>(arg2); + break; + } + + case TYP_DOUBLE: + { + result->f64[arg1] = static_cast<double>(arg2); + break; + } + + default: + { + unreached(); + } + } +} + +template <typename TSimd> +void EvaluateWithElementIntegral(var_types simdBaseType, TSimd* result, TSimd arg0, int32_t arg1, int64_t arg2) +{ + *result = arg0; + + switch (simdBaseType) + { + case TYP_BYTE: + { + result->i8[arg1] = static_cast<int8_t>(arg2); + break; + } + + case TYP_UBYTE: + { + result->u8[arg1] = static_cast<uint8_t>(arg2); + break; + } + + case TYP_SHORT: + { + result->i16[arg1] = static_cast<int16_t>(arg2); + break; + } + + case TYP_USHORT: + { + result->u16[arg1] = static_cast<uint16_t>(arg2); + break; + } + + case TYP_INT: + { + result->i32[arg1] = static_cast<int32_t>(arg2); + break; + } + + case TYP_UINT: + { + result->u32[arg1] = static_cast<uint32_t>(arg2); + break; + } + + case TYP_LONG: + { + result->i64[arg1] = static_cast<int64_t>(arg2); + break; + } + + case TYP_ULONG: + { + result->u64[arg1] = static_cast<uint64_t>(arg2); + break; + } + + default: + { + unreached(); + } + } +} + template <typename TSimd, typename TBase> void BroadcastConstantToSimd(TSimd* result, TBase arg0) { diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp index 3fd8625b138a95..c28f1f4b43291d 100644 --- a/src/coreclr/jit/simdashwintrinsic.cpp +++ b/src/coreclr/jit/simdashwintrinsic.cpp @@ -1679,7 +1679,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, { assert(retType == TYP_VOID); assert(simdBaseType == TYP_FLOAT); - assert((simdSize == 12) || (simdSize == 16)); + assert(simdSize == 12); + assert(simdType == TYP_SIMD12); // TODO-CQ: We should be able to check for contiguous args here after // the relevant methods are updated to support more than just float @@ -1689,21 +1690,19 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic, { GenTreeVecCon* vecCon = op2->AsVecCon(); vecCon->gtType = simdType; - if (simdSize == 12) - { - vecCon->gtSimdVal.f32[2] = static_cast<float>(op3->AsDblCon()->DconValue()); - } - else - { - vecCon->gtSimdVal.f32[3] = static_cast<float>(op3->AsDblCon()->DconValue()); - } - - copyBlkSrc = vecCon; + vecCon->gtSimdVal.f32[2] = static_cast<float>(op3->AsDblCon()->DconValue()); + copyBlkSrc = vecCon; } else { - GenTree* idx = gtNewIconNode((simdSize == 12) ? 
2 : 3, TYP_INT); - copyBlkSrc = gtNewSimdWithElementNode(simdType, op2, idx, op3, simdBaseJitType, simdSize); + GenTree* idx = gtNewIconNode(2, TYP_INT); + + op2 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, NI_Vector128_AsVector128Unsafe, simdBaseJitType, + 12); + op2 = gtNewSimdWithElementNode(TYP_SIMD16, op2, idx, op3, simdBaseJitType, 16); + + copyBlkSrc = + gtNewSimdHWIntrinsicNode(TYP_SIMD12, op2, NI_Vector128_AsVector3, simdBaseJitType, 16); } copyBlkDst = op1; diff --git a/src/coreclr/jit/valuenum.cpp b/src/coreclr/jit/valuenum.cpp index 0856a61dc42c52..024ca9bf6b5d9b 100644 --- a/src/coreclr/jit/valuenum.cpp +++ b/src/coreclr/jit/valuenum.cpp @@ -7126,7 +7126,7 @@ ValueNum EvaluateBinarySimd(ValueNumStore* vns, } template -ValueNum EvaluateSimdGetElement(ValueNumStore* vns, var_types baseType, TSimd arg0, int arg1) +ValueNum EvaluateSimdGetElement(ValueNumStore* vns, var_types baseType, TSimd arg0, int32_t arg1) { switch (baseType) { @@ -7197,9 +7197,14 @@ ValueNum EvaluateSimdGetElement(ValueNumStore* vns, var_types baseType, TSimd ar } } -ValueNum EvaluateSimdGetElement(ValueNumStore* vns, var_types type, var_types baseType, ValueNum arg0VN, int arg1) +ValueNum EvaluateSimdGetElement( + ValueNumStore* vns, var_types simdType, var_types baseType, ValueNum arg0VN, int32_t arg1) { - switch (vns->TypeOfVN(arg0VN)) + assert(vns->IsVNConstant(arg0VN)); + assert(simdType == vns->TypeOfVN(arg0VN)); + assert(static_cast(arg1) < GenTreeVecCon::ElementCount(genTypeSize(simdType), baseType)); + + switch (simdType) { case TYP_SIMD8: { @@ -7235,16 +7240,23 @@ ValueNum EvaluateSimdGetElement(ValueNumStore* vns, var_types type, var_types ba } } -ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(var_types type, - var_types baseType, - NamedIntrinsic ni, - VNFunc func, - ValueNum arg0VN, - bool encodeResultType, - ValueNum resultTypeVN) +ValueNum ValueNumStore::EvalHWIntrinsicFunUnary( + GenTreeHWIntrinsic* tree, VNFunc func, ValueNum arg0VN, bool encodeResultType, ValueNum resultTypeVN) { + var_types type = tree->TypeGet(); + var_types baseType = tree->GetSimdBaseType(); + NamedIntrinsic ni = tree->GetHWIntrinsicId(); + if (IsVNConstant(arg0VN)) { + bool isScalar = false; + genTreeOps oper = tree->HWOperGet(&isScalar); + + if (oper != GT_NONE) + { + return EvaluateUnarySimd(this, oper, isScalar, type, baseType, arg0VN); + } + switch (ni) { #ifdef TARGET_ARM64 @@ -7304,21 +7316,24 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(var_types type, return VNForLongCon(static_cast(result)); } - case NI_AdvSimd_Negate: - case NI_AdvSimd_Arm64_Negate: + case NI_Vector64_ToVector128: + case NI_Vector64_ToVector128Unsafe: { - return EvaluateUnarySimd(this, GT_NEG, /* scalar */ false, type, baseType, arg0VN); + simd16_t result = {}; + result.v64[0] = GetConstantSimd8(arg0VN); + return VNForSimd16Con(result); } - case NI_AdvSimd_NegateScalar: - case NI_AdvSimd_Arm64_NegateScalar: + case NI_Vector128_GetLower: { - return EvaluateUnarySimd(this, GT_NEG, /* scalar */ true, type, baseType, arg0VN); + simd8_t result = GetConstantSimd16(arg0VN).v64[0]; + return VNForSimd8Con(result); } - case NI_AdvSimd_Not: + case NI_Vector128_GetUpper: { - return EvaluateUnarySimd(this, GT_NOT, /* scalar */ false, type, baseType, arg0VN); + simd8_t result = GetConstantSimd16(arg0VN).v64[1]; + return VNForSimd8Con(result); } #endif // TARGET_ARM64 @@ -7430,8 +7445,102 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(var_types type, uint32_t result = BitOperations::BitScanReverse(static_cast(value)); return 
VNForLongCon(static_cast(result)); } + + case NI_Vector128_AsVector2: + { + simd8_t result = GetConstantSimd16(arg0VN).v64[0]; + return VNForSimd8Con(result); + } + + case NI_Vector128_ToVector256: + case NI_Vector128_ToVector256Unsafe: + { + simd32_t result = {}; + result.v128[0] = GetConstantSimd16(arg0VN); + return VNForSimd32Con(result); + } + + case NI_Vector128_ToVector512: + { + simd64_t result = {}; + result.v128[0] = GetConstantSimd16(arg0VN); + return VNForSimd64Con(result); + } + + case NI_Vector256_GetLower: + { + simd16_t result = GetConstantSimd32(arg0VN).v128[0]; + return VNForSimd16Con(result); + } + + case NI_Vector256_GetUpper: + { + simd16_t result = GetConstantSimd32(arg0VN).v128[1]; + return VNForSimd16Con(result); + } + + case NI_Vector256_ToVector512: + case NI_Vector256_ToVector512Unsafe: + { + simd64_t result = {}; + result.v256[0] = GetConstantSimd32(arg0VN); + return VNForSimd64Con(result); + } + + case NI_Vector512_GetLower: + { + simd32_t result = GetConstantSimd64(arg0VN).v256[0]; + return VNForSimd32Con(result); + } + + case NI_Vector512_GetUpper: + { + simd32_t result = GetConstantSimd64(arg0VN).v256[1]; + return VNForSimd32Con(result); + } + + case NI_Vector512_GetLower128: + { + simd16_t result = GetConstantSimd64(arg0VN).v128[0]; + return VNForSimd16Con(result); + } #endif // TARGET_XARCH + case NI_Vector128_AsVector3: + { + simd12_t result = {}; + simd16_t vector = GetConstantSimd16(arg0VN); + + result.f32[0] = vector.f32[0]; + result.f32[1] = vector.f32[1]; + result.f32[2] = vector.f32[2]; + + return VNForSimd12Con(result); + } + + case NI_Vector128_AsVector128Unsafe: + { + if (TypeOfVN(arg0VN) == TYP_SIMD8) + { + simd16_t result = {}; + result.v64[0] = GetConstantSimd8(arg0VN); + return VNForSimd16Con(result); + } + else + { + assert(TypeOfVN(arg0VN) == TYP_SIMD12); + + simd16_t result = {}; + simd12_t vector = GetConstantSimd12(arg0VN); + + result.f32[0] = vector.f32[0]; + result.f32[1] = vector.f32[1]; + result.f32[2] = vector.f32[2]; + + return VNForSimd16Con(result); + } + } + case NI_Vector128_ToScalar: #ifdef TARGET_ARM64 case NI_Vector64_ToScalar: @@ -7440,7 +7549,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(var_types type, case NI_Vector512_ToScalar: #endif { - return EvaluateSimdGetElement(this, type, baseType, arg0VN, 0); + return EvaluateSimdGetElement(this, TypeOfVN(arg0VN), baseType, arg0VN, 0); } default: @@ -7455,15 +7564,17 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunUnary(var_types type, return VNForFunc(type, func, arg0VN); } -ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, - var_types baseType, - NamedIntrinsic ni, - VNFunc func, - ValueNum arg0VN, - ValueNum arg1VN, - bool encodeResultType, - ValueNum resultTypeVN) +ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(GenTreeHWIntrinsic* tree, + VNFunc func, + ValueNum arg0VN, + ValueNum arg1VN, + bool encodeResultType, + ValueNum resultTypeVN) { + var_types type = tree->TypeGet(); + var_types baseType = tree->GetSimdBaseType(); + NamedIntrinsic ni = tree->GetHWIntrinsicId(); + ValueNum cnsVN = NoVN; ValueNum argVN = NoVN; @@ -7490,89 +7601,53 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, { assert(IsVNConstant(arg0VN) && IsVNConstant(arg1VN)); - switch (ni) - { -#ifdef TARGET_ARM64 - case NI_AdvSimd_Add: - case NI_AdvSimd_Arm64_Add: -#else - case NI_SSE_Add: - case NI_SSE2_Add: - case NI_AVX_Add: - case NI_AVX2_Add: - case NI_AVX512F_Add: - case NI_AVX512BW_Add: -#endif - { - return EvaluateBinarySimd(this, GT_ADD, /* scalar */ false, 
type, baseType, arg0VN, arg1VN); - } + bool isScalar = false; + genTreeOps oper = tree->HWOperGet(&isScalar); -#ifdef TARGET_ARM64 - case NI_AdvSimd_AddScalar: -#else - case NI_SSE_AddScalar: - case NI_SSE2_AddScalar: -#endif + if (oper != GT_NONE) + { +#if defined(TARGET_XARCH) + if (oper == GT_AND_NOT) { - return EvaluateBinarySimd(this, GT_ADD, /* scalar */ true, type, baseType, arg0VN, arg1VN); + // xarch does: ~arg0VN & arg1VN + std::swap(arg0VN, arg1VN); } - -#ifdef TARGET_ARM64 - case NI_AdvSimd_And: -#else - case NI_SSE_And: - case NI_SSE2_And: - case NI_AVX_And: - case NI_AVX2_And: - case NI_AVX512F_And: - case NI_AVX512DQ_And: - case NI_AVX10v1_V512_And: -#endif + else if ((oper == GT_LSH) || (oper == GT_RSH) || (oper == GT_RSZ)) { - return EvaluateBinarySimd(this, GT_AND, /* scalar */ false, type, baseType, arg0VN, arg1VN); - } + if (TypeOfVN(arg1VN) == TYP_SIMD16) + { + if ((ni != NI_AVX2_ShiftLeftLogicalVariable) && (ni != NI_AVX2_ShiftRightArithmeticVariable) && + (ni != NI_AVX512F_VL_ShiftRightArithmeticVariable) && (ni != NI_AVX2_ShiftRightLogicalVariable)) + { + // The xarch shift instructions support taking the shift amount as + // a simd16, in which case they take the shift amount from the lower + // 64-bits. -#ifdef TARGET_ARM64 - case NI_AdvSimd_BitwiseClear: - { - return EvaluateBinarySimd(this, GT_AND_NOT, /* scalar */ false, type, baseType, arg0VN, arg1VN); - } -#else - case NI_SSE_AndNot: - case NI_SSE2_AndNot: - case NI_AVX_AndNot: - case NI_AVX2_AndNot: - case NI_AVX512F_AndNot: - case NI_AVX512DQ_AndNot: - case NI_AVX10v1_V512_AndNot: - { - // xarch does: ~arg0VN & arg1VN - return EvaluateBinarySimd(this, GT_AND_NOT, /* scalar */ false, type, baseType, arg1VN, arg0VN); - } -#endif + uint64_t shiftAmount = GetConstantSimd16(arg1VN).u64[0]; -#ifdef TARGET_ARM64 - case NI_AdvSimd_Arm64_Divide: -#else - case NI_SSE_Divide: - case NI_SSE2_Divide: - case NI_AVX_Divide: - case NI_AVX512F_Divide: -#endif - { - return EvaluateBinarySimd(this, GT_DIV, /* scalar */ false, type, baseType, arg0VN, arg1VN); + if (genTypeSize(baseType) != 8) + { + if (shiftAmount > INT_MAX) + { + // Ensure we don't lose track the the amount is an overshift + shiftAmount = -1; + } + arg1VN = VNForIntCon(static_cast(shiftAmount)); + } + else + { + arg1VN = VNForLongCon(static_cast(shiftAmount)); + } + } + } } +#endif // TARGET_XARCH -#ifdef TARGET_ARM64 - case NI_AdvSimd_DivideScalar: -#else - case NI_SSE_DivideScalar: - case NI_SSE2_DivideScalar: -#endif - { - return EvaluateBinarySimd(this, GT_DIV, /* scalar */ true, type, baseType, arg0VN, arg1VN); - } + return EvaluateBinarySimd(this, oper, isScalar, type, baseType, arg0VN, arg1VN); + } + switch (ni) + { case NI_Vector128_GetElement: #ifdef TARGET_ARM64 case NI_Vector64_GetElement: @@ -7581,10 +7656,17 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, case NI_Vector512_GetElement: #endif { - return EvaluateSimdGetElement(this, type, baseType, arg0VN, GetConstantInt32(arg1VN)); + int32_t index = GetConstantInt32(arg1VN); + + if (static_cast(index) >= GenTreeVecCon::ElementCount(genTypeSize(type), baseType)) + { + // Nothing to fold for out of range indexes + break; + } + return EvaluateSimdGetElement(this, TypeOfVN(arg0VN), baseType, arg0VN, index); } -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) case NI_AdvSimd_MultiplyByScalar: case NI_AdvSimd_Arm64_MultiplyByScalar: { @@ -7592,274 +7674,73 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, // We need to extract it and then functionally broadcast it up 
for the evaluation to // work as expected. - arg1VN = EvaluateSimdGetElement(this, type, baseType, arg1VN, 0); - FALLTHROUGH; - } -#endif - -#ifdef TARGET_XARCH - case NI_AVX512F_Multiply: - { - if (!varTypeIsFloating(baseType)) - { - // We don't support this for integrals since it returns a different size than the input - break; - } - FALLTHROUGH; - } -#endif // TARGET_XARCH - -#ifdef TARGET_ARM64 - case NI_AdvSimd_Multiply: - case NI_AdvSimd_Arm64_Multiply: -#else - case NI_SSE_Multiply: - case NI_SSE2_Multiply: - case NI_SSE2_MultiplyLow: - case NI_SSE41_MultiplyLow: - case NI_AVX_Multiply: - case NI_AVX2_MultiplyLow: - case NI_AVX512F_MultiplyLow: - case NI_AVX512BW_MultiplyLow: - case NI_AVX512DQ_MultiplyLow: - case NI_AVX512DQ_VL_MultiplyLow: - case NI_AVX10v1_MultiplyLow: - case NI_AVX10v1_V512_MultiplyLow: -#endif - { + arg1VN = EvaluateSimdGetElement(this, TYP_SIMD8, baseType, arg1VN, 0); return EvaluateBinarySimd(this, GT_MUL, /* scalar */ false, type, baseType, arg0VN, arg1VN); } -#ifdef TARGET_ARM64 - case NI_AdvSimd_MultiplyScalar: -#else - case NI_SSE_MultiplyScalar: - case NI_SSE2_MultiplyScalar: -#endif + case NI_Vector128_WithLower: { - return EvaluateBinarySimd(this, GT_MUL, /* scalar */ true, type, baseType, arg0VN, arg1VN); + simd16_t result = GetConstantSimd16(arg0VN); + result.v64[0] = GetConstantSimd8(arg1VN); + return VNForSimd16Con(result); } -#ifdef TARGET_ARM64 - case NI_AdvSimd_Or: -#else - case NI_SSE_Or: - case NI_SSE2_Or: - case NI_AVX_Or: - case NI_AVX2_Or: - case NI_AVX512F_Or: - case NI_AVX512DQ_Or: - case NI_AVX10v1_V512_Or: -#endif + case NI_Vector128_WithUpper: { - return EvaluateBinarySimd(this, GT_OR, /* scalar */ false, type, baseType, arg0VN, arg1VN); + simd16_t result = GetConstantSimd16(arg0VN); + result.v64[1] = GetConstantSimd8(arg1VN); + return VNForSimd16Con(result); } +#endif // TARGET_ARM64 -#ifdef TARGET_XARCH - case NI_AVX512F_RotateLeft: - case NI_AVX512F_VL_RotateLeft: - case NI_AVX10v1_RotateLeft: +#if defined(TARGET_XARCH) + case NI_Vector256_WithLower: { - return EvaluateBinarySimd(this, GT_ROL, /* scalar */ false, type, baseType, arg0VN, arg1VN); + simd32_t result = GetConstantSimd32(arg0VN); + result.v128[0] = GetConstantSimd16(arg1VN); + return VNForSimd32Con(result); } - case NI_AVX512F_RotateRight: - case NI_AVX512F_VL_RotateRight: - case NI_AVX10v1_RotateRight: + case NI_Vector256_WithUpper: { - return EvaluateBinarySimd(this, GT_ROR, /* scalar */ false, type, baseType, arg0VN, arg1VN); + simd32_t result = GetConstantSimd32(arg0VN); + result.v128[1] = GetConstantSimd16(arg1VN); + return VNForSimd32Con(result); } -#endif // TARGET_XARCH -#ifdef TARGET_ARM64 - case NI_AdvSimd_ShiftLeftLogical: -#else - case NI_SSE2_ShiftLeftLogical: - case NI_AVX2_ShiftLeftLogical: - case NI_AVX512F_ShiftLeftLogical: - case NI_AVX512BW_ShiftLeftLogical: -#endif + case NI_Vector512_WithLower: { -#ifdef TARGET_XARCH - if (TypeOfVN(arg1VN) == TYP_SIMD16) - { - // The xarch shift instructions support taking the shift amount as - // a simd16, in which case they take the shift amount from the lower - // 64-bits. 
- - uint64_t shiftAmount = GetConstantSimd16(arg1VN).u64[0]; - - if (genTypeSize(baseType) != 8) - { - if (shiftAmount > INT_MAX) - { - // Ensure we don't lose track the the amount is an overshift - shiftAmount = -1; - } - arg1VN = VNForIntCon(static_cast(shiftAmount)); - } - else - { - arg1VN = VNForLongCon(static_cast(shiftAmount)); - } - } -#endif // TARGET_XARCH - - return EvaluateBinarySimd(this, GT_LSH, /* scalar */ false, type, baseType, arg0VN, arg1VN); + simd64_t result = GetConstantSimd64(arg0VN); + result.v256[0] = GetConstantSimd32(arg1VN); + return VNForSimd64Con(result); } -#ifdef TARGET_ARM64 - case NI_AdvSimd_ShiftRightArithmetic: -#else - case NI_SSE2_ShiftRightArithmetic: - case NI_AVX2_ShiftRightArithmetic: - case NI_AVX512F_ShiftRightArithmetic: - case NI_AVX512F_VL_ShiftRightArithmetic: - case NI_AVX512BW_ShiftRightArithmetic: - case NI_AVX10v1_ShiftRightArithmetic: -#endif + case NI_Vector512_WithUpper: { -#ifdef TARGET_XARCH - if (TypeOfVN(arg1VN) == TYP_SIMD16) - { - // The xarch shift instructions support taking the shift amount as - // a simd16, in which case they take the shift amount from the lower - // 64-bits. - - uint64_t shiftAmount = GetConstantSimd16(arg1VN).u64[0]; - - if (genTypeSize(baseType) != 8) - { - if (shiftAmount > INT_MAX) - { - // Ensure we don't lose track the the amount is an overshift - shiftAmount = -1; - } - arg1VN = VNForIntCon(static_cast(shiftAmount)); - } - else - { - arg1VN = VNForLongCon(static_cast(shiftAmount)); - } - } -#endif // TARGET_XARCH - - return EvaluateBinarySimd(this, GT_RSH, /* scalar */ false, type, baseType, arg0VN, arg1VN); + simd64_t result = GetConstantSimd64(arg0VN); + result.v256[1] = GetConstantSimd32(arg1VN); + return VNForSimd64Con(result); } - -#ifdef TARGET_ARM64 - case NI_AdvSimd_ShiftRightLogical: -#else - case NI_SSE2_ShiftRightLogical: - case NI_AVX2_ShiftRightLogical: - case NI_AVX512F_ShiftRightLogical: - case NI_AVX512BW_ShiftRightLogical: -#endif - { -#ifdef TARGET_XARCH - if (TypeOfVN(arg1VN) == TYP_SIMD16) - { - // The xarch shift instructions support taking the shift amount as - // a simd16, in which case they take the shift amount from the lower - // 64-bits. 
- - uint64_t shiftAmount = GetConstantSimd16(arg1VN).u64[0]; - - if (genTypeSize(baseType) != 8) - { - if (shiftAmount > INT_MAX) - { - // Ensure we don't lose track the the amount is an overshift - shiftAmount = -1; - } - arg1VN = VNForIntCon(static_cast(shiftAmount)); - } - else - { - arg1VN = VNForLongCon(static_cast(shiftAmount)); - } - } #endif // TARGET_XARCH - return EvaluateBinarySimd(this, GT_RSZ, /* scalar */ false, type, baseType, arg0VN, arg1VN); - } - -#ifdef TARGET_ARM64 - case NI_AdvSimd_ShiftLeftLogicalScalar: - { - return EvaluateBinarySimd(this, GT_LSH, /* scalar */ true, type, baseType, arg0VN, arg1VN); - } - - case NI_AdvSimd_ShiftRightArithmeticScalar: - { - return EvaluateBinarySimd(this, GT_RSH, /* scalar */ true, type, baseType, arg0VN, arg1VN); - } - - case NI_AdvSimd_ShiftRightLogicalScalar: - { - return EvaluateBinarySimd(this, GT_RSZ, /* scalar */ true, type, baseType, arg0VN, arg1VN); - } -#endif // TARGET_ARM64 - -#ifdef TARGET_ARM64 - case NI_AdvSimd_Subtract: - case NI_AdvSimd_Arm64_Subtract: -#else - case NI_SSE_Subtract: - case NI_SSE2_Subtract: - case NI_AVX_Subtract: - case NI_AVX2_Subtract: - case NI_AVX512F_Subtract: - case NI_AVX512BW_Subtract: -#endif - { - return EvaluateBinarySimd(this, GT_SUB, /* scalar */ false, type, baseType, arg0VN, arg1VN); - } - -#ifdef TARGET_ARM64 - case NI_AdvSimd_SubtractScalar: -#else - case NI_SSE_SubtractScalar: - case NI_SSE2_SubtractScalar: -#endif - { - return EvaluateBinarySimd(this, GT_SUB, /* scalar */ true, type, baseType, arg0VN, arg1VN); - } - -#ifdef TARGET_ARM64 - case NI_AdvSimd_Xor: -#else - case NI_SSE_Xor: - case NI_SSE2_Xor: - case NI_AVX_Xor: - case NI_AVX2_Xor: - case NI_AVX512F_Xor: - case NI_AVX512DQ_Xor: - case NI_AVX10v1_V512_Xor: -#endif - { - return EvaluateBinarySimd(this, GT_XOR, /* scalar */ false, type, baseType, arg0VN, arg1VN); - } - default: break; } } else if (cnsVN != NoVN) { - switch (ni) + bool isScalar = false; + genTreeOps oper = tree->HWOperGet(&isScalar); + + if (isScalar) { -#ifdef TARGET_ARM64 - case NI_AdvSimd_Add: - case NI_AdvSimd_Arm64_Add: -#else - case NI_SSE_Add: - case NI_SSE2_Add: - case NI_AVX_Add: - case NI_AVX2_Add: - case NI_AVX512F_Add: - case NI_AVX512BW_Add: -#endif + // We don't support folding scalars today + oper = GT_NONE; + } + + switch (oper) + { + case GT_ADD: { if (varTypeIsFloating(baseType)) { @@ -7877,17 +7758,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, break; } -#ifdef TARGET_ARM64 - case NI_AdvSimd_And: -#else - case NI_SSE_And: - case NI_SSE2_And: - case NI_AVX_And: - case NI_AVX2_And: - case NI_AVX512F_And: - case NI_AVX512DQ_And: - case NI_AVX10v1_V512_And: -#endif + case GT_AND: { // Handle `x & 0 == 0` and `0 & x == 0` ValueNum zeroVN = VNZeroForType(type); @@ -7897,7 +7768,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, return zeroVN; } - // Handle `x & ~0 == x` and `~0 & x == x` + // Handle `x & AllBitsSet == x` and `AllBitsSet & x == x` ValueNum allBitsVN = VNAllBitsForType(type); if (cnsVN == allBitsVN) @@ -7907,58 +7778,38 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, break; } -#ifdef TARGET_ARM64 - case NI_AdvSimd_BitwiseClear: -#else - case NI_SSE_AndNot: - case NI_SSE2_AndNot: - case NI_AVX_AndNot: - case NI_AVX2_AndNot: - case NI_AVX512F_AndNot: - case NI_AVX512DQ_AndNot: - case NI_AVX10v1_V512_AndNot: + case GT_AND_NOT: { -#ifdef TARGET_ARM64 - if (cnsVN == arg0VN) - { - // arm64 preserves the args, so we can only handle `x & ~cns` - break; - } -#else - if (cnsVN == 
arg1VN) - { - // xarch swaps the args, so we can only handle `~cns & x` - break; - } -#endif +#if defined(TARGET_XARCH) + std::swap(arg0VN, arg1VN); +#endif // TARGET_XARCH - // Handle `x & ~0 == x` + // Handle `x & ~0 == x` and `0 & ~x == 0` ValueNum zeroVN = VNZeroForType(type); if (cnsVN == zeroVN) { + if (cnsVN == arg0VN) + { + return zeroVN; + } return argVN; } - // Handle `x & 0 == 0` + // Handle `x & ~AllBitsSet == 0` ValueNum allBitsVN = VNAllBitsForType(type); if (cnsVN == allBitsVN) { - return zeroVN; + if (cnsVN == arg1VN) + { + return zeroVN; + } } break; } -#endif -#ifdef TARGET_ARM64 - case NI_AdvSimd_Arm64_Divide: -#else - case NI_SSE_Divide: - case NI_SSE2_Divide: - case NI_AVX_Divide: - case NI_AVX512F_Divide: -#endif + case GT_DIV: { // Handle `x / 1 == x`. // This is safe for all floats since we do not fault for sNaN @@ -7980,65 +7831,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, break; } -#ifdef TARGET_ARM64 - case NI_AdvSimd_MultiplyByScalar: - case NI_AdvSimd_Arm64_MultiplyByScalar: - { - if (!varTypeIsFloating(baseType)) - { - // Handle `x * 0 == 0` and `0 * x == 0` - // Not safe for floating-point when x == -0.0, NaN, +Inf, -Inf - ValueNum zeroVN = VNZeroForType(TypeOfVN(cnsVN)); - - if (cnsVN == zeroVN) - { - return VNZeroForType(type); - } - } - - assert((TypeOfVN(arg0VN) == type) && (TypeOfVN(arg1VN) == TYP_SIMD8)); - - // Handle x * 1 => x, but only if the scalar RHS is <1, ...>. - if (IsVNConstant(arg1VN)) - { - if (EvaluateSimdGetElement(this, TYP_SIMD8, baseType, arg1VN, 0) == VNOneForType(baseType)) - { - return arg0VN; - } - } - break; - } -#endif - -#ifdef TARGET_XARCH - case NI_AVX512F_Multiply: - { - if (!varTypeIsFloating(baseType)) - { - // We don't support this for integrals since it returns a different size than the input - break; - } - FALLTHROUGH; - } -#endif // TARGET_XARCH - -#ifdef TARGET_ARM64 - case NI_AdvSimd_Multiply: - case NI_AdvSimd_Arm64_Multiply: -#else - case NI_SSE_Multiply: - case NI_SSE2_Multiply: - case NI_SSE2_MultiplyLow: - case NI_SSE41_MultiplyLow: - case NI_AVX_Multiply: - case NI_AVX2_MultiplyLow: - case NI_AVX512F_MultiplyLow: - case NI_AVX512BW_MultiplyLow: - case NI_AVX512DQ_MultiplyLow: - case NI_AVX512DQ_VL_MultiplyLow: - case NI_AVX10v1_MultiplyLow: - case NI_AVX10v1_V512_MultiplyLow: -#endif + case GT_MUL: { if (!varTypeIsFloating(baseType)) { @@ -8072,17 +7865,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, break; } -#ifdef TARGET_ARM64 - case NI_AdvSimd_Or: -#else - case NI_SSE_Or: - case NI_SSE2_Or: - case NI_AVX_Or: - case NI_AVX2_Or: - case NI_AVX512F_Or: - case NI_AVX512DQ_Or: - case NI_AVX10v1_V512_Or: -#endif + case GT_OR: { // Handle `x | 0 == x` and `0 | x == x` ValueNum zeroVN = VNZeroForType(type); @@ -8102,27 +7885,14 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, break; } -#ifdef TARGET_ARM64 - case NI_AdvSimd_ShiftLeftLogical: - case NI_AdvSimd_ShiftRightArithmetic: - case NI_AdvSimd_ShiftRightLogical: -#else - case NI_SSE2_ShiftLeftLogical: - case NI_SSE2_ShiftRightArithmetic: - case NI_SSE2_ShiftRightLogical: - case NI_AVX2_ShiftLeftLogical: - case NI_AVX2_ShiftRightArithmetic: - case NI_AVX2_ShiftRightLogical: - case NI_AVX512F_ShiftLeftLogical: - case NI_AVX512F_ShiftRightArithmetic: - case NI_AVX512F_ShiftRightLogical: - case NI_AVX512F_VL_ShiftRightArithmetic: - case NI_AVX512BW_ShiftLeftLogical: - case NI_AVX512BW_ShiftRightArithmetic: - case NI_AVX512BW_ShiftRightLogical: - case NI_AVX10v1_ShiftRightArithmetic: -#endif + case 
GT_ROL: + case GT_ROR: + case GT_LSH: + case GT_RSH: + case GT_RSZ: { + // Handle `x rol 0 == x` and `0 rol x == 0` + // Handle `x ror 0 == x` and `0 ror x == 0` // Handle `x << 0 == x` and `0 << x == 0` // Handle `x >> 0 == x` and `0 >> x == 0` // Handle `x >>> 0 == x` and `0 >>> x == 0` @@ -8135,17 +7905,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, break; } -#ifdef TARGET_ARM64 - case NI_AdvSimd_Subtract: - case NI_AdvSimd_Arm64_Subtract: -#else - case NI_SSE_Subtract: - case NI_SSE2_Subtract: - case NI_AVX_Subtract: - case NI_AVX2_Subtract: - case NI_AVX512F_Subtract: - case NI_AVX512BW_Subtract: -#endif + case GT_SUB: { if (varTypeIsFloating(baseType)) { @@ -8163,19 +7923,9 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, break; } -#ifdef TARGET_ARM64 - case NI_AdvSimd_Xor: -#else - case NI_SSE_Xor: - case NI_SSE2_Xor: - case NI_AVX_Xor: - case NI_AVX2_Xor: - case NI_AVX512F_Xor: - case NI_AVX512DQ_Xor: - case NI_AVX10v1_V512_Xor: -#endif + case GT_XOR: { - // Handle `x | 0 == x` and `0 | x == x` + // Handle `x ^ 0 == x` and `0 ^ x == x` ValueNum zeroVN = VNZeroForType(type); if (cnsVN == zeroVN) @@ -8186,72 +7936,92 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, } default: + { break; + } } - } - else if (arg0VN == arg1VN) - { + switch (ni) { #ifdef TARGET_ARM64 - case NI_AdvSimd_And: -#else - case NI_SSE_And: - case NI_SSE2_And: - case NI_AVX_And: - case NI_AVX2_And: - case NI_AVX512F_And: - case NI_AVX512DQ_And: - case NI_AVX10v1_V512_And: + case NI_AdvSimd_MultiplyByScalar: + case NI_AdvSimd_Arm64_MultiplyByScalar: + { + if (!varTypeIsFloating(baseType)) + { + // Handle `x * 0 == 0` and `0 * x == 0` + // Not safe for floating-point when x == -0.0, NaN, +Inf, -Inf + ValueNum zeroVN = VNZeroForType(TypeOfVN(cnsVN)); + + if (cnsVN == zeroVN) + { + return VNZeroForType(type); + } + else if (cnsVN == arg1VN) + { + ValueNum scalarVN = EvaluateSimdGetElement(this, TYP_SIMD8, baseType, arg1VN, 0); + + if (scalarVN == VNZeroForType(baseType)) + { + return VNZeroForType(type); + } + } + } + + assert((TypeOfVN(arg0VN) == type) && (TypeOfVN(arg1VN) == TYP_SIMD8)); + + // Handle x * 1 => x, but only if the scalar RHS is <1, ...>. 
+ if (IsVNConstant(arg1VN)) + { + ValueNum scalarVN = EvaluateSimdGetElement(this, TYP_SIMD8, baseType, arg1VN, 0); + + if (scalarVN == VNOneForType(baseType)) + { + return arg0VN; + } + } + break; + } #endif + + default: + { + break; + } + } + } + else if (arg0VN == arg1VN) + { + bool isScalar = false; + genTreeOps oper = tree->HWOperGet(&isScalar); + + if (isScalar) + { + // We don't support folding scalars today + oper = GT_NONE; + } + + switch (oper) + { + case GT_AND: { // Handle `x & x == x` return arg0VN; } -#ifdef TARGET_ARM64 - case NI_AdvSimd_BitwiseClear: -#else - case NI_SSE_AndNot: - case NI_SSE2_AndNot: - case NI_AVX_AndNot: - case NI_AVX2_AndNot: - case NI_AVX512F_AndNot: - case NI_AVX512DQ_AndNot: - case NI_AVX10v1_V512_AndNot: + case GT_AND_NOT: { // Handle `x & ~x == 0` return VNZeroForType(type); } -#endif -#ifdef TARGET_ARM64 - case NI_AdvSimd_Or: -#else - case NI_SSE_Or: - case NI_SSE2_Or: - case NI_AVX_Or: - case NI_AVX2_Or: - case NI_AVX512F_Or: - case NI_AVX512DQ_Or: - case NI_AVX10v1_V512_Or: -#endif + case GT_OR: { // Handle `x | x == x` return arg0VN; } -#ifdef TARGET_ARM64 - case NI_AdvSimd_Subtract: - case NI_AdvSimd_Arm64_Subtract: -#else - case NI_SSE_Subtract: - case NI_SSE2_Subtract: - case NI_AVX_Subtract: - case NI_AVX2_Subtract: - case NI_AVX512F_Subtract: - case NI_AVX512BW_Subtract: -#endif + case GT_SUB: { if (varTypeIsFloating(baseType)) { @@ -8263,17 +8033,7 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, return VNZeroForType(type); } -#ifdef TARGET_ARM64 - case NI_AdvSimd_Xor: -#else - case NI_SSE_Xor: - case NI_SSE2_Xor: - case NI_AVX_Xor: - case NI_AVX2_Xor: - case NI_AVX512F_Xor: - case NI_AVX512DQ_Xor: - case NI_AVX10v1_V512_Xor: -#endif + case GT_XOR: { // Handle `x ^ x == 0` return VNZeroForType(type); @@ -8291,45 +8051,53 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunBinary(var_types type, return VNForFunc(type, func, arg0VN, arg1VN); } -ValueNum EvaluateSimdFloatWithElement(ValueNumStore* vns, var_types type, ValueNum arg0VN, int index, float value) +ValueNum EvaluateSimdWithElementFloating( + ValueNumStore* vns, var_types simdType, var_types baseType, ValueNum arg0VN, int32_t arg1, double arg2) { + assert(varTypeIsFloating(baseType)); assert(vns->IsVNConstant(arg0VN)); - assert(static_cast(index) < genTypeSize(type) / genTypeSize(TYP_FLOAT)); + assert(simdType == vns->TypeOfVN(arg0VN)); + assert(static_cast(arg1) < GenTreeVecCon::ElementCount(genTypeSize(simdType), baseType)); - switch (type) + switch (simdType) { case TYP_SIMD8: { - simd8_t cnsVec = vns->GetConstantSimd8(arg0VN); - cnsVec.f32[index] = value; - return vns->VNForSimd8Con(cnsVec); + simd8_t result = {}; + EvaluateWithElementFloating(baseType, &result, vns->GetConstantSimd8(arg0VN), arg1, arg2); + return vns->VNForSimd8Con(result); } + case TYP_SIMD12: { - simd12_t cnsVec = vns->GetConstantSimd12(arg0VN); - cnsVec.f32[index] = value; - return vns->VNForSimd12Con(cnsVec); + simd12_t result = {}; + EvaluateWithElementFloating(baseType, &result, vns->GetConstantSimd12(arg0VN), arg1, arg2); + return vns->VNForSimd12Con(result); } + case TYP_SIMD16: { - simd16_t cnsVec = vns->GetConstantSimd16(arg0VN); - cnsVec.f32[index] = value; - return vns->VNForSimd16Con(cnsVec); + simd16_t result = {}; + EvaluateWithElementFloating(baseType, &result, vns->GetConstantSimd16(arg0VN), arg1, arg2); + return vns->VNForSimd16Con(result); } + #if defined TARGET_XARCH case TYP_SIMD32: { - simd32_t cnsVec = vns->GetConstantSimd32(arg0VN); - cnsVec.f32[index] = value; - return 
vns->VNForSimd32Con(cnsVec); + simd32_t result = {}; + EvaluateWithElementFloating(baseType, &result, vns->GetConstantSimd32(arg0VN), arg1, arg2); + return vns->VNForSimd32Con(result); } + case TYP_SIMD64: { - simd64_t cnsVec = vns->GetConstantSimd64(arg0VN); - cnsVec.f32[index] = value; - return vns->VNForSimd64Con(cnsVec); + simd64_t result = {}; + EvaluateWithElementFloating(baseType, &result, vns->GetConstantSimd64(arg0VN), arg1, arg2); + return vns->VNForSimd64Con(result); } #endif // TARGET_XARCH + default: { unreached(); @@ -8337,16 +8105,72 @@ ValueNum EvaluateSimdFloatWithElement(ValueNumStore* vns, var_types type, ValueN } } -ValueNum ValueNumStore::EvalHWIntrinsicFunTernary(var_types type, - var_types baseType, - NamedIntrinsic ni, - VNFunc func, - ValueNum arg0VN, - ValueNum arg1VN, - ValueNum arg2VN, - bool encodeResultType, - ValueNum resultTypeVN) +ValueNum EvaluateSimdWithElementIntegral( + ValueNumStore* vns, var_types simdType, var_types baseType, ValueNum arg0VN, int32_t arg1, int64_t arg2) { + assert(varTypeIsIntegral(baseType)); + assert(simdType == vns->TypeOfVN(arg0VN)); + assert(vns->IsVNConstant(arg0VN)); + assert(static_cast(arg1) < GenTreeVecCon::ElementCount(genTypeSize(simdType), baseType)); + + switch (simdType) + { + case TYP_SIMD8: + { + simd8_t result = {}; + EvaluateWithElementIntegral(baseType, &result, vns->GetConstantSimd8(arg0VN), arg1, arg2); + return vns->VNForSimd8Con(result); + } + + case TYP_SIMD12: + { + simd12_t result = {}; + EvaluateWithElementIntegral(baseType, &result, vns->GetConstantSimd12(arg0VN), arg1, arg2); + return vns->VNForSimd12Con(result); + } + + case TYP_SIMD16: + { + simd16_t result = {}; + EvaluateWithElementIntegral(baseType, &result, vns->GetConstantSimd16(arg0VN), arg1, arg2); + return vns->VNForSimd16Con(result); + } + +#if defined TARGET_XARCH + case TYP_SIMD32: + { + simd32_t result = {}; + EvaluateWithElementIntegral(baseType, &result, vns->GetConstantSimd32(arg0VN), arg1, arg2); + return vns->VNForSimd32Con(result); + } + + case TYP_SIMD64: + { + simd64_t result = {}; + EvaluateWithElementIntegral(baseType, &result, vns->GetConstantSimd64(arg0VN), arg1, arg2); + return vns->VNForSimd64Con(result); + } +#endif // TARGET_XARCH + + default: + { + unreached(); + } + } +} + +ValueNum ValueNumStore::EvalHWIntrinsicFunTernary(GenTreeHWIntrinsic* tree, + VNFunc func, + ValueNum arg0VN, + ValueNum arg1VN, + ValueNum arg2VN, + bool encodeResultType, + ValueNum resultTypeVN) +{ + var_types type = tree->TypeGet(); + var_types baseType = tree->GetSimdBaseType(); + NamedIntrinsic ni = tree->GetHWIntrinsicId(); + if (IsVNConstant(arg0VN) && IsVNConstant(arg1VN) && IsVNConstant(arg2VN)) { switch (ni) @@ -8359,20 +8183,43 @@ ValueNum ValueNumStore::EvalHWIntrinsicFunTernary(var_types type, case NI_Vector512_WithElement: #endif { - int index = GetConstantInt32(arg1VN); + int32_t index = GetConstantInt32(arg1VN); - assert(varTypeIsSIMD(type)); - - // No meaningful diffs for other base-types. 
- if ((baseType != TYP_FLOAT) || (TypeOfVN(arg0VN) != type) || - (static_cast(index) >= (genTypeSize(type) / genTypeSize(baseType)))) + if (static_cast(index) >= GenTreeVecCon::ElementCount(genTypeSize(type), baseType)) { + // Nothing to fold for out of range indexes break; } - float value = GetConstantSingle(arg2VN); + if (varTypeIsFloating(baseType)) + { + double value; - return EvaluateSimdFloatWithElement(this, type, arg0VN, index, value); + if (baseType == TYP_FLOAT) + { + value = GetConstantSingle(arg2VN); + } + else + { + value = GetConstantDouble(arg2VN); + } + return EvaluateSimdWithElementFloating(this, type, baseType, arg0VN, index, value); + } + else + { + assert(varTypeIsIntegral(baseType)); + int64_t value; + + if (varTypeIsLong(baseType)) + { + value = GetConstantInt64(arg2VN); + } + else + { + value = GetConstantInt32(arg2VN); + } + return EvaluateSimdWithElementIntegral(this, type, baseType, arg0VN, index, value); + } } default: @@ -12125,12 +11972,10 @@ void Compiler::fgValueNumberHWIntrinsic(GenTreeHWIntrinsic* tree) if (opCount == 1) { - ValueNum normalLVN = vnStore->EvalHWIntrinsicFunUnary(tree->TypeGet(), tree->GetSimdBaseType(), - intrinsicId, func, op1vnp.GetLiberal(), - encodeResultType, resultTypeVNPair.GetLiberal()); + ValueNum normalLVN = vnStore->EvalHWIntrinsicFunUnary(tree, func, op1vnp.GetLiberal(), encodeResultType, + resultTypeVNPair.GetLiberal()); ValueNum normalCVN = - vnStore->EvalHWIntrinsicFunUnary(tree->TypeGet(), tree->GetSimdBaseType(), intrinsicId, func, - op1vnp.GetConservative(), encodeResultType, + vnStore->EvalHWIntrinsicFunUnary(tree, func, op1vnp.GetConservative(), encodeResultType, resultTypeVNPair.GetConservative()); normalPair = ValueNumPair(normalLVN, normalCVN); @@ -12145,13 +11990,11 @@ void Compiler::fgValueNumberHWIntrinsic(GenTreeHWIntrinsic* tree) if (opCount == 2) { ValueNum normalLVN = - vnStore->EvalHWIntrinsicFunBinary(tree->TypeGet(), tree->GetSimdBaseType(), intrinsicId, func, - op1vnp.GetLiberal(), op2vnp.GetLiberal(), encodeResultType, - resultTypeVNPair.GetLiberal()); - ValueNum normalCVN = - vnStore->EvalHWIntrinsicFunBinary(tree->TypeGet(), tree->GetSimdBaseType(), intrinsicId, func, - op1vnp.GetConservative(), op2vnp.GetConservative(), - encodeResultType, resultTypeVNPair.GetConservative()); + vnStore->EvalHWIntrinsicFunBinary(tree, func, op1vnp.GetLiberal(), op2vnp.GetLiberal(), + encodeResultType, resultTypeVNPair.GetLiberal()); + ValueNum normalCVN = vnStore->EvalHWIntrinsicFunBinary(tree, func, op1vnp.GetConservative(), + op2vnp.GetConservative(), encodeResultType, + resultTypeVNPair.GetConservative()); normalPair = ValueNumPair(normalLVN, normalCVN); excSetPair = vnStore->VNPExcSetUnion(op1Xvnp, op2Xvnp); @@ -12165,15 +12008,13 @@ void Compiler::fgValueNumberHWIntrinsic(GenTreeHWIntrinsic* tree) getOperandVNs(tree->Op(3), &op3vnp, &op3Xvnp); ValueNum normalLVN = - vnStore->EvalHWIntrinsicFunTernary(tree->TypeGet(), tree->GetSimdBaseType(), intrinsicId, func, - op1vnp.GetLiberal(), op2vnp.GetLiberal(), + vnStore->EvalHWIntrinsicFunTernary(tree, func, op1vnp.GetLiberal(), op2vnp.GetLiberal(), op3vnp.GetLiberal(), encodeResultType, resultTypeVNPair.GetLiberal()); ValueNum normalCVN = - vnStore->EvalHWIntrinsicFunTernary(tree->TypeGet(), tree->GetSimdBaseType(), intrinsicId, func, - op1vnp.GetConservative(), op2vnp.GetConservative(), - op3vnp.GetConservative(), encodeResultType, - resultTypeVNPair.GetConservative()); + vnStore->EvalHWIntrinsicFunTernary(tree, func, op1vnp.GetConservative(), + op2vnp.GetConservative(), 
op3vnp.GetConservative(), + encodeResultType, resultTypeVNPair.GetConservative()); normalPair = ValueNumPair(normalLVN, normalCVN); diff --git a/src/coreclr/jit/valuenum.h b/src/coreclr/jit/valuenum.h index 6a5032cd79ed74..72a0166357f4f6 100644 --- a/src/coreclr/jit/valuenum.h +++ b/src/coreclr/jit/valuenum.h @@ -1211,32 +1211,25 @@ class ValueNumStore EvalMathFuncBinary(typ, mthFunc, arg0VNP.GetConservative(), arg1VNP.GetConservative())); } - ValueNum EvalHWIntrinsicFunUnary(var_types type, - var_types baseType, - NamedIntrinsic ni, - VNFunc func, - ValueNum arg0VN, - bool encodeResultType, - ValueNum resultTypeVN); - - ValueNum EvalHWIntrinsicFunBinary(var_types type, - var_types baseType, - NamedIntrinsic ni, - VNFunc func, - ValueNum arg0VN, - ValueNum arg1VN, - bool encodeResultType, - ValueNum resultTypeVN); - - ValueNum EvalHWIntrinsicFunTernary(var_types type, - var_types baseType, - NamedIntrinsic ni, - VNFunc func, - ValueNum arg0VN, - ValueNum arg1VN, - ValueNum arg2VN, - bool encodeResultType, - ValueNum resultTypeVN); +#if defined(FEATURE_HW_INTRINSICS) + ValueNum EvalHWIntrinsicFunUnary( + GenTreeHWIntrinsic* tree, VNFunc func, ValueNum arg0VN, bool encodeResultType, ValueNum resultTypeVN); + + ValueNum EvalHWIntrinsicFunBinary(GenTreeHWIntrinsic* tree, + VNFunc func, + ValueNum arg0VN, + ValueNum arg1VN, + bool encodeResultType, + ValueNum resultTypeVN); + + ValueNum EvalHWIntrinsicFunTernary(GenTreeHWIntrinsic* tree, + VNFunc func, + ValueNum arg0VN, + ValueNum arg1VN, + ValueNum arg2VN, + bool encodeResultType, + ValueNum resultTypeVN); +#endif // FEATURE_HW_INTRINSICS // Returns "true" iff "vn" represents a function application. bool IsVNFunc(ValueNum vn);
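
A note on the folding shape used throughout the morph and value-numbering changes above: HWOperGet maps a named hardware intrinsic to a generic genTreeOps operation (plus an isScalar flag), and EvaluateUnarySimd/EvaluateBinarySimd then evaluate that operation lane by lane over the constant operands, with shift counts masked to the element bit width instead of asserted to be in range. The standalone C++ sketch below illustrates only that shape; Oper, Vec, EvalScalar, and EvalBinary are stand-ins invented for this example, not the JIT's actual genTreeOps, simd types, or helpers.

#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>

enum class Oper { Add, And, AndNot, Lsh, Rsz }; // stand-in for genTreeOps

template <typename TBase, size_t N>
using Vec = std::array<TBase, N>;               // stand-in for simd8_t/simd16_t/...

template <typename TBase>
TBase EvalScalar(Oper oper, TBase x, TBase y)
{
    switch (oper)
    {
        case Oper::Add:
            return static_cast<TBase>(x + y);
        case Oper::And:
            return static_cast<TBase>(x & y);
        case Oper::AndNot:
            // GT_AND_NOT semantics: x & ~y. The diff swaps operands on xarch,
            // whose andnot instruction instead computes ~op1 & op2.
            return static_cast<TBase>(x & ~y);
        case Oper::Lsh:
        case Oper::Rsz:
        {
            // Mask the shift count to the element bit width rather than asserting,
            // matching the masking the simd.h hunk adopts for platforms that
            // enforce it in their instruction encoding.
            const unsigned mask  = (sizeof(TBase) * 8) - 1;
            const unsigned count = static_cast<unsigned>(y) & mask;
            return (oper == Oper::Lsh) ? static_cast<TBase>(x << count)
                                       : static_cast<TBase>(x >> count);
        }
    }
    return x;
}

template <typename TBase, size_t N>
Vec<TBase, N> EvalBinary(Oper oper, const Vec<TBase, N>& a, const Vec<TBase, N>& b)
{
    Vec<TBase, N> result{};
    for (size_t i = 0; i < N; i++)
    {
        result[i] = EvalScalar(oper, a[i], b[i]); // lane-wise evaluation
    }
    return result;
}

int main()
{
    const Vec<uint32_t, 4> x{{1, 2, 3, 4}};
    const Vec<uint32_t, 4> by33{{33, 33, 33, 33}}; // overshift: masked down to 1

    const Vec<uint32_t, 4> shifted = EvalBinary(Oper::Lsh, x, by33);

    for (uint32_t v : shifted)
    {
        printf("%u ", v); // prints: 2 4 6 8
    }
    printf("\n");
    return 0;
}

In the real code the lane type and count come from the simd base type rather than a template parameter, and gtFoldExprHWIntrinsic / EvalHWIntrinsicFunBinary only take this path once HWOperGet reports a non-scalar operation; the xarch-only special cases (operand swap for GT_AND_NOT, extracting the lower 64 bits of a simd16 shift count) are applied before the generic evaluation, as the valuenum.cpp hunk shows.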