From 2e932a57ccb992b856b58bec4c30c6b64f24f711 Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Thu, 28 Nov 2024 16:23:57 +0000 Subject: [PATCH 01/18] Flat casts WIP --- clang/include/clang/AST/OperationKinds.def | 3 + clang/include/clang/Sema/SemaHLSL.h | 2 + clang/lib/AST/Expr.cpp | 1 + clang/lib/AST/ExprConstant.cpp | 1 + clang/lib/CodeGen/CGExpr.cpp | 84 ++++++++++ clang/lib/CodeGen/CGExprAgg.cpp | 83 +++++++++- clang/lib/CodeGen/CGExprComplex.cpp | 1 + clang/lib/CodeGen/CGExprConstant.cpp | 1 + clang/lib/CodeGen/CGExprScalar.cpp | 39 +++++ clang/lib/CodeGen/CodeGenFunction.h | 7 + clang/lib/Edit/RewriteObjCFoundationAPI.cpp | 1 + clang/lib/Sema/Sema.cpp | 1 + clang/lib/Sema/SemaCast.cpp | 20 ++- clang/lib/Sema/SemaHLSL.cpp | 143 ++++++++++++++++++ clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp | 1 + 15 files changed, 384 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/AST/OperationKinds.def b/clang/include/clang/AST/OperationKinds.def index 8788b8ff0ef0a..9323d4e861a73 100644 --- a/clang/include/clang/AST/OperationKinds.def +++ b/clang/include/clang/AST/OperationKinds.def @@ -367,6 +367,9 @@ CAST_OPERATION(HLSLVectorTruncation) // Non-decaying array RValue cast (HLSL only). CAST_OPERATION(HLSLArrayRValue) +// Aggregate by Value cast (HLSL only). +CAST_OPERATION(HLSLAggregateCast) + //===- Binary Operations -------------------------------------------------===// // Operators listed in order of precedence. // Note that additions to this should also update the StmtVisitor class, diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h index ee685d95c9615..6bda1e8ce0ea5 100644 --- a/clang/include/clang/Sema/SemaHLSL.h +++ b/clang/include/clang/Sema/SemaHLSL.h @@ -140,6 +140,8 @@ class SemaHLSL : public SemaBase { // Diagnose whether the input ID is uint/unit2/uint3 type. 
bool diagnoseInputIDType(QualType T, const ParsedAttr &AL); + bool CanPerformScalarCast(QualType SrcTy, QualType DestTy); + bool CanPerformAggregateCast(Expr *Src, QualType DestType); ExprResult ActOnOutParamExpr(ParmVarDecl *Param, Expr *Arg); QualType getInoutParameterType(QualType Ty); diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index a4fb4d5a1f2ec..4764bc84ce498 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -1942,6 +1942,7 @@ bool CastExpr::CastConsistency() const { case CK_FixedPointToBoolean: case CK_HLSLArrayRValue: case CK_HLSLVectorTruncation: + case CK_HLSLAggregateCast: CheckNoBasePath: assert(path_empty() && "Cast kind should not have a base path!"); break; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 6b5b95aee3552..b548cef41b752 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -15733,6 +15733,7 @@ bool ComplexExprEvaluator::VisitCastExpr(const CastExpr *E) { case CK_IntegralToFixedPoint: case CK_MatrixCast: case CK_HLSLVectorTruncation: + case CK_HLSLAggregateCast: llvm_unreachable("invalid cast kind for complex value"); case CK_LValueToRValue: diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 5fccc9cbb37ec..b7608b1226758 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5320,6 +5320,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { case CK_MatrixCast: case CK_HLSLVectorTruncation: case CK_HLSLArrayRValue: + case CK_HLSLAggregateCast: return EmitUnsupportedLValue(E, "unexpected cast lvalue"); case CK_Dependent: @@ -6358,3 +6359,86 @@ RValue CodeGenFunction::EmitPseudoObjectRValue(const PseudoObjectExpr *E, LValue CodeGenFunction::EmitPseudoObjectLValue(const PseudoObjectExpr *E) { return emitPseudoObjectExpr(*this, E, true, AggValueSlot::ignored()).LV; } + +llvm::Value* CodeGenFunction::PerformLoad(std::pair &GEP) { + Address GEPAddress = GEP.first; + llvm::Value *Idx 
= GEP.second; + llvm::Value *V = Builder.CreateLoad(GEPAddress, "load"); + if (Idx) { // loading from a vector so perform an extract as well + return Builder.CreateExtractElement(V, Idx, "vec.load"); + } + return V; +} + +llvm::Value* CodeGenFunction::PerformStore(std::pair &GEP, + llvm::Value *Val) { + Address GEPAddress = GEP.first; + llvm::Value *Idx = GEP.second; + if (Idx) { + llvm::Value *V = Builder.CreateLoad(GEPAddress, "load.for.insert"); + return Builder.CreateInsertElement(V, Val, Idx); + } else { + return Builder.CreateStore(Val, GEPAddress); + } +} + +void CodeGenFunction::FlattenAccessAndType(Address Val, QualType SrcTy, + SmallVector &IdxList, + SmallVector, 16> &GEPList, + SmallVector &FlatTypes) { + llvm::IntegerType *IdxTy = llvm::IntegerType::get(getLLVMContext(),32); + if (const ConstantArrayType *CAT = dyn_cast(SrcTy)) { + uint64_t Size = CAT->getZExtSize(); + for(unsigned i = 0; i < Size; i ++) { + // flatten each member of the array + // add index of this element to index list + llvm::Value *Idx = llvm::ConstantInt::get(IdxTy, i); + IdxList.push_back(Idx); + // recur on this object + FlattenAccessAndType(Val, CAT->getElementType(), IdxList, GEPList, FlatTypes); + // remove index of this element from index list + IdxList.pop_back(); + } + } else if (const RecordType *RT = SrcTy->getAs()) { + RecordDecl *Record = RT->getDecl(); + const CGRecordLayout &RL = getTypes().getCGRecordLayout(Record); + // do I need to check if its a cxx record decl? 
+ + for (auto fieldIter = Record->field_begin(), fieldEnd = Record->field_end(); + fieldIter != fieldEnd; ++fieldIter) { + // get the field number + unsigned FieldNum = RL.getLLVMFieldNo(*fieldIter); + // can we just do *fieldIter->getFieldIndex(); + // add that index to the index list + llvm::Value *Idx = llvm::ConstantInt::get(IdxTy, FieldNum); + IdxList.push_back(Idx); + // recur on the field + FlattenAccessAndType(Val, fieldIter->getType(), IdxList, GEPList, + FlatTypes); + // remove index of this element from index list + IdxList.pop_back(); + } + } else if (const VectorType *VT = SrcTy->getAs()) { + llvm::Type *VTy = ConvertTypeForMem(SrcTy); + CharUnits Align = getContext().getTypeAlignInChars(SrcTy); + Address GEP = Builder.CreateInBoundsGEP(Val, IdxList, + VTy, Align, "vector.gep"); + for(unsigned i = 0; i < VT->getNumElements(); i ++) { + // add index to the list + llvm::Value *Idx = llvm::ConstantInt::get(IdxTy, i); + // create gep. no need to recur since its always a scalar + // gep on vector is not recommended so combine gep with extract/insert + GEPList.push_back({GEP, Idx}); + FlatTypes.push_back(VT->getElementType()); + } + } else { // should be a scalar should we assert or check? + // create a gep + llvm::Type *Ty = ConvertTypeForMem(SrcTy); + CharUnits Align = getContext().getTypeAlignInChars(SrcTy); + Address GEP = Builder.CreateInBoundsGEP(Val, IdxList, + Ty, Align, "gep"); + GEPList.push_back({GEP, NULL}); + FlatTypes.push_back(SrcTy); + } + // target extension types? 
+} diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index 2ad6587089f10..bc8e1f0f9248e 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -491,6 +491,70 @@ static bool isTrivialFiller(Expr *E) { return false; } + + +// emit a flat cast where the RHS is a scalar, including vector +static void EmitHLSLScalarFlatCast(CodeGenFunction &CGF, Address DestVal, + QualType DestTy, llvm::Value *SrcVal, + QualType SrcTy, SourceLocation Loc) { + // Flatten our destination + SmallVector DestTypes; // Flattened type + SmallVector IdxList; + SmallVector, 16> StoreGEPList; + // ^^ Flattened accesses to DestVal we want to store into + CGF.FlattenAccessAndType(DestVal, DestTy, IdxList, StoreGEPList, + DestTypes); + + if (const VectorType *VT = SrcTy->getAs()) { + SrcTy = VT->getElementType(); + assert(StoreGEPList.size() <= VT->getNumElements() && + "Cannot perform HLSL flat cast when vector source \ + object has less elements than flattened destination \ + object."); + for(unsigned i = 0; i < StoreGEPList.size(); i ++) { + llvm::Value *Load = CGF.Builder.CreateExtractElement(SrcVal, i, + "vec.load"); + llvm::Value *Cast = CGF.EmitScalarConversion(Load, SrcTy, + DestTypes[i], + Loc); + CGF.PerformStore(StoreGEPList[i], Cast); + } + return; + } + llvm_unreachable("HLSL Flat cast doesn't handle splatting."); +} + +// emit a flat cast where the RHS is an aggregate +static void EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address DestVal, + QualType DestTy, Address SrcVal, + QualType SrcTy, SourceLocation Loc) { + // Flatten our destination + SmallVector DestTypes; // Flattened type + SmallVector IdxList; + SmallVector, 16> StoreGEPList; + // ^^ Flattened accesses to DestVal we want to store into + CGF.FlattenAccessAndType(DestVal, DestTy, IdxList, StoreGEPList, + DestTypes); + // Flatten our src + SmallVector SrcTypes; // Flattened type + SmallVector, 16> LoadGEPList; + // ^^ Flattened accesses to SrcVal we want to load 
from + IdxList.clear(); + CGF.FlattenAccessAndType(SrcVal, SrcTy, IdxList, LoadGEPList, SrcTypes); + + assert(StoreGEPList.size() <= LoadGEPList.size() && + "Cannot perform HLSL flat cast when flattened source object \ + has less elements than flattened destination object."); + // apply casts to what we load from LoadGEPList + // and store result in Dest + for(unsigned i = 0; i < StoreGEPList.size(); i ++) { + llvm::Value *Load = CGF.PerformLoad(LoadGEPList[i]); + llvm::Value *Cast = CGF.EmitScalarConversion(Load, SrcTypes[i], + DestTypes[i], Loc); + CGF.PerformStore(StoreGEPList[i], Cast); + } +} + /// Emit initialization of an array from an initializer list. ExprToVisit must /// be either an InitListEpxr a CXXParenInitListExpr. void AggExprEmitter::EmitArrayInit(Address DestPtr, llvm::ArrayType *AType, @@ -890,7 +954,24 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { case CK_HLSLArrayRValue: Visit(E->getSubExpr()); break; - + case CK_HLSLAggregateCast: { + Expr *Src = E->getSubExpr(); + QualType SrcTy = Src->getType(); + RValue RV = CGF.EmitAnyExpr(Src); + QualType DestTy = E->getType(); + Address DestVal = Dest.getAddress(); + SourceLocation Loc = E->getExprLoc(); + + if (RV.isScalar()) { + llvm::Value *SrcVal = RV.getScalarVal(); + EmitHLSLScalarFlatCast(CGF, DestVal, DestTy, SrcVal, SrcTy, Loc); + } else { // RHS is an aggregate + assert(RV.isAggregate() && + "Can't perform HLSL Aggregate cast on a complex type."); + Address SrcVal = RV.getAggregateAddress(); + EmitHLSLAggregateFlatCast(CGF, DestVal, DestTy, SrcVal, SrcTy, Loc); + } + break; } case CK_NoOp: case CK_UserDefinedConversion: case CK_ConstructorConversion: diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index ac31dff11b585..05680d36aa2bd 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -610,6 +610,7 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op, case CK_MatrixCast: case 
CK_HLSLVectorTruncation: case CK_HLSLArrayRValue: + case CK_HLSLAggregateCast: llvm_unreachable("invalid cast kind for complex value"); case CK_FloatingRealToComplex: diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index 655fc3dc954c8..6d15bc9058e45 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -1335,6 +1335,7 @@ class ConstExprEmitter case CK_MatrixCast: case CK_HLSLVectorTruncation: case CK_HLSLArrayRValue: + case CK_HLSLAggregateCast: return nullptr; } llvm_unreachable("Invalid CastKind"); diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 4ae8a2b22b1bb..d7bb702ec3ca2 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2262,6 +2262,35 @@ bool CodeGenFunction::ShouldNullCheckClassCastValue(const CastExpr *CE) { return true; } +// RHS is an aggregate type +static Value *EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address RHSVal, + QualType RHSTy, QualType LHSTy, + SourceLocation Loc) { + SmallVector IdxList; + SmallVector, 16> LoadGEPList; + SmallVector SrcTypes; // Flattened type + CGF.FlattenAccessAndType(RHSVal, RHSTy, IdxList, LoadGEPList, SrcTypes); + // LHS is either a vector or a builtin? + // if its a vector create a temp alloca to store into and return that + if (auto *VecTy = LHSTy->getAs()) { + llvm::Value *V = CGF.Builder.CreateLoad(CGF.CreateIRTemp(LHSTy, "flatcast.tmp")); + // write to V. + for(unsigned i = 0; i < VecTy->getNumElements(); i ++) { + llvm::Value *Load = CGF.PerformLoad(LoadGEPList[i]); + llvm::Value *Cast = CGF.EmitScalarConversion(Load, SrcTypes[i], + VecTy->getElementType(), Loc); + V = CGF.Builder.CreateInsertElement(V, Cast, i); + } + return V; + } + // i its a builtin just do an extract element or load. 
+ assert(LHSTy->isBuiltinType() && + "Destination type must be a vector or builtin type."); + // TODO add asserts about things being long enough + return CGF.EmitScalarConversion(CGF.PerformLoad(LoadGEPList[0]), + LHSTy, SrcTypes[0], Loc); +} + // VisitCastExpr - Emit code for an explicit or implicit cast. Implicit casts // have to handle a more broad range of conversions than explicit casts, as they // handle things like function to ptr-to-function decay etc. @@ -2752,7 +2781,17 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { llvm::Value *Zero = llvm::Constant::getNullValue(CGF.SizeTy); return Builder.CreateExtractElement(Vec, Zero, "cast.vtrunc"); } + case CK_HLSLAggregateCast: { + RValue RV = CGF.EmitAnyExpr(E); + SourceLocation Loc = CE->getExprLoc(); + QualType SrcTy = E->getType(); + if (RV.isAggregate()) { // RHS is an aggregate + Address SrcVal = RV.getAggregateAddress(); + return EmitHLSLAggregateFlatCast(CGF, SrcVal, SrcTy, DestTy, Loc); + } + llvm_unreachable("Not a valid HLSL Flat Cast."); + } } // end of switch llvm_unreachable("unknown scalar cast"); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index eaea0d8a08ac0..b17ead377610e 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4359,6 +4359,13 @@ class CodeGenFunction : public CodeGenTypeCache { AggValueSlot slot = AggValueSlot::ignored()); LValue EmitPseudoObjectLValue(const PseudoObjectExpr *e); + llvm::Value *PerformLoad(std::pair &GEP); + llvm::Value *PerformStore(std::pair &GEP, llvm::Value *Val); + void FlattenAccessAndType(Address Val, QualType SrcTy, + SmallVector &IdxList, + SmallVector, 16> &GEPList, + SmallVector &FlatTypes); + llvm::Value *EmitIvarOffset(const ObjCInterfaceDecl *Interface, const ObjCIvarDecl *Ivar); llvm::Value *EmitIvarOffsetAsPointerDiff(const ObjCInterfaceDecl *Interface, diff --git a/clang/lib/Edit/RewriteObjCFoundationAPI.cpp b/clang/lib/Edit/RewriteObjCFoundationAPI.cpp 
index 81797c8c4dc75..63308319a78d1 100644 --- a/clang/lib/Edit/RewriteObjCFoundationAPI.cpp +++ b/clang/lib/Edit/RewriteObjCFoundationAPI.cpp @@ -1085,6 +1085,7 @@ static bool rewriteToNumericBoxedExpression(const ObjCMessageExpr *Msg, llvm_unreachable("OpenCL-specific cast in Objective-C?"); case CK_HLSLVectorTruncation: + case CK_HLSLAggregateCast: llvm_unreachable("HLSL-specific cast in Objective-C?"); break; diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index d6517511d7db4..2f0528d6ab5ce 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -707,6 +707,7 @@ ExprResult Sema::ImpCastExprToType(Expr *E, QualType Ty, case CK_ToVoid: case CK_NonAtomicToAtomic: case CK_HLSLArrayRValue: + case CK_HLSLAggregateCast: break; } } diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index f98857f852b5a..955c44cf4a6a4 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -25,6 +25,7 @@ #include "clang/Sema/Initialization.h" #include "clang/Sema/SemaObjC.h" #include "clang/Sema/SemaRISCV.h" +#include "clang/Sema/SemaHLSL.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include @@ -2768,6 +2769,22 @@ void CastOperation::CheckCXXCStyleCast(bool FunctionalStyle, return; } + CheckedConversionKind CCK = FunctionalStyle + ? CheckedConversionKind::FunctionalCast + : CheckedConversionKind::CStyleCast; + // todo what else should i be doing lvalue to rvalue cast for? + // why dont they do it for records below? + // This case should not trigger on regular vector splat + // Or vector cast or vector truncation. 
+ QualType SrcTy = SrcExpr.get()->getType(); + if (Self.getLangOpts().HLSL && + Self.HLSL().CanPerformAggregateCast(SrcExpr.get(), DestType)) { + if (SrcTy->isConstantArrayType()) + SrcExpr = Self.ImpCastExprToType(SrcExpr.get(), Self.Context.getArrayParameterType(SrcTy), CK_HLSLArrayRValue, VK_PRValue, nullptr, CCK); + Kind = CK_HLSLAggregateCast; + return; + } + if (ValueKind == VK_PRValue && !DestType->isRecordType() && !isPlaceholder(BuiltinType::Overload)) { SrcExpr = Self.DefaultFunctionArrayLvalueConversion(SrcExpr.get()); @@ -2820,9 +2837,6 @@ void CastOperation::CheckCXXCStyleCast(bool FunctionalStyle, if (isValidCast(tcr)) Kind = CK_NoOp; - CheckedConversionKind CCK = FunctionalStyle - ? CheckedConversionKind::FunctionalCast - : CheckedConversionKind::CStyleCast; if (tcr == TC_NotApplicable) { tcr = TryAddressSpaceCast(Self, SrcExpr, DestType, /*CStyle*/ true, msg, Kind); diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 88db3e1254119..942c0a8fcaab0 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2412,6 +2412,149 @@ bool SemaHLSL::CheckCompatibleParameterABI(FunctionDecl *New, return HadError; } +// Follows PerformScalarCast +bool SemaHLSL::CanPerformScalarCast(QualType SrcTy, QualType DestTy) { + + if (SemaRef.getASTContext().hasSameUnqualifiedType(SrcTy, DestTy)) + return true; + + switch (Type::ScalarTypeKind SrcKind = SrcTy->getScalarTypeKind()) { + case Type::STK_MemberPointer: + return false; + + case Type::STK_CPointer: + case Type::STK_BlockPointer: + case Type::STK_ObjCObjectPointer: + switch (DestTy->getScalarTypeKind()) { + case Type::STK_CPointer: + case Type::STK_BlockPointer: + case Type::STK_ObjCObjectPointer: + case Type::STK_Bool: + case Type::STK_Integral: + return true; + case Type::STK_Floating: + case Type::STK_FloatingComplex: + case Type::STK_IntegralComplex: + case Type::STK_MemberPointer: + return false; + case Type::STK_FixedPoint: + llvm_unreachable("HLSL doesn't have 
fixed point types."); + } + llvm_unreachable("Should have returned before this"); + + case Type::STK_FixedPoint: + llvm_unreachable("HLSL doesn't have fixed point types."); + + case Type::STK_Bool: // casting from bool is like casting from an integer + case Type::STK_Integral: + switch (DestTy->getScalarTypeKind()) { + case Type::STK_CPointer: + case Type::STK_ObjCObjectPointer: + case Type::STK_BlockPointer: + case Type::STK_Bool: + case Type::STK_Integral: + case Type::STK_Floating: + case Type::STK_IntegralComplex: + case Type::STK_FloatingComplex: + return true; + case Type::STK_FixedPoint: + llvm_unreachable("HLSL doesn't have fixed point types."); + case Type::STK_MemberPointer: + return false; + } + llvm_unreachable("Should have returned before this"); + + case Type::STK_Floating: + switch (DestTy->getScalarTypeKind()) { + case Type::STK_Floating: + case Type::STK_Bool: + case Type::STK_Integral: + case Type::STK_FloatingComplex: + case Type::STK_IntegralComplex: + return true; + case Type::STK_FixedPoint: + llvm_unreachable("HLSL doesn't have fixed point types."); + case Type::STK_CPointer: + case Type::STK_ObjCObjectPointer: + case Type::STK_BlockPointer: + case Type::STK_MemberPointer: + return false; + } + llvm_unreachable("Should have returned before this"); + + case Type::STK_FloatingComplex: + switch (DestTy->getScalarTypeKind()) { + case Type::STK_FloatingComplex: + case Type::STK_IntegralComplex: + case Type::STK_Floating: + case Type::STK_Bool: + case Type::STK_Integral: + return true; + case Type::STK_CPointer: + case Type::STK_ObjCObjectPointer: + case Type::STK_BlockPointer: + case Type::STK_MemberPointer: + return false; + case Type::STK_FixedPoint: + llvm_unreachable("HLSL doesn't have fixed point types."); + } + llvm_unreachable("Should have returned before this"); + + case Type::STK_IntegralComplex: + switch (DestTy->getScalarTypeKind()) { + case Type::STK_FloatingComplex: + case Type::STK_IntegralComplex: + case Type::STK_Integral: + case 
Type::STK_Bool: + case Type::STK_Floating: + return true; + case Type::STK_CPointer: + case Type::STK_ObjCObjectPointer: + case Type::STK_BlockPointer: + case Type::STK_MemberPointer: + return false; + case Type::STK_FixedPoint: + llvm_unreachable("HLSL doesn't have fixed point types."); + } + llvm_unreachable("Should have returned before this"); + } + + llvm_unreachable("Unhandled scalar cast"); +} + +// Can we perform an HLSL Flattened cast? +bool SemaHLSL::CanPerformAggregateCast(Expr *Src, QualType DestTy) { + + // Don't handle casts where LHS and RHS are any combination of scalar/vector + // There must be an aggregate somewhere + QualType SrcTy = Src->getType(); + if (SrcTy->isScalarType()) // always a splat and this cast doesn't handle that + return false; + + if ((DestTy->isScalarType() || DestTy->isVectorType()) && + (SrcTy->isScalarType() || SrcTy->isVectorType())) + return false; + + llvm::SmallVector DestTypes; + BuildFlattenedTypeList(DestTy, DestTypes); + llvm::SmallVector SrcTypes; + BuildFlattenedTypeList(SrcTy, SrcTypes); + + // Usually the size of SrcTypes must be greater than or equal to the size of DestTypes. 
+ if (SrcTypes.size() >= DestTypes.size()) { + + unsigned i; + for(i = 0; i < DestTypes.size() && i < SrcTypes.size(); i ++) { + if (!CanPerformScalarCast(SrcTypes[i], DestTypes[i])) { + return false; + } + } + return true; + } else { // can't cast, Src is wrong size for Dest + return false; + } +} + ExprResult SemaHLSL::ActOnOutParamExpr(ParmVarDecl *Param, Expr *Arg) { assert(Param->hasAttr() && "We should not get here without a parameter modifier expression"); diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp index 7a900780384a9..067ff064861ce 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp @@ -522,6 +522,7 @@ void ExprEngine::VisitCast(const CastExpr *CastE, const Expr *Ex, case CK_ToUnion: case CK_MatrixCast: case CK_VectorSplat: + case CK_HLSLAggregateCast: case CK_HLSLVectorTruncation: { QualType resultType = CastE->getType(); if (CastE->isGLValue()) From 121f2a9ac38f8a8098db51f3fd3ccdc6e3fa6f7b Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Thu, 5 Dec 2024 17:41:51 +0000 Subject: [PATCH 02/18] fix broken test --- clang/test/SemaHLSL/BuiltIns/vector-constructors-erros.hlsl | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang/test/SemaHLSL/BuiltIns/vector-constructors-erros.hlsl b/clang/test/SemaHLSL/BuiltIns/vector-constructors-erros.hlsl index 7f6bdc7e67836..b004acdc7c502 100644 --- a/clang/test/SemaHLSL/BuiltIns/vector-constructors-erros.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/vector-constructors-erros.hlsl @@ -17,6 +17,4 @@ void entry() { // These _should_ work in HLSL but aren't yet supported. 
S s; float2 GettingStrange = float2(s, s); // expected-error{{no viable conversion from 'S' to 'float'}} expected-error{{no viable conversion from 'S' to 'float'}} - S2 s2; - float2 EvenStranger = float2(s2); // expected-error{{cannot convert 'S2' to 'float2' (vector of 2 'float' values) without a conversion operator}} } From 9cc06ce79bbae61309ff0ab060e570d129fb0be8 Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Thu, 5 Dec 2024 17:44:38 +0000 Subject: [PATCH 03/18] make clang format happy --- clang/lib/CodeGen/CGExpr.cpp | 36 +++++++------- clang/lib/CodeGen/CGExprAgg.cpp | 48 +++++++++---------- clang/lib/CodeGen/CGExprScalar.cpp | 19 ++++---- clang/lib/CodeGen/CodeGenFunction.h | 11 +++-- clang/lib/Sema/SemaCast.cpp | 6 ++- clang/lib/Sema/SemaHLSL.cpp | 7 +-- clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp | 2 +- 7 files changed, 66 insertions(+), 63 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index b7608b1226758..6b9c437ef7e24 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -6360,7 +6360,8 @@ LValue CodeGenFunction::EmitPseudoObjectLValue(const PseudoObjectExpr *E) { return emitPseudoObjectExpr(*this, E, true, AggValueSlot::ignored()).LV; } -llvm::Value* CodeGenFunction::PerformLoad(std::pair &GEP) { +llvm::Value * +CodeGenFunction::PerformLoad(std::pair &GEP) { Address GEPAddress = GEP.first; llvm::Value *Idx = GEP.second; llvm::Value *V = Builder.CreateLoad(GEPAddress, "load"); @@ -6370,8 +6371,9 @@ llvm::Value* CodeGenFunction::PerformLoad(std::pair &GEP return V; } -llvm::Value* CodeGenFunction::PerformStore(std::pair &GEP, - llvm::Value *Val) { +llvm::Value * +CodeGenFunction::PerformStore(std::pair &GEP, + llvm::Value *Val) { Address GEPAddress = GEP.first; llvm::Value *Idx = GEP.second; if (Idx) { @@ -6382,20 +6384,21 @@ llvm::Value* CodeGenFunction::PerformStore(std::pair &GE } } -void CodeGenFunction::FlattenAccessAndType(Address Val, QualType SrcTy, - SmallVector &IdxList, - 
SmallVector, 16> &GEPList, - SmallVector &FlatTypes) { - llvm::IntegerType *IdxTy = llvm::IntegerType::get(getLLVMContext(),32); +void CodeGenFunction::FlattenAccessAndType( + Address Val, QualType SrcTy, SmallVector &IdxList, + SmallVector, 16> &GEPList, + SmallVector &FlatTypes) { + llvm::IntegerType *IdxTy = llvm::IntegerType::get(getLLVMContext(), 32); if (const ConstantArrayType *CAT = dyn_cast(SrcTy)) { uint64_t Size = CAT->getZExtSize(); - for(unsigned i = 0; i < Size; i ++) { + for (unsigned i = 0; i < Size; i++) { // flatten each member of the array // add index of this element to index list llvm::Value *Idx = llvm::ConstantInt::get(IdxTy, i); IdxList.push_back(Idx); // recur on this object - FlattenAccessAndType(Val, CAT->getElementType(), IdxList, GEPList, FlatTypes); + FlattenAccessAndType(Val, CAT->getElementType(), IdxList, GEPList, + FlatTypes); // remove index of this element from index list IdxList.pop_back(); } @@ -6405,7 +6408,7 @@ void CodeGenFunction::FlattenAccessAndType(Address Val, QualType SrcTy, // do I need to check if its a cxx record decl? 
for (auto fieldIter = Record->field_begin(), fieldEnd = Record->field_end(); - fieldIter != fieldEnd; ++fieldIter) { + fieldIter != fieldEnd; ++fieldIter) { // get the field number unsigned FieldNum = RL.getLLVMFieldNo(*fieldIter); // can we just do *fieldIter->getFieldIndex(); @@ -6414,16 +6417,16 @@ void CodeGenFunction::FlattenAccessAndType(Address Val, QualType SrcTy, IdxList.push_back(Idx); // recur on the field FlattenAccessAndType(Val, fieldIter->getType(), IdxList, GEPList, - FlatTypes); + FlatTypes); // remove index of this element from index list IdxList.pop_back(); } } else if (const VectorType *VT = SrcTy->getAs()) { llvm::Type *VTy = ConvertTypeForMem(SrcTy); CharUnits Align = getContext().getTypeAlignInChars(SrcTy); - Address GEP = Builder.CreateInBoundsGEP(Val, IdxList, - VTy, Align, "vector.gep"); - for(unsigned i = 0; i < VT->getNumElements(); i ++) { + Address GEP = + Builder.CreateInBoundsGEP(Val, IdxList, VTy, Align, "vector.gep"); + for (unsigned i = 0; i < VT->getNumElements(); i++) { // add index to the list llvm::Value *Idx = llvm::ConstantInt::get(IdxTy, i); // create gep. 
no need to recur since its always a scalar @@ -6435,8 +6438,7 @@ void CodeGenFunction::FlattenAccessAndType(Address Val, QualType SrcTy, // create a gep llvm::Type *Ty = ConvertTypeForMem(SrcTy); CharUnits Align = getContext().getTypeAlignInChars(SrcTy); - Address GEP = Builder.CreateInBoundsGEP(Val, IdxList, - Ty, Align, "gep"); + Address GEP = Builder.CreateInBoundsGEP(Val, IdxList, Ty, Align, "gep"); GEPList.push_back({GEP, NULL}); FlatTypes.push_back(SrcTy); } diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index bc8e1f0f9248e..e3b47de958ce5 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -491,50 +491,45 @@ static bool isTrivialFiller(Expr *E) { return false; } - - // emit a flat cast where the RHS is a scalar, including vector static void EmitHLSLScalarFlatCast(CodeGenFunction &CGF, Address DestVal, - QualType DestTy, llvm::Value *SrcVal, - QualType SrcTy, SourceLocation Loc) { + QualType DestTy, llvm::Value *SrcVal, + QualType SrcTy, SourceLocation Loc) { // Flatten our destination SmallVector DestTypes; // Flattened type SmallVector IdxList; SmallVector, 16> StoreGEPList; // ^^ Flattened accesses to DestVal we want to store into - CGF.FlattenAccessAndType(DestVal, DestTy, IdxList, StoreGEPList, - DestTypes); + CGF.FlattenAccessAndType(DestVal, DestTy, IdxList, StoreGEPList, DestTypes); if (const VectorType *VT = SrcTy->getAs()) { SrcTy = VT->getElementType(); assert(StoreGEPList.size() <= VT->getNumElements() && - "Cannot perform HLSL flat cast when vector source \ + "Cannot perform HLSL flat cast when vector source \ object has less elements than flattened destination \ object."); - for(unsigned i = 0; i < StoreGEPList.size(); i ++) { - llvm::Value *Load = CGF.Builder.CreateExtractElement(SrcVal, i, - "vec.load"); - llvm::Value *Cast = CGF.EmitScalarConversion(Load, SrcTy, - DestTypes[i], - Loc); - CGF.PerformStore(StoreGEPList[i], Cast); - } - return; + for (unsigned i = 0; i < 
StoreGEPList.size(); i++) { + llvm::Value *Load = + CGF.Builder.CreateExtractElement(SrcVal, i, "vec.load"); + llvm::Value *Cast = + CGF.EmitScalarConversion(Load, SrcTy, DestTypes[i], Loc); + CGF.PerformStore(StoreGEPList[i], Cast); + } + return; } llvm_unreachable("HLSL Flat cast doesn't handle splatting."); } // emit a flat cast where the RHS is an aggregate static void EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address DestVal, - QualType DestTy, Address SrcVal, - QualType SrcTy, SourceLocation Loc) { + QualType DestTy, Address SrcVal, + QualType SrcTy, SourceLocation Loc) { // Flatten our destination SmallVector DestTypes; // Flattened type SmallVector IdxList; SmallVector, 16> StoreGEPList; // ^^ Flattened accesses to DestVal we want to store into - CGF.FlattenAccessAndType(DestVal, DestTy, IdxList, StoreGEPList, - DestTypes); + CGF.FlattenAccessAndType(DestVal, DestTy, IdxList, StoreGEPList, DestTypes); // Flatten our src SmallVector SrcTypes; // Flattened type SmallVector, 16> LoadGEPList; @@ -543,14 +538,14 @@ static void EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address DestVal, CGF.FlattenAccessAndType(SrcVal, SrcTy, IdxList, LoadGEPList, SrcTypes); assert(StoreGEPList.size() <= LoadGEPList.size() && - "Cannot perform HLSL flat cast when flattened source object \ + "Cannot perform HLSL flat cast when flattened source object \ has less elements than flattened destination object."); // apply casts to what we load from LoadGEPList // and store result in Dest - for(unsigned i = 0; i < StoreGEPList.size(); i ++) { + for (unsigned i = 0; i < StoreGEPList.size(); i++) { llvm::Value *Load = CGF.PerformLoad(LoadGEPList[i]); - llvm::Value *Cast = CGF.EmitScalarConversion(Load, SrcTypes[i], - DestTypes[i], Loc); + llvm::Value *Cast = + CGF.EmitScalarConversion(Load, SrcTypes[i], DestTypes[i], Loc); CGF.PerformStore(StoreGEPList[i], Cast); } } @@ -967,11 +962,12 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { EmitHLSLScalarFlatCast(CGF, DestVal, 
DestTy, SrcVal, SrcTy, Loc); } else { // RHS is an aggregate assert(RV.isAggregate() && - "Can't perform HLSL Aggregate cast on a complex type."); + "Can't perform HLSL Aggregate cast on a complex type."); Address SrcVal = RV.getAggregateAddress(); EmitHLSLAggregateFlatCast(CGF, DestVal, DestTy, SrcVal, SrcTy, Loc); } - break; } + break; + } case CK_NoOp: case CK_UserDefinedConversion: case CK_ConstructorConversion: diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index d7bb702ec3ca2..3809e3b1db349 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2264,8 +2264,8 @@ bool CodeGenFunction::ShouldNullCheckClassCastValue(const CastExpr *CE) { // RHS is an aggregate type static Value *EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address RHSVal, - QualType RHSTy, QualType LHSTy, - SourceLocation Loc) { + QualType RHSTy, QualType LHSTy, + SourceLocation Loc) { SmallVector IdxList; SmallVector, 16> LoadGEPList; SmallVector SrcTypes; // Flattened type @@ -2273,22 +2273,23 @@ static Value *EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address RHSVal, // LHS is either a vector or a builtin? // if its a vector create a temp alloca to store into and return that if (auto *VecTy = LHSTy->getAs()) { - llvm::Value *V = CGF.Builder.CreateLoad(CGF.CreateIRTemp(LHSTy, "flatcast.tmp")); + llvm::Value *V = + CGF.Builder.CreateLoad(CGF.CreateIRTemp(LHSTy, "flatcast.tmp")); // write to V. - for(unsigned i = 0; i < VecTy->getNumElements(); i ++) { + for (unsigned i = 0; i < VecTy->getNumElements(); i++) { llvm::Value *Load = CGF.PerformLoad(LoadGEPList[i]); - llvm::Value *Cast = CGF.EmitScalarConversion(Load, SrcTypes[i], - VecTy->getElementType(), Loc); + llvm::Value *Cast = CGF.EmitScalarConversion( + Load, SrcTypes[i], VecTy->getElementType(), Loc); V = CGF.Builder.CreateInsertElement(V, Cast, i); } return V; } // i its a builtin just do an extract element or load. 
assert(LHSTy->isBuiltinType() && - "Destination type must be a vector or builtin type."); + "Destination type must be a vector or builtin type."); // TODO add asserts about things being long enough - return CGF.EmitScalarConversion(CGF.PerformLoad(LoadGEPList[0]), - LHSTy, SrcTypes[0], Loc); + return CGF.EmitScalarConversion(CGF.PerformLoad(LoadGEPList[0]), LHSTy, + SrcTypes[0], Loc); } // VisitCastExpr - Emit code for an explicit or implicit cast. Implicit casts diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index b17ead377610e..873dd781eb2e7 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4360,11 +4360,12 @@ class CodeGenFunction : public CodeGenTypeCache { LValue EmitPseudoObjectLValue(const PseudoObjectExpr *e); llvm::Value *PerformLoad(std::pair &GEP); - llvm::Value *PerformStore(std::pair &GEP, llvm::Value *Val); - void FlattenAccessAndType(Address Val, QualType SrcTy, - SmallVector &IdxList, - SmallVector, 16> &GEPList, - SmallVector &FlatTypes); + llvm::Value *PerformStore(std::pair &GEP, + llvm::Value *Val); + void FlattenAccessAndType( + Address Val, QualType SrcTy, SmallVector &IdxList, + SmallVector, 16> &GEPList, + SmallVector &FlatTypes); llvm::Value *EmitIvarOffset(const ObjCInterfaceDecl *Interface, const ObjCIvarDecl *Ivar); diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index 955c44cf4a6a4..0bd7fc91aee18 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -23,9 +23,9 @@ #include "clang/Basic/TargetInfo.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/Initialization.h" +#include "clang/Sema/SemaHLSL.h" #include "clang/Sema/SemaObjC.h" #include "clang/Sema/SemaRISCV.h" -#include "clang/Sema/SemaHLSL.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include @@ -2780,7 +2780,9 @@ void CastOperation::CheckCXXCStyleCast(bool FunctionalStyle, if (Self.getLangOpts().HLSL && 
Self.HLSL().CanPerformAggregateCast(SrcExpr.get(), DestType)) { if (SrcTy->isConstantArrayType()) - SrcExpr = Self.ImpCastExprToType(SrcExpr.get(), Self.Context.getArrayParameterType(SrcTy), CK_HLSLArrayRValue, VK_PRValue, nullptr, CCK); + SrcExpr = Self.ImpCastExprToType( + SrcExpr.get(), Self.Context.getArrayParameterType(SrcTy), + CK_HLSLArrayRValue, VK_PRValue, nullptr, CCK); Kind = CK_HLSLAggregateCast; return; } diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 942c0a8fcaab0..5c7af8056063a 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2530,7 +2530,7 @@ bool SemaHLSL::CanPerformAggregateCast(Expr *Src, QualType DestTy) { QualType SrcTy = Src->getType(); if (SrcTy->isScalarType()) // always a splat and this cast doesn't handle that return false; - + if ((DestTy->isScalarType() || DestTy->isVectorType()) && (SrcTy->isScalarType() || SrcTy->isVectorType())) return false; @@ -2540,11 +2540,12 @@ bool SemaHLSL::CanPerformAggregateCast(Expr *Src, QualType DestTy) { llvm::SmallVector SrcTypes; BuildFlattenedTypeList(SrcTy, SrcTypes); - // Usually the size of SrcTypes must be greater than or equal to the size of DestTypes. + // Usually the size of SrcTypes must be greater than or equal to the size of + // DestTypes. 
if (SrcTypes.size() >= DestTypes.size()) { unsigned i; - for(i = 0; i < DestTypes.size() && i < SrcTypes.size(); i ++) { + for (i = 0; i < DestTypes.size() && i < SrcTypes.size(); i++) { if (!CanPerformScalarCast(SrcTypes[i], DestTypes[i])) { return false; } diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp index 067ff064861ce..b105c196fc3bf 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp @@ -522,7 +522,7 @@ void ExprEngine::VisitCast(const CastExpr *CastE, const Expr *Ex, case CK_ToUnion: case CK_MatrixCast: case CK_VectorSplat: - case CK_HLSLAggregateCast: + case CK_HLSLAggregateCast: case CK_HLSLVectorTruncation: { QualType resultType = CastE->getType(); if (CastE->isGLValue()) From e3e51b6761f2e9af61bfa6ae87860e05484e93c0 Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Thu, 5 Dec 2024 17:46:16 +0000 Subject: [PATCH 04/18] CodeGen tests --- .../BasicFeatures/ArrayFlatCast.hlsl | 128 ++++++++++++++++++ .../BasicFeatures/StructFlatCast.hlsl | 124 +++++++++++++++++ .../BasicFeatures/VectorFlatCast.hlsl | 81 +++++++++++ 3 files changed, 333 insertions(+) create mode 100644 clang/test/CodeGenHLSL/BasicFeatures/ArrayFlatCast.hlsl create mode 100644 clang/test/CodeGenHLSL/BasicFeatures/StructFlatCast.hlsl create mode 100644 clang/test/CodeGenHLSL/BasicFeatures/VectorFlatCast.hlsl diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayFlatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayFlatCast.hlsl new file mode 100644 index 0000000000000..23a71a2ecc6b9 --- /dev/null +++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayFlatCast.hlsl @@ -0,0 +1,128 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -disable-llvm-passes -emit-llvm -finclude-default-header -o - %s | FileCheck %s + +// array truncation +// CHECK-LABEL: define void {{.*}}call1 +// CHECK: [[A:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[B:%.*]] = alloca [1 x i32], align 
4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 4, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[B]], i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0 +// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 1 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G2]], align 4 +// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4 +export void call1() { + int A[2] = {0,1}; + int B[1] = {4}; + B = (int[1])A; +} + +// just a cast +// CHECK-LABEL: define void {{.*}}call2 +// CHECK: [[A:%.*]] = alloca [1 x i32], align 4 +// CHECK-NEXT: [[B:%.*]] = alloca [1 x float], align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca [1 x i32], align 4 +// CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 4 [[A]], i8 0, i32 4, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 4, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 4, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x float], ptr [[B]], i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [1 x i32], ptr [[Tmp]], i32 0 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G2]], align 4 +// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L]] to float +// CHECK-NEXT: store float [[C]], ptr [[G1]], align 4 +export void call2() { + int A[1] = {0}; + float B[1] = {1.0}; + B = (float[1])A; +} + +// vector to array +// CHECK-LABEL: define void {{.*}}call3 +// CHECK: [[A:%.*]] = alloca <1 x float>, align 4 +// CHECK-NEXT: [[B:%.*]] = alloca [1 x i32], align 4 +// CHECK-NEXT: store <1 x float> splat (float 0x3FF3333340000000), ptr [[A]], align 4 
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 4, i1 false) +// CHECK-NEXT: [[C:%.*]] = load <1 x float>, ptr [[A]], align 4 +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[B]], i32 0 +// CHECK-NEXT: [[V:%.*]] = extractelement <1 x float> [[C]], i64 0 +// CHECK-NEXT: [[C:%.*]] = fptosi float [[V]] to i32 +// CHECK-NEXT: store i32 [[C]], ptr [[G1]], align 4 +export void call3() { + float1 A = {1.2}; + int B[1] = {1}; + B = (int[1])A; +} + +// flatten array of vector to array with cast +// CHECK-LABEL: define void {{.*}}call5 +// CHECK: [[A:%.*]] = alloca [1 x <2 x float>], align 8 +// CHECK-NEXT: [[B:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca [1 x <2 x float>], align 8 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[A]], ptr align 8 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[Tmp]], ptr align 8 [[A]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 1 +// CHECK-NEXT: [[VG:%.*]] = getelementptr inbounds [1 x <2 x float>], ptr [[Tmp]], i32 0 +// CHECK-NEXT: [[L:%.*]] = load <2 x float>, ptr [[VG]], align 8 +// CHECK-NEXT: [[VL:%.*]] = extractelement <2 x float> [[L]], i32 0 +// CHECK-NEXT: [[C:%.*]] = fptosi float [[VL]] to i32 +// CHECK-NEXT: store i32 [[C]], ptr [[G1]], align 4 +// CHECK-NEXT: [[L4:%.*]] = load <2 x float>, ptr [[VG]], align 8 +// CHECK-NEXT: [[VL5:%.*]] = extractelement <2 x float> [[L4]], i32 1 +// CHECK-NEXT: [[C6:%.*]] = fptosi float [[VL5]] to i32 +// CHECK-NEXT: store i32 [[C6]], ptr [[G2]], align 4 +export void call5() { + float2 A[1] = {{1.2,3.4}}; + int B[2] = {1,2}; + B = (int[2])A; +} + +// flatten 2d array +// CHECK-LABEL: define void {{.*}}call6 +// CHECK: 
[[A:%.*]] = alloca [2 x [1 x i32]], align 4 +// CHECK-NEXT: [[B:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x [1 x i32]], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 1 +// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 1, i32 0 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4 +// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4 +// CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4 +// CHECK-NEXT: store i32 [[L4]], ptr [[G2]], align 4 +export void call6() { + int A[2][1] = {{1},{3}}; + int B[2] = {1,2}; + B = (int[2])A; +} + +struct S { + int X; + float Y; +}; + +// flatten and truncate from a struct +// CHECK-LABEL: define void {{.*}}call7 +// CHECK: [[s:%.*]] = alloca %struct.S, align 4 +// CHECK-NEXT: [[A:%.*]] = alloca [1 x i32], align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.S, align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[s]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 4, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[s]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[A]], i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0 +// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 1 +// CHECK-NEXT: 
[[L:%.*]] = load i32, ptr [[G2]], align 4 +// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4 +export void call7() { + S s = {1, 2.9}; + int A[1] = {1}; + A = (int[1])s; +} + diff --git a/clang/test/CodeGenHLSL/BasicFeatures/StructFlatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/StructFlatCast.hlsl new file mode 100644 index 0000000000000..c44e340109abb --- /dev/null +++ b/clang/test/CodeGenHLSL/BasicFeatures/StructFlatCast.hlsl @@ -0,0 +1,124 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s + +struct S { + int X; + float Y; +}; + +// struct from vector +// CHECK-LABEL: define void {{.*}}call1 +// CHECK: [[A:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[s:%.*]] = alloca %struct.S, align 4 +// CHECK-NEXT: store <2 x i32> , ptr [[A]], align 8 +// CHECK-NEXT: [[L:%.*]] = load <2 x i32>, ptr [[A]], align 8 +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 1 +// CHECK-NEXT: [[VL:%.*]] = extractelement <2 x i32> [[L]], i64 0 +// CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4 +// CHECK-NEXT: [[VL2:%.*]] = extractelement <2 x i32> [[L]], i64 1 +// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[VL2]] to float +// CHECK-NEXT: store float [[C]], ptr [[G2]], align 4 +export void call1() { + int2 A = {1,2}; + S s = (S)A; +} + + +// struct from array +// CHECK-LABEL: define void {{.*}}call2 +// CHECK: [[A:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[s:%.*]] = alloca %struct.S, align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0 +// CHECK-NEXT: [[G2:%.*]] = 
getelementptr inbounds %struct.S, ptr [[s]], i32 1 +// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0 +// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 1 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4 +// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4 +// CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4 +// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L4]] to float +// CHECK-NEXT: store float [[C]], ptr [[G2]], align 4 +export void call2() { + int A[2] = {1,2}; + S s = (S)A; +} + +struct Q { + int Z; +}; + +struct R { + Q q; + float F; +}; + +// struct from nested struct? +// CHECK-LABEL: define void {{.*}}call6 +// CHECK: [[r:%.*]] = alloca %struct.R, align 4 +// CHECK-NEXT: [[s:%.*]] = alloca %struct.S, align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.R, align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[r]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[r]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 1 +// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds %struct.R, ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds %struct.R, ptr [[Tmp]], i32 1 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4 +// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4 +// CHECK-NEXT: [[L4:%.*]] = load float, ptr [[G4]], align 4 +// CHECK-NEXT: store float [[L4]], ptr [[G2]], align 4 +export void call6() { + R r = {{1}, 2.0}; + S s = (S)r; +} + +// nested struct from array? 
+// CHECK-LABEL: define void {{.*}}call7 +// CHECK: [[A:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[r:%.*]] = alloca %struct.R, align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.R, ptr [[r]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.R, ptr [[r]], i32 1 +// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0 +// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 1 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4 +// CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4 +// CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4 +// CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L4]] to float +// CHECK-NEXT: store float [[C]], ptr [[G2]], align 4 +export void call7() { + int A[2] = {1,2}; + R r = (R)A; +} + +struct T { + int A; + int B; + int C; +}; + +// struct truncation +// CHECK-LABEL: define void {{.*}}call8 +// CHECK: [[t:%.*]] = alloca %struct.T, align 4 +// CHECK-NEXT: [[s:%.*]] = alloca %struct.S, align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.T, align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[t]], ptr align 4 {{.*}}, i32 12, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[t]], i32 12, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 1 +// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds %struct.T, ptr [[Tmp]], i32 0 +// CHECK-NEXT: %gep3 = getelementptr inbounds %struct.T, ptr %agg-temp, i32 1 +// CHECK-NEXT: %gep4 = getelementptr inbounds %struct.T, ptr %agg-temp, i32 2 +// CHECK-NEXT: %load = load i32, 
ptr %gep2, align 4 +// CHECK-NEXT: store i32 %load, ptr %gep, align 4 +// CHECK-NEXT: %load5 = load i32, ptr %gep3, align 4 +// CHECK-NEXT: %conv = sitofp i32 %load5 to float +// CHECK-NEXT: store float %conv, ptr %gep1, align 4 +export void call8() { + T t = {1,2,3}; + S s = (S)t; +} diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorFlatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorFlatCast.hlsl new file mode 100644 index 0000000000000..9cd320ee9f62d --- /dev/null +++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorFlatCast.hlsl @@ -0,0 +1,81 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s + +// vector flat cast from array +// CHECK-LABEL: define void {{.*}}call2 +// CHECK: [[A:%.*]] = alloca [2 x [1 x i32]], align 4 +// CHECK-NEXT: [[B:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x [1 x i32]], align 4 +// CHECK-NEXT: [[Tmp2:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 1, i32 0 +// CHECK-NEXT: [[C:%.*]] = load <2 x i32>, ptr [[Tmp2]], align 8 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4 +// CHECK-NEXT: [[D:%.*]] = insertelement <2 x i32> [[C]], i32 [[L]], i64 0 +// CHECK-NEXT: [[L2:%.*]] = load i32, ptr [[G2]], align 4 +// CHECK-NEXT: [[E:%.*]] = insertelement <2 x i32> [[D]], i32 [[L2]], i64 1 +// CHECK-NEXT: store <2 x i32> [[E]], ptr [[B]], align 8 +export void call2() { + int A[2][1] = {{1},{2}}; + int2 B = (int2)A; +} + +struct S { + int X; + float Y; +}; + +// vector flat cast from struct +// CHECK-LABEL: define void {{.*}}call3 
+// CHECK: [[s:%.*]] = alloca %struct.S, align 4 +// CHECK-NEXT: [[A:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.S, align 4 +// CHECK-NEXT: [[Tmp2:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[s]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[s]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 1 +// CHECK-NEXT: [[B:%.*]] = load <2 x i32>, ptr [[Tmp2]], align 8 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4 +// CHECK-NEXT: [[C:%.*]] = insertelement <2 x i32> [[B]], i32 [[L]], i64 0 +// CHECK-NEXT: [[L2:%.*]] = load float, ptr [[G2]], align 4 +// CHECK-NEXT: [[D:%.*]] = fptosi float [[L2]] to i32 +// CHECK-NEXT: [[E:%.*]] = insertelement <2 x i32> [[C]], i32 [[D]], i64 1 +// CHECK-NEXT: store <2 x i32> [[E]], ptr [[A]], align 8 +export void call3() { + S s = {1, 2.0}; + int2 A = (int2)s; +} + +// truncate array to scalar +// CHECK-LABEL: define void {{.*}}call4 +// CHECK: [[A:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[B:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 1 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4 +// CHECK-NEXT: store i32 [[L]], ptr [[B]], align 4 +export void call4() { + int A[2] = {1,2}; + int B = (int)A; +} + +// truncate struct to scalar +// CHECK-LABEL: define void {{.*}}call5 +// CHECK: [[s:%.*]] = alloca %struct.S, align 
4 +// CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.S, align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[s]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[s]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 1 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4 +// CHECK-NEXT: store i32 [[L]], ptr [[A]], align 4 +export void call5() { + S s = {1, 2.0}; + int A = (int)s; +} From 28858755ed754b2ba9affd92728505c5a5c39787 Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Fri, 6 Dec 2024 19:12:34 +0000 Subject: [PATCH 05/18] address PR comments add some todos that need to be addressed --- clang/lib/AST/ExprConstant.cpp | 1 + clang/lib/CodeGen/CGExprAgg.cpp | 1 + clang/lib/Sema/SemaHLSL.cpp | 91 ++++++++------------------------- 3 files changed, 23 insertions(+), 70 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index b548cef41b752..e782e6227234a 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -14857,6 +14857,7 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) { case CK_FixedPointCast: case CK_IntegralToFixedPoint: case CK_MatrixCast: + // TODO does CK_HLSLAggregateCast belong here? llvm_unreachable("invalid cast kind for integral value"); case CK_BitCast: diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index e3b47de958ce5..b0d3a0fb5e590 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -1538,6 +1538,7 @@ static bool castPreservesZero(const CastExpr *CE) { case CK_NonAtomicToAtomic: case CK_AtomicToNonAtomic: case CK_HLSLVectorTruncation: + // TODO does CK_HLSLAggregateCast preserve zero? 
return true; case CK_BaseToDerivedMemberPointer: diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 5c7af8056063a..8c32e77d4f799 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2420,30 +2420,13 @@ bool SemaHLSL::CanPerformScalarCast(QualType SrcTy, QualType DestTy) { switch (Type::ScalarTypeKind SrcKind = SrcTy->getScalarTypeKind()) { case Type::STK_MemberPointer: - return false; - case Type::STK_CPointer: case Type::STK_BlockPointer: case Type::STK_ObjCObjectPointer: - switch (DestTy->getScalarTypeKind()) { - case Type::STK_CPointer: - case Type::STK_BlockPointer: - case Type::STK_ObjCObjectPointer: - case Type::STK_Bool: - case Type::STK_Integral: - return true; - case Type::STK_Floating: - case Type::STK_FloatingComplex: - case Type::STK_IntegralComplex: - case Type::STK_MemberPointer: - return false; - case Type::STK_FixedPoint: - llvm_unreachable("HLSL doesn't have fixed point types."); - } - llvm_unreachable("Should have returned before this"); + llvm_unreachable("HLSL doesn't support pointers."); case Type::STK_FixedPoint: - llvm_unreachable("HLSL doesn't have fixed point types."); + llvm_unreachable("HLSL doesn't support fixed point types."); case Type::STK_Bool: // casting from bool is like casting from an integer case Type::STK_Integral: @@ -2451,16 +2434,17 @@ bool SemaHLSL::CanPerformScalarCast(QualType SrcTy, QualType DestTy) { case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: + case Type::STK_MemberPointer: + llvm_unreachable("HLSL doesn't support pointers."); case Type::STK_Bool: case Type::STK_Integral: case Type::STK_Floating: + return true; case Type::STK_IntegralComplex: case Type::STK_FloatingComplex: - return true; + llvm_unreachable("HLSL doesn't support complex types."); case Type::STK_FixedPoint: - llvm_unreachable("HLSL doesn't have fixed point types."); - case Type::STK_MemberPointer: - return false; + llvm_unreachable("HLSL doesn't support fixed 
point types."); } llvm_unreachable("Should have returned before this"); @@ -2469,60 +2453,30 @@ bool SemaHLSL::CanPerformScalarCast(QualType SrcTy, QualType DestTy) { case Type::STK_Floating: case Type::STK_Bool: case Type::STK_Integral: + return true; case Type::STK_FloatingComplex: case Type::STK_IntegralComplex: - return true; + llvm_unreachable("HLSL doesn't support complex types."); case Type::STK_FixedPoint: - llvm_unreachable("HLSL doesn't have fixed point types."); + llvm_unreachable("HLSL doesn't support fixed point types."); case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: case Type::STK_MemberPointer: - return false; + llvm_unreachable("HLSL doesn't support pointers."); } llvm_unreachable("Should have returned before this"); case Type::STK_FloatingComplex: - switch (DestTy->getScalarTypeKind()) { - case Type::STK_FloatingComplex: - case Type::STK_IntegralComplex: - case Type::STK_Floating: - case Type::STK_Bool: - case Type::STK_Integral: - return true; - case Type::STK_CPointer: - case Type::STK_ObjCObjectPointer: - case Type::STK_BlockPointer: - case Type::STK_MemberPointer: - return false; - case Type::STK_FixedPoint: - llvm_unreachable("HLSL doesn't have fixed point types."); - } - llvm_unreachable("Should have returned before this"); - case Type::STK_IntegralComplex: - switch (DestTy->getScalarTypeKind()) { - case Type::STK_FloatingComplex: - case Type::STK_IntegralComplex: - case Type::STK_Integral: - case Type::STK_Bool: - case Type::STK_Floating: - return true; - case Type::STK_CPointer: - case Type::STK_ObjCObjectPointer: - case Type::STK_BlockPointer: - case Type::STK_MemberPointer: - return false; - case Type::STK_FixedPoint: - llvm_unreachable("HLSL doesn't have fixed point types."); - } - llvm_unreachable("Should have returned before this"); + llvm_unreachable("HLSL doesn't support complex types."); } llvm_unreachable("Unhandled scalar cast"); } // Can we perform an HLSL Flattened cast? 
+// TODO: update this code when matrices are added bool SemaHLSL::CanPerformAggregateCast(Expr *Src, QualType DestTy) { // Don't handle casts where LHS and RHS are any combination of scalar/vector @@ -2531,8 +2485,8 @@ bool SemaHLSL::CanPerformAggregateCast(Expr *Src, QualType DestTy) { if (SrcTy->isScalarType()) // always a splat and this cast doesn't handle that return false; - if ((DestTy->isScalarType() || DestTy->isVectorType()) && - (SrcTy->isScalarType() || SrcTy->isVectorType())) + if (SrcTy->isVectorType() && + (DestTy->isScalarType() || DestTy->isVectorType())) return false; llvm::SmallVector DestTypes; @@ -2542,18 +2496,15 @@ bool SemaHLSL::CanPerformAggregateCast(Expr *Src, QualType DestTy) { // Usually the size of SrcTypes must be greater than or equal to the size of // DestTypes. - if (SrcTypes.size() >= DestTypes.size()) { + if (SrcTypes.size() < DestTypes.size()) + return false; - unsigned i; - for (i = 0; i < DestTypes.size() && i < SrcTypes.size(); i++) { - if (!CanPerformScalarCast(SrcTypes[i], DestTypes[i])) { - return false; - } + for (unsigned i = 0; i < DestTypes.size() && i < SrcTypes.size(); i++) { + if (!CanPerformScalarCast(SrcTypes[i], DestTypes[i])) { + return false; } - return true; - } else { // can't cast, Src is wrong size for Dest - return false; } + return true; } ExprResult SemaHLSL::ActOnOutParamExpr(ParmVarDecl *Param, Expr *Arg) { From c5650eddc62863948b24646d98bd443c2f4e8edc Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Sat, 7 Dec 2024 17:16:39 +0000 Subject: [PATCH 06/18] new tests --- .../SemaHLSL/Language/FlatCast-errors.hlsl | 8 +++++++ clang/test/SemaHLSL/Language/FlatCasts.hlsl | 23 +++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 clang/test/SemaHLSL/Language/FlatCast-errors.hlsl create mode 100644 clang/test/SemaHLSL/Language/FlatCasts.hlsl diff --git a/clang/test/SemaHLSL/Language/FlatCast-errors.hlsl b/clang/test/SemaHLSL/Language/FlatCast-errors.hlsl new file mode 100644 index 
0000000000000..0197e8b1e3676 --- /dev/null +++ b/clang/test/SemaHLSL/Language/FlatCast-errors.hlsl @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -verify + +export void cantCast() { + int A[3] = {1,2,3}; + int B[4] = {1,2,3,4}; + B = (int[4])A; + // expected-error@-1 {{C-style cast from 'int *' to 'int[4]' is not allowed}} +} \ No newline at end of file diff --git a/clang/test/SemaHLSL/Language/FlatCasts.hlsl b/clang/test/SemaHLSL/Language/FlatCasts.hlsl new file mode 100644 index 0000000000000..c869b32f0276c --- /dev/null +++ b/clang/test/SemaHLSL/Language/FlatCasts.hlsl @@ -0,0 +1,23 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -finclude-default-header -fnative-half-type %s -ast-dump | FileCheck %s + +// truncation +// CHECK-LABEL: call1 +// CHECK: CStyleCastExpr {{.*}} 'int[1]' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int[2]' part_of_explicit_cast +// CHECK-NEXT: DeclRefExpr {{.*}} 'int[2]' lvalue Var {{.*}} 'A' 'int[2]' +export void call1() { + int A[2] = {0,1}; + int B[1] = {4}; + B = (int[1])A; +} + +// flat cast of equal size +// CHECK-LABEL: call2 +// CHECK: CStyleCastExpr {{.*}} 'float[1]' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int[1]' part_of_explicit_cast +// CHECK-NEXT: DeclRefExpr {{.*}} 'int[1]' lvalue Var {{.*}} 'A' 'int[1]' +export void call2() { + int A[1] = {0}; + float B[1] = {1.0}; + B = (float[1])A; +} From f4819b80212065b611c6b1efec02cf4db15f664a Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Mon, 9 Dec 2024 20:31:57 +0000 Subject: [PATCH 07/18] Self Review. 
--- clang/lib/CodeGen/CGExpr.cpp | 24 ------------- clang/lib/CodeGen/CGExprAgg.cpp | 34 +++++++++++++++++-- clang/lib/CodeGen/CGExprScalar.cpp | 15 ++++++-- clang/lib/CodeGen/CodeGenFunction.h | 3 -- clang/lib/Sema/SemaCast.cpp | 4 +-- clang/lib/Sema/SemaHLSL.cpp | 29 ++++++++-------- .../BasicFeatures/ArrayFlatCast.hlsl | 32 ++++++++--------- .../BasicFeatures/StructFlatCast.hlsl | 34 +++++++++---------- .../BasicFeatures/VectorFlatCast.hlsl | 16 ++++----- 9 files changed, 100 insertions(+), 91 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 6b9c437ef7e24..f85f10eeb422d 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -6360,30 +6360,6 @@ LValue CodeGenFunction::EmitPseudoObjectLValue(const PseudoObjectExpr *E) { return emitPseudoObjectExpr(*this, E, true, AggValueSlot::ignored()).LV; } -llvm::Value * -CodeGenFunction::PerformLoad(std::pair &GEP) { - Address GEPAddress = GEP.first; - llvm::Value *Idx = GEP.second; - llvm::Value *V = Builder.CreateLoad(GEPAddress, "load"); - if (Idx) { // loading from a vector so perform an extract as well - return Builder.CreateExtractElement(V, Idx, "vec.load"); - } - return V; -} - -llvm::Value * -CodeGenFunction::PerformStore(std::pair &GEP, - llvm::Value *Val) { - Address GEPAddress = GEP.first; - llvm::Value *Idx = GEP.second; - if (Idx) { - llvm::Value *V = Builder.CreateLoad(GEPAddress, "load.for.insert"); - return Builder.CreateInsertElement(V, Val, Idx); - } else { - return Builder.CreateStore(Val, GEPAddress); - } -} - void CodeGenFunction::FlattenAccessAndType( Address Val, QualType SrcTy, SmallVector &IdxList, SmallVector, 16> &GEPList, diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index b0d3a0fb5e590..cb4e24062aad2 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -498,6 +498,9 @@ static void EmitHLSLScalarFlatCast(CodeGenFunction &CGF, Address DestVal, // Flatten our 
destination SmallVector DestTypes; // Flattened type SmallVector IdxList; + IdxList.push_back( + llvm::ConstantInt::get(llvm::IntegerType::get(CGF.getLLVMContext(), 32), + 0)); // because an Address is a pointer SmallVector, 16> StoreGEPList; // ^^ Flattened accesses to DestVal we want to store into CGF.FlattenAccessAndType(DestVal, DestTy, IdxList, StoreGEPList, DestTypes); @@ -513,7 +516,15 @@ static void EmitHLSLScalarFlatCast(CodeGenFunction &CGF, Address DestVal, CGF.Builder.CreateExtractElement(SrcVal, i, "vec.load"); llvm::Value *Cast = CGF.EmitScalarConversion(Load, SrcTy, DestTypes[i], Loc); - CGF.PerformStore(StoreGEPList[i], Cast); + + // store back + llvm::Value *Idx = StoreGEPList[i].second; + if (Idx) { + llvm::Value *V = + CGF.Builder.CreateLoad(StoreGEPList[i].first, "load.for.insert"); + Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx); + } + CGF.Builder.CreateStore(Cast, StoreGEPList[i].first); } return; } @@ -527,6 +538,9 @@ static void EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address DestVal, // Flatten our destination SmallVector DestTypes; // Flattened type SmallVector IdxList; + IdxList.push_back( + llvm::ConstantInt::get(llvm::IntegerType::get(CGF.getLLVMContext(), 32), + 0)); // Because an Address is a pointer SmallVector, 16> StoreGEPList; // ^^ Flattened accesses to DestVal we want to store into CGF.FlattenAccessAndType(DestVal, DestTy, IdxList, StoreGEPList, DestTypes); @@ -535,6 +549,9 @@ static void EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address DestVal, SmallVector, 16> LoadGEPList; // ^^ Flattened accesses to SrcVal we want to load from IdxList.clear(); + IdxList.push_back( + llvm::ConstantInt::get(llvm::IntegerType::get(CGF.getLLVMContext(), 32), + 0)); // Because an Address is a pointer CGF.FlattenAccessAndType(SrcVal, SrcTy, IdxList, LoadGEPList, SrcTypes); assert(StoreGEPList.size() <= LoadGEPList.size() && @@ -543,10 +560,21 @@ static void EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address DestVal, // 
apply casts to what we load from LoadGEPList // and store result in Dest for (unsigned i = 0; i < StoreGEPList.size(); i++) { - llvm::Value *Load = CGF.PerformLoad(LoadGEPList[i]); + llvm::Value *Idx = LoadGEPList[i].second; + llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[i].first, "load"); + Load = + Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract") : Load; llvm::Value *Cast = CGF.EmitScalarConversion(Load, SrcTypes[i], DestTypes[i], Loc); - CGF.PerformStore(StoreGEPList[i], Cast); + + // store back + Idx = StoreGEPList[i].second; + if (Idx) { + llvm::Value *V = + CGF.Builder.CreateLoad(StoreGEPList[i].first, "load.for.insert"); + Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx); + } + CGF.Builder.CreateStore(Cast, StoreGEPList[i].first); } } diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 3809e3b1db349..b906f89fb6201 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2267,6 +2267,9 @@ static Value *EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address RHSVal, QualType RHSTy, QualType LHSTy, SourceLocation Loc) { SmallVector IdxList; + IdxList.push_back( + llvm::ConstantInt::get(llvm::IntegerType::get(CGF.getLLVMContext(), 32), + 0)); // because an Address is a pointer SmallVector, 16> LoadGEPList; SmallVector SrcTypes; // Flattened type CGF.FlattenAccessAndType(RHSVal, RHSTy, IdxList, LoadGEPList, SrcTypes); @@ -2277,7 +2280,10 @@ static Value *EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address RHSVal, CGF.Builder.CreateLoad(CGF.CreateIRTemp(LHSTy, "flatcast.tmp")); // write to V. for (unsigned i = 0; i < VecTy->getNumElements(); i++) { - llvm::Value *Load = CGF.PerformLoad(LoadGEPList[i]); + llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[i].first, "load"); + llvm::Value *Idx = LoadGEPList[i].second; + Load = Idx ? 
CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract") + : Load; llvm::Value *Cast = CGF.EmitScalarConversion( Load, SrcTypes[i], VecTy->getElementType(), Loc); V = CGF.Builder.CreateInsertElement(V, Cast, i); @@ -2288,8 +2294,11 @@ static Value *EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address RHSVal, assert(LHSTy->isBuiltinType() && "Destination type must be a vector or builtin type."); // TODO add asserts about things being long enough - return CGF.EmitScalarConversion(CGF.PerformLoad(LoadGEPList[0]), LHSTy, - SrcTypes[0], Loc); + llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[0].first, "load"); + llvm::Value *Idx = LoadGEPList[0].second; + Load = + Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract") : Load; + return CGF.EmitScalarConversion(Load, LHSTy, SrcTypes[0], Loc); } // VisitCastExpr - Emit code for an explicit or implicit cast. Implicit casts diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 873dd781eb2e7..9a08f1fcae27f 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4359,9 +4359,6 @@ class CodeGenFunction : public CodeGenTypeCache { AggValueSlot slot = AggValueSlot::ignored()); LValue EmitPseudoObjectLValue(const PseudoObjectExpr *e); - llvm::Value *PerformLoad(std::pair &GEP); - llvm::Value *PerformStore(std::pair &GEP, - llvm::Value *Val); void FlattenAccessAndType( Address Val, QualType SrcTy, SmallVector &IdxList, SmallVector, 16> &GEPList, diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index 0bd7fc91aee18..35a9afc0d1607 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -2772,10 +2772,8 @@ void CastOperation::CheckCXXCStyleCast(bool FunctionalStyle, CheckedConversionKind CCK = FunctionalStyle ? CheckedConversionKind::FunctionalCast : CheckedConversionKind::CStyleCast; - // todo what else should i be doing lvalue to rvalue cast for? - // why dont they do it for records below? 
// This case should not trigger on regular vector splat - // Or vector cast or vector truncation. + // vector cast, vector truncation, or special hlsl splat cases QualType SrcTy = SrcExpr.get()->getType(); if (Self.getLangOpts().HLSL && Self.HLSL().CanPerformAggregateCast(SrcExpr.get(), DestType)) { diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 8c32e77d4f799..3ba5b1f9a95bf 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2412,34 +2412,26 @@ bool SemaHLSL::CheckCompatibleParameterABI(FunctionDecl *New, return HadError; } -// Follows PerformScalarCast +// Generally follows PerformScalarCast, with cases reordered for +// clarity of what types are supported bool SemaHLSL::CanPerformScalarCast(QualType SrcTy, QualType DestTy) { if (SemaRef.getASTContext().hasSameUnqualifiedType(SrcTy, DestTy)) return true; switch (Type::ScalarTypeKind SrcKind = SrcTy->getScalarTypeKind()) { - case Type::STK_MemberPointer: - case Type::STK_CPointer: - case Type::STK_BlockPointer: - case Type::STK_ObjCObjectPointer: - llvm_unreachable("HLSL doesn't support pointers."); - - case Type::STK_FixedPoint: - llvm_unreachable("HLSL doesn't support fixed point types."); - case Type::STK_Bool: // casting from bool is like casting from an integer case Type::STK_Integral: switch (DestTy->getScalarTypeKind()) { + case Type::STK_Bool: + case Type::STK_Integral: + case Type::STK_Floating: + return true; case Type::STK_CPointer: case Type::STK_ObjCObjectPointer: case Type::STK_BlockPointer: case Type::STK_MemberPointer: llvm_unreachable("HLSL doesn't support pointers."); - case Type::STK_Bool: - case Type::STK_Integral: - case Type::STK_Floating: - return true; case Type::STK_IntegralComplex: case Type::STK_FloatingComplex: llvm_unreachable("HLSL doesn't support complex types."); @@ -2467,6 +2459,15 @@ bool SemaHLSL::CanPerformScalarCast(QualType SrcTy, QualType DestTy) { } llvm_unreachable("Should have returned before this"); + case 
Type::STK_MemberPointer: + case Type::STK_CPointer: + case Type::STK_BlockPointer: + case Type::STK_ObjCObjectPointer: + llvm_unreachable("HLSL doesn't support pointers."); + + case Type::STK_FixedPoint: + llvm_unreachable("HLSL doesn't support fixed point types."); + case Type::STK_FloatingComplex: case Type::STK_IntegralComplex: llvm_unreachable("HLSL doesn't support complex types."); diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayFlatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayFlatCast.hlsl index 23a71a2ecc6b9..afe301143ebc6 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/ArrayFlatCast.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayFlatCast.hlsl @@ -8,9 +8,9 @@ // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false) // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 4, i1 false) // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false) -// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[B]], i32 0 -// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0 -// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 1 +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[B]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 1 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G2]], align 4 // CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4 export void call1() { @@ -27,8 +27,8 @@ export void call1() { // CHECK-NEXT: call void @llvm.memset.p0.i32(ptr align 4 [[A]], i8 0, i32 4, i1 false) // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 4, i1 false) // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 4, i1 false) -// 
CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x float], ptr [[B]], i32 0 -// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [1 x i32], ptr [[Tmp]], i32 0 +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x float], ptr [[B]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [1 x i32], ptr [[Tmp]], i32 0, i32 0 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G2]], align 4 // CHECK-NEXT: [[C:%.*]] = sitofp i32 [[L]] to float // CHECK-NEXT: store float [[C]], ptr [[G1]], align 4 @@ -45,7 +45,7 @@ export void call2() { // CHECK-NEXT: store <1 x float> splat (float 0x3FF3333340000000), ptr [[A]], align 4 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 4, i1 false) // CHECK-NEXT: [[C:%.*]] = load <1 x float>, ptr [[A]], align 4 -// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[B]], i32 0 +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[B]], i32 0, i32 0 // CHECK-NEXT: [[V:%.*]] = extractelement <1 x float> [[C]], i64 0 // CHECK-NEXT: [[C:%.*]] = fptosi float [[V]] to i32 // CHECK-NEXT: store i32 [[C]], ptr [[G1]], align 4 @@ -63,9 +63,9 @@ export void call3() { // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[A]], ptr align 8 {{.*}}, i32 8, i1 false) // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 8, i1 false) // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[Tmp]], ptr align 8 [[A]], i32 8, i1 false) -// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0 -// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 1 -// CHECK-NEXT: [[VG:%.*]] = getelementptr inbounds [1 x <2 x float>], ptr [[Tmp]], i32 0 +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0, i32 1 +// CHECK-NEXT: [[VG:%.*]] = getelementptr inbounds [1 x <2 x float>], ptr [[Tmp]], i32 
0, i32 0 // CHECK-NEXT: [[L:%.*]] = load <2 x float>, ptr [[VG]], align 8 // CHECK-NEXT: [[VL:%.*]] = extractelement <2 x float> [[L]], i32 0 // CHECK-NEXT: [[C:%.*]] = fptosi float [[VL]] to i32 @@ -88,10 +88,10 @@ export void call5() { // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false) // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B]], ptr align 4 {{.*}}, i32 8, i1 false) // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false) -// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0 -// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 1 -// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 0, i32 0 -// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 1, i32 0 +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i32 0, i32 1 +// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 0, i32 0, i32 0 +// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 0, i32 1, i32 0 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4 // CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4 // CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4 @@ -115,9 +115,9 @@ struct S { // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[s]], ptr align 4 {{.*}}, i32 8, i1 false) // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 4, i1 false) // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[s]], i32 8, i1 false) -// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[A]], i32 0 -// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0 -// CHECK-NEXT: [[G3:%.*]] = getelementptr 
inbounds %struct.S, ptr [[Tmp]], i32 1 +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [1 x i32], ptr [[A]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0, i32 1 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G2]], align 4 // CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4 export void call7() { diff --git a/clang/test/CodeGenHLSL/BasicFeatures/StructFlatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/StructFlatCast.hlsl index c44e340109abb..3ebdb0c329817 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/StructFlatCast.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/StructFlatCast.hlsl @@ -11,8 +11,8 @@ struct S { // CHECK-NEXT: [[s:%.*]] = alloca %struct.S, align 4 // CHECK-NEXT: store <2 x i32> , ptr [[A]], align 8 // CHECK-NEXT: [[L:%.*]] = load <2 x i32>, ptr [[A]], align 8 -// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0 -// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 1 +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1 // CHECK-NEXT: [[VL:%.*]] = extractelement <2 x i32> [[L]], i64 0 // CHECK-NEXT: store i32 [[VL]], ptr [[G1]], align 4 // CHECK-NEXT: [[VL2:%.*]] = extractelement <2 x i32> [[L]], i64 1 @@ -31,10 +31,10 @@ export void call1() { // CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false) // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false) -// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0 -// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 1 -// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0 -// 
CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 1 +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1 +// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 1 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4 // CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4 // CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4 @@ -61,10 +61,10 @@ struct R { // CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.R, align 4 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[r]], ptr align 4 {{.*}}, i32 8, i1 false) // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[r]], i32 8, i1 false) -// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0 -// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 1 +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1 // CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds %struct.R, ptr [[Tmp]], i32 0, i32 0 -// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds %struct.R, ptr [[Tmp]], i32 1 +// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds %struct.R, ptr [[Tmp]], i32 0, i32 1 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4 // CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4 // CHECK-NEXT: [[L4:%.*]] = load float, ptr [[G4]], align 4 @@ -82,9 +82,9 @@ export void call6() { // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false) // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false) // CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.R, ptr [[r]], i32 0, i32 0 -// 
CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.R, ptr [[r]], i32 1 -// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0 -// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 1 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.R, ptr [[r]], i32 0, i32 1 +// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[G4:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 1 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G3]], align 4 // CHECK-NEXT: store i32 [[L]], ptr [[G1]], align 4 // CHECK-NEXT: [[L4:%.*]] = load i32, ptr [[G4]], align 4 @@ -108,11 +108,11 @@ struct T { // CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.T, align 4 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[t]], ptr align 4 {{.*}}, i32 12, i1 false) // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[t]], i32 12, i1 false) -// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0 -// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 1 -// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds %struct.T, ptr [[Tmp]], i32 0 -// CHECK-NEXT: %gep3 = getelementptr inbounds %struct.T, ptr %agg-temp, i32 1 -// CHECK-NEXT: %gep4 = getelementptr inbounds %struct.T, ptr %agg-temp, i32 2 +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[s]], i32 0, i32 1 +// CHECK-NEXT: [[G3:%.*]] = getelementptr inbounds %struct.T, ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: %gep3 = getelementptr inbounds %struct.T, ptr %agg-temp, i32 0, i32 1 +// CHECK-NEXT: %gep4 = getelementptr inbounds %struct.T, ptr %agg-temp, i32 0, i32 2 // CHECK-NEXT: %load = load i32, ptr %gep2, align 4 // CHECK-NEXT: store i32 %load, ptr %gep, align 4 // CHECK-NEXT: %load5 = load i32, ptr %gep3, align 4 diff --git 
a/clang/test/CodeGenHLSL/BasicFeatures/VectorFlatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorFlatCast.hlsl index 9cd320ee9f62d..f579dfb377de5 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/VectorFlatCast.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorFlatCast.hlsl @@ -8,8 +8,8 @@ // CHECK-NEXT: [[Tmp2:%.*]] = alloca <2 x i32>, align 8 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false) // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false) -// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 0, i32 0 -// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 1, i32 0 +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 0, i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x [1 x i32]], ptr [[Tmp]], i32 0, i32 1, i32 0 // CHECK-NEXT: [[C:%.*]] = load <2 x i32>, ptr [[Tmp2]], align 8 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4 // CHECK-NEXT: [[D:%.*]] = insertelement <2 x i32> [[C]], i32 [[L]], i64 0 @@ -34,8 +34,8 @@ struct S { // CHECK-NEXT: [[Tmp2:%.*]] = alloca <2 x i32>, align 8 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[s]], ptr align 4 {{.*}}, i32 8, i1 false) // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[s]], i32 8, i1 false) -// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0 -// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 1 +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0, i32 1 // CHECK-NEXT: [[B:%.*]] = load <2 x i32>, ptr [[Tmp2]], align 8 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4 // CHECK-NEXT: [[C:%.*]] = insertelement <2 x i32> [[B]], i32 [[L]], i64 0 @@ -55,8 
+55,8 @@ export void call3() { // CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false) // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], i32 8, i1 false) -// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0 -// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 1 +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 1 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4 // CHECK-NEXT: store i32 [[L]], ptr [[B]], align 4 export void call4() { @@ -71,8 +71,8 @@ export void call4() { // CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.S, align 4 // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[s]], ptr align 4 {{.*}}, i32 8, i1 false) // CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[s]], i32 8, i1 false) -// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0 -// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 1 +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0, i32 1 // CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4 // CHECK-NEXT: store i32 [[L]], ptr [[A]], align 4 export void call5() { From 89709adc295d94c3befd10ab0b1408ed5045c8bb Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Wed, 11 Dec 2024 19:14:51 +0000 Subject: [PATCH 08/18] self review continued. Make FlattenAccessAndTypes not recursive and handle records correctly. 
--- clang/lib/CodeGen/CGExpr.cpp | 137 ++++++++++++++++------------ clang/lib/CodeGen/CGExprAgg.cpp | 24 ++--- clang/lib/CodeGen/CGExprScalar.cpp | 8 +- clang/lib/CodeGen/CodeGenFunction.h | 6 +- 4 files changed, 92 insertions(+), 83 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index f85f10eeb422d..62980f5077e95 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -6361,62 +6361,87 @@ LValue CodeGenFunction::EmitPseudoObjectLValue(const PseudoObjectExpr *E) { } void CodeGenFunction::FlattenAccessAndType( - Address Val, QualType SrcTy, SmallVector &IdxList, - SmallVector, 16> &GEPList, - SmallVector &FlatTypes) { + Address Addr, QualType AddrType, + SmallVectorImpl> &AccessList, + SmallVectorImpl &FlatTypes) { + // WorkList is list of type we are processing + the Index List to access + // the field of that type in Addr for use in a GEP + llvm::SmallVector>, + 16> + WorkList; llvm::IntegerType *IdxTy = llvm::IntegerType::get(getLLVMContext(), 32); - if (const ConstantArrayType *CAT = dyn_cast(SrcTy)) { - uint64_t Size = CAT->getZExtSize(); - for (unsigned i = 0; i < Size; i++) { - // flatten each member of the array - // add index of this element to index list - llvm::Value *Idx = llvm::ConstantInt::get(IdxTy, i); - IdxList.push_back(Idx); - // recur on this object - FlattenAccessAndType(Val, CAT->getElementType(), IdxList, GEPList, - FlatTypes); - // remove index of this element from index list - IdxList.pop_back(); - } - } else if (const RecordType *RT = SrcTy->getAs()) { - RecordDecl *Record = RT->getDecl(); - const CGRecordLayout &RL = getTypes().getCGRecordLayout(Record); - // do I need to check if its a cxx record decl? 
- - for (auto fieldIter = Record->field_begin(), fieldEnd = Record->field_end(); - fieldIter != fieldEnd; ++fieldIter) { - // get the field number - unsigned FieldNum = RL.getLLVMFieldNo(*fieldIter); - // can we just do *fieldIter->getFieldIndex(); - // add that index to the index list - llvm::Value *Idx = llvm::ConstantInt::get(IdxTy, FieldNum); - IdxList.push_back(Idx); - // recur on the field - FlattenAccessAndType(Val, fieldIter->getType(), IdxList, GEPList, - FlatTypes); - // remove index of this element from index list - IdxList.pop_back(); - } - } else if (const VectorType *VT = SrcTy->getAs()) { - llvm::Type *VTy = ConvertTypeForMem(SrcTy); - CharUnits Align = getContext().getTypeAlignInChars(SrcTy); - Address GEP = - Builder.CreateInBoundsGEP(Val, IdxList, VTy, Align, "vector.gep"); - for (unsigned i = 0; i < VT->getNumElements(); i++) { - // add index to the list - llvm::Value *Idx = llvm::ConstantInt::get(IdxTy, i); - // create gep. no need to recur since its always a scalar - // gep on vector is not recommended so combine gep with extract/insert - GEPList.push_back({GEP, Idx}); - FlatTypes.push_back(VT->getElementType()); + WorkList.push_back( + {AddrType, + {llvm::ConstantInt::get( + IdxTy, + 0)}}); // Addr should be a pointer so we need to 'dereference' it + + while (!WorkList.empty()) { + std::pair> P = + WorkList.pop_back_val(); + QualType T = P.first; + llvm::SmallVector IdxList = P.second; + T = T.getCanonicalType().getUnqualifiedType(); + assert(!isa(T) && "Matrix types not yet supported in HLSL"); + if (const auto *CAT = dyn_cast(T)) { + uint64_t Size = CAT->getZExtSize(); + for (int64_t i = Size - 1; i > -1; i--) { + llvm::SmallVector IdxListCopy = IdxList; + IdxListCopy.push_back(llvm::ConstantInt::get(IdxTy, i)); + WorkList.insert(WorkList.end(), {CAT->getElementType(), IdxListCopy}); + } + } else if (const auto *RT = dyn_cast(T)) { + const RecordDecl *Record = RT->getDecl(); + if (Record->isUnion()) { + 
IdxList.push_back(llvm::ConstantInt::get(IdxTy, 0)); + llvm::Type *LLVMT = ConvertTypeForMem(T); + CharUnits Align = getContext().getTypeAlignInChars(T); + Address GEP = + Builder.CreateInBoundsGEP(Addr, IdxList, LLVMT, Align, "union.gep"); + AccessList.push_back({GEP, NULL}); + FlatTypes.push_back(T); + continue; + } + const CXXRecordDecl *CXXD = dyn_cast(Record); + + llvm::SmallVector FieldTypes; + if (CXXD && CXXD->isStandardLayout()) + Record = CXXD->getStandardLayoutBaseWithFields(); + + // deal with potential base classes + if (CXXD && !CXXD->isStandardLayout()) { + for (auto &Base : CXXD->bases()) + FieldTypes.push_back(Base.getType()); + } + + for (auto *FD : Record->fields()) + FieldTypes.push_back(FD->getType()); + + for (int64_t i = FieldTypes.size() - 1; i > -1; i--) { + llvm::SmallVector IdxListCopy = IdxList; + IdxListCopy.push_back(llvm::ConstantInt::get(IdxTy, i)); + WorkList.insert(WorkList.end(), {FieldTypes[i], IdxListCopy}); + } + } else if (const auto *VT = dyn_cast(T)) { + llvm::Type *LLVMT = ConvertTypeForMem(T); + CharUnits Align = getContext().getTypeAlignInChars(T); + Address GEP = + Builder.CreateInBoundsGEP(Addr, IdxList, LLVMT, Align, "vector.gep"); + for (unsigned i = 0; i < VT->getNumElements(); i++) { + llvm::Value *Idx = llvm::ConstantInt::get(IdxTy, i); + // gep on vector fields is not recommended so combine gep with + // extract/insert + AccessList.push_back({GEP, Idx}); + FlatTypes.push_back(VT->getElementType()); + } + } else { + // a scalar/builtin type + llvm::Type *LLVMT = ConvertTypeForMem(T); + CharUnits Align = getContext().getTypeAlignInChars(T); + Address GEP = + Builder.CreateInBoundsGEP(Addr, IdxList, LLVMT, Align, "gep"); + AccessList.push_back({GEP, NULL}); + FlatTypes.push_back(T); } - } else { // should be a scalar should we assert or check? 
- // create a gep - llvm::Type *Ty = ConvertTypeForMem(SrcTy); - CharUnits Align = getContext().getTypeAlignInChars(SrcTy); - Address GEP = Builder.CreateInBoundsGEP(Val, IdxList, Ty, Align, "gep"); - GEPList.push_back({GEP, NULL}); - FlatTypes.push_back(SrcTy); - } - // target extension types? + } } diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index cb4e24062aad2..62a85e983d1c9 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -496,14 +496,10 @@ static void EmitHLSLScalarFlatCast(CodeGenFunction &CGF, Address DestVal, QualType DestTy, llvm::Value *SrcVal, QualType SrcTy, SourceLocation Loc) { // Flatten our destination - SmallVector DestTypes; // Flattened type - SmallVector IdxList; - IdxList.push_back( - llvm::ConstantInt::get(llvm::IntegerType::get(CGF.getLLVMContext(), 32), - 0)); // because an Address is a pointer + SmallVector DestTypes; // Flattened type SmallVector, 16> StoreGEPList; // ^^ Flattened accesses to DestVal we want to store into - CGF.FlattenAccessAndType(DestVal, DestTy, IdxList, StoreGEPList, DestTypes); + CGF.FlattenAccessAndType(DestVal, DestTy, StoreGEPList, DestTypes); if (const VectorType *VT = SrcTy->getAs()) { SrcTy = VT->getElementType(); @@ -536,23 +532,15 @@ static void EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address DestVal, QualType DestTy, Address SrcVal, QualType SrcTy, SourceLocation Loc) { // Flatten our destination - SmallVector DestTypes; // Flattened type - SmallVector IdxList; - IdxList.push_back( - llvm::ConstantInt::get(llvm::IntegerType::get(CGF.getLLVMContext(), 32), - 0)); // Because an Address is a pointer + SmallVector DestTypes; // Flattened type SmallVector, 16> StoreGEPList; // ^^ Flattened accesses to DestVal we want to store into - CGF.FlattenAccessAndType(DestVal, DestTy, IdxList, StoreGEPList, DestTypes); + CGF.FlattenAccessAndType(DestVal, DestTy, StoreGEPList, DestTypes); // Flatten our src - SmallVector SrcTypes; // Flattened 
type + SmallVector SrcTypes; // Flattened type SmallVector, 16> LoadGEPList; // ^^ Flattened accesses to SrcVal we want to load from - IdxList.clear(); - IdxList.push_back( - llvm::ConstantInt::get(llvm::IntegerType::get(CGF.getLLVMContext(), 32), - 0)); // Because an Address is a pointer - CGF.FlattenAccessAndType(SrcVal, SrcTy, IdxList, LoadGEPList, SrcTypes); + CGF.FlattenAccessAndType(SrcVal, SrcTy, LoadGEPList, SrcTypes); assert(StoreGEPList.size() <= LoadGEPList.size() && "Cannot perform HLSL flat cast when flattened source object \ diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index b906f89fb6201..e0f9be5642cd7 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2266,13 +2266,9 @@ bool CodeGenFunction::ShouldNullCheckClassCastValue(const CastExpr *CE) { static Value *EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address RHSVal, QualType RHSTy, QualType LHSTy, SourceLocation Loc) { - SmallVector IdxList; - IdxList.push_back( - llvm::ConstantInt::get(llvm::IntegerType::get(CGF.getLLVMContext(), 32), - 0)); // because an Address is a pointer SmallVector, 16> LoadGEPList; - SmallVector SrcTypes; // Flattened type - CGF.FlattenAccessAndType(RHSVal, RHSTy, IdxList, LoadGEPList, SrcTypes); + SmallVector SrcTypes; // Flattened type + CGF.FlattenAccessAndType(RHSVal, RHSTy, LoadGEPList, SrcTypes); // LHS is either a vector or a builtin? 
// if its a vector create a temp alloca to store into and return that if (auto *VecTy = LHSTy->getAs()) { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 9a08f1fcae27f..1d513d20e81b7 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4360,9 +4360,9 @@ class CodeGenFunction : public CodeGenTypeCache { LValue EmitPseudoObjectLValue(const PseudoObjectExpr *e); void FlattenAccessAndType( - Address Val, QualType SrcTy, SmallVector &IdxList, - SmallVector, 16> &GEPList, - SmallVector &FlatTypes); + Address Addr, QualType AddrTy, + SmallVectorImpl> &AccessList, + SmallVectorImpl &FlatTypes); llvm::Value *EmitIvarOffset(const ObjCInterfaceDecl *Interface, const ObjCIvarDecl *Ivar); From 162c2b5078e2741ee70d4657e40e167d5b876740 Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Fri, 20 Dec 2024 17:48:49 +0000 Subject: [PATCH 09/18] two test showing truncation to scalar --- .../CodeGenHLSL/BasicFeatures/ArrayFlatCast.hlsl | 16 ++++++++++++++++ .../BasicFeatures/StructFlatCast.hlsl | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayFlatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayFlatCast.hlsl index afe301143ebc6..18f82bff3b308 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/ArrayFlatCast.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/ArrayFlatCast.hlsl @@ -1,5 +1,21 @@ // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -disable-llvm-passes -emit-llvm -finclude-default-header -o - %s | FileCheck %s +// array truncation to a scalar +// CHECK-LABEL: define void {{.*}}call0 +// CHECK: [[A:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[B:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[A]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[A]], 
i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds [2 x i32], ptr [[Tmp]], i32 0, i32 1 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4 +// CHECK-NEXT: store i32 [[L]], ptr [[B]], align 4 +export void call0() { + int A[2] = {0,1}; + float B = (float)A; +} + // array truncation // CHECK-LABEL: define void {{.*}}call1 // CHECK: [[A:%.*]] = alloca [2 x i32], align 4 diff --git a/clang/test/CodeGenHLSL/BasicFeatures/StructFlatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/StructFlatCast.hlsl index 3ebdb0c329817..26fde37c901dd 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/StructFlatCast.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/StructFlatCast.hlsl @@ -5,6 +5,22 @@ struct S { float Y; }; +// struct truncation to a scalar +// CHECK-LABEL: define void {{.*}}call0 +// CHECK: [[s:%.*]] = alloca %struct.S, align 4 +// CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[Tmp:%.*]] = alloca %struct.S, align 4 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[s]], ptr align 4 {{.*}}, i32 8, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[s]], i32 8, i1 false) +// CHECK-NEXT: [[G1:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0, i32 0 +// CHECK-NEXT: [[G2:%.*]] = getelementptr inbounds %struct.S, ptr [[Tmp]], i32 0, i32 1 +// CHECK-NEXT: [[L:%.*]] = load i32, ptr [[G1]], align 4 +// CHECK-NEXT: store i32 [[L]], ptr [[A]], align 4 +export void call0() { + S s = {1,2}; + int A = (int)s; +} + // struct from vector // CHECK-LABEL: define void {{.*}}call1 // CHECK: [[A:%.*]] = alloca <2 x i32>, align 8 From fa0e9c90a2b2b19ba466ac26a162f9d50eb583a3 Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Wed, 22 Jan 2025 12:01:36 -0800 Subject: [PATCH 10/18] respond to pr comments --- clang/lib/CodeGen/CGExpr.cpp | 16 +++--------- clang/lib/CodeGen/CGExprAgg.cpp | 42 
++++++++++++++---------------- clang/lib/CodeGen/CGExprScalar.cpp | 9 +++---- clang/lib/Sema/SemaHLSL.cpp | 15 ++++++++--- 4 files changed, 40 insertions(+), 42 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 62980f5077e95..44f37c5bede06 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -6385,23 +6385,15 @@ void CodeGenFunction::FlattenAccessAndType( assert(!isa(T) && "Matrix types not yet supported in HLSL"); if (const auto *CAT = dyn_cast(T)) { uint64_t Size = CAT->getZExtSize(); - for (int64_t i = Size - 1; i > -1; i--) { + for (int64_t I = Size - 1; I > -1; I--) { llvm::SmallVector IdxListCopy = IdxList; - IdxListCopy.push_back(llvm::ConstantInt::get(IdxTy, i)); + IdxListCopy.push_back(llvm::ConstantInt::get(IdxTy, I)); WorkList.insert(WorkList.end(), {CAT->getElementType(), IdxListCopy}); } } else if (const auto *RT = dyn_cast(T)) { const RecordDecl *Record = RT->getDecl(); - if (Record->isUnion()) { - IdxList.push_back(llvm::ConstantInt::get(IdxTy, 0)); - llvm::Type *LLVMT = ConvertTypeForMem(T); - CharUnits Align = getContext().getTypeAlignInChars(T); - Address GEP = - Builder.CreateInBoundsGEP(Addr, IdxList, LLVMT, Align, "union.gep"); - AccessList.push_back({GEP, NULL}); - FlatTypes.push_back(T); - continue; - } + assert(!Record->isUnion() && "Union types not supported in flat cast."); + const CXXRecordDecl *CXXD = dyn_cast(Record); llvm::SmallVector FieldTypes; diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index 62a85e983d1c9..7ac9188ed077d 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -501,30 +501,28 @@ static void EmitHLSLScalarFlatCast(CodeGenFunction &CGF, Address DestVal, // ^^ Flattened accesses to DestVal we want to store into CGF.FlattenAccessAndType(DestVal, DestTy, StoreGEPList, DestTypes); - if (const VectorType *VT = SrcTy->getAs()) { - SrcTy = VT->getElementType(); - assert(StoreGEPList.size() <= 
VT->getNumElements() && - "Cannot perform HLSL flat cast when vector source \ - object has less elements than flattened destination \ - object."); - for (unsigned i = 0; i < StoreGEPList.size(); i++) { - llvm::Value *Load = - CGF.Builder.CreateExtractElement(SrcVal, i, "vec.load"); - llvm::Value *Cast = - CGF.EmitScalarConversion(Load, SrcTy, DestTypes[i], Loc); - - // store back - llvm::Value *Idx = StoreGEPList[i].second; - if (Idx) { - llvm::Value *V = - CGF.Builder.CreateLoad(StoreGEPList[i].first, "load.for.insert"); - Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx); - } - CGF.Builder.CreateStore(Cast, StoreGEPList[i].first); + assert(SrcTy->isVectorType() && "HLSL Flat cast doesn't handle splatting."); + const VectorType *VT = SrcTy->getAs(); + SrcTy = VT->getElementType(); + assert(StoreGEPList.size() <= VT->getNumElements() && + "Cannot perform HLSL flat cast when vector source \ + object has less elements than flattened destination \ + object."); + for (unsigned i = 0; i < StoreGEPList.size(); i++) { + llvm::Value *Load = CGF.Builder.CreateExtractElement(SrcVal, i, "vec.load"); + llvm::Value *Cast = + CGF.EmitScalarConversion(Load, SrcTy, DestTypes[i], Loc); + + // store back + llvm::Value *Idx = StoreGEPList[i].second; + if (Idx) { + llvm::Value *V = + CGF.Builder.CreateLoad(StoreGEPList[i].first, "load.for.insert"); + Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx); } - return; + CGF.Builder.CreateStore(Cast, StoreGEPList[i].first); } - llvm_unreachable("HLSL Flat cast doesn't handle splatting."); + return; } // emit a flat cast where the RHS is an aggregate diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index e0f9be5642cd7..e7f5a4f06f9bc 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2792,11 +2792,10 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { SourceLocation Loc = CE->getExprLoc(); QualType SrcTy = E->getType(); - if (RV.isAggregate()) { // RHS 
is an aggregate - Address SrcVal = RV.getAggregateAddress(); - return EmitHLSLAggregateFlatCast(CGF, SrcVal, SrcTy, DestTy, Loc); - } - llvm_unreachable("Not a valid HLSL Flat Cast."); + assert(RV.isAggregate() && "Not a valid HLSL Flat Cast."); + // RHS is an aggregate + Address SrcVal = RV.getAggregateAddress(); + return EmitHLSLAggregateFlatCast(CGF, SrcVal, SrcTy, DestTy, Loc); } } // end of switch diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 3ba5b1f9a95bf..56013b1ff1566 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2477,7 +2477,7 @@ bool SemaHLSL::CanPerformScalarCast(QualType SrcTy, QualType DestTy) { } // Can we perform an HLSL Flattened cast? -// TODO: update this code when matrices are added +// TODO: update this code when matrices are added; see issue #88060 bool SemaHLSL::CanPerformAggregateCast(Expr *Src, QualType DestTy) { // Don't handle casts where LHS and RHS are any combination of scalar/vector @@ -2500,11 +2500,20 @@ bool SemaHLSL::CanPerformAggregateCast(Expr *Src, QualType DestTy) { if (SrcTypes.size() < DestTypes.size()) return false; - for (unsigned i = 0; i < DestTypes.size() && i < SrcTypes.size(); i++) { - if (!CanPerformScalarCast(SrcTypes[i], DestTypes[i])) { + unsigned I; + for (I = 0; I < DestTypes.size() && I < SrcTypes.size(); I++) { + if (SrcTypes[I]->isUnionType() || DestTypes[I]->isUnionType()) + return false; + if (!CanPerformScalarCast(SrcTypes[I], DestTypes[I])) { return false; } } + + // check the rest of the source type for unions. 
+ for (; I < SrcTypes.size(); I++) { + if (SrcTypes[I]->isUnionType()) + return false; + } return true; } From afb5be23c1e0c0e287f86c60580dd03a84dddfd7 Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Wed, 22 Jan 2025 12:10:56 -0800 Subject: [PATCH 11/18] add case to switch --- clang/lib/CodeGen/CGExprAgg.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index 7ac9188ed077d..9a433f3850f13 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -1552,7 +1552,7 @@ static bool castPreservesZero(const CastExpr *CE) { case CK_NonAtomicToAtomic: case CK_AtomicToNonAtomic: case CK_HLSLVectorTruncation: - // TODO does CK_HLSLAggregateCast preserve zero? + case CK_HLSLAggregateCast: // TODO does CK_HLSLAggregateCast preserve zero? return true; case CK_BaseToDerivedMemberPointer: From c08f927c6d18f08eed81369d70bce290e9540795 Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Fri, 31 Jan 2025 16:54:55 -0800 Subject: [PATCH 12/18] add newline to end of file --- clang/test/SemaHLSL/Language/FlatCast-errors.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/SemaHLSL/Language/FlatCast-errors.hlsl b/clang/test/SemaHLSL/Language/FlatCast-errors.hlsl index 0197e8b1e3676..b5f482940dbdd 100644 --- a/clang/test/SemaHLSL/Language/FlatCast-errors.hlsl +++ b/clang/test/SemaHLSL/Language/FlatCast-errors.hlsl @@ -5,4 +5,4 @@ export void cantCast() { int B[4] = {1,2,3,4}; B = (int[4])A; // expected-error@-1 {{C-style cast from 'int *' to 'int[4]' is not allowed}} -} \ No newline at end of file +} From 20bab28fda6f3d1b6cd4c597ea514b32c223e9c3 Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Fri, 31 Jan 2025 17:01:36 -0800 Subject: [PATCH 13/18] update cases with the cast --- clang/lib/CodeGen/CGExprAgg.cpp | 2 +- clang/lib/Sema/Sema.cpp | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CGExprAgg.cpp 
b/clang/lib/CodeGen/CGExprAgg.cpp index 9a433f3850f13..bc8e1a80331de 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -1552,7 +1552,7 @@ static bool castPreservesZero(const CastExpr *CE) { case CK_NonAtomicToAtomic: case CK_AtomicToNonAtomic: case CK_HLSLVectorTruncation: - case CK_HLSLAggregateCast: // TODO does CK_HLSLAggregateCast preserve zero? + case CK_HLSLAggregateCast: return true; case CK_BaseToDerivedMemberPointer: diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 2f0528d6ab5ce..d6517511d7db4 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -707,7 +707,6 @@ ExprResult Sema::ImpCastExprToType(Expr *E, QualType Ty, case CK_ToVoid: case CK_NonAtomicToAtomic: case CK_HLSLArrayRValue: - case CK_HLSLAggregateCast: break; } } From a0f5473618fb329d35e0c4db592b17461d5fe87f Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Wed, 5 Feb 2025 09:28:51 -0800 Subject: [PATCH 14/18] address pr comments --- clang/lib/AST/ExprConstant.cpp | 2 +- clang/lib/CodeGen/CGExpr.cpp | 20 +++++++------------- clang/lib/CodeGen/CGExprAgg.cpp | 14 +++++++------- clang/lib/CodeGen/CGExprScalar.cpp | 13 +++++++------ 4 files changed, 22 insertions(+), 27 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index e782e6227234a..a3f8e26b9a782 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -14857,7 +14857,6 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) { case CK_FixedPointCast: case CK_IntegralToFixedPoint: case CK_MatrixCast: - // TODO does CK_HLSLAggregateCast belong here? 
llvm_unreachable("invalid cast kind for integral value"); case CK_BitCast: @@ -14876,6 +14875,7 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) { case CK_NoOp: case CK_LValueToRValueBitCast: case CK_HLSLArrayRValue: + case CK_HLSLAggregateCast: return ExprEvaluatorBaseTy::VisitCastExpr(E); case CK_MemberPointerToBoolean: diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 44f37c5bede06..401e7b04a7dd7 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -6370,17 +6370,11 @@ void CodeGenFunction::FlattenAccessAndType( 16> WorkList; llvm::IntegerType *IdxTy = llvm::IntegerType::get(getLLVMContext(), 32); - WorkList.push_back( - {AddrType, - {llvm::ConstantInt::get( - IdxTy, - 0)}}); // Addr should be a pointer so we need to 'dereference' it + // Addr should be a pointer so we need to 'dereference' it + WorkList.push_back({AddrType, {llvm::ConstantInt::get(IdxTy, 0)}}); while (!WorkList.empty()) { - std::pair> P = - WorkList.pop_back_val(); - QualType T = P.first; - llvm::SmallVector IdxList = P.second; + auto [T, IdxList] = WorkList.pop_back_val(); T = T.getCanonicalType().getUnqualifiedType(); assert(!isa(T) && "Matrix types not yet supported in HLSL"); if (const auto *CAT = dyn_cast(T)) { @@ -6388,7 +6382,7 @@ void CodeGenFunction::FlattenAccessAndType( for (int64_t I = Size - 1; I > -1; I--) { llvm::SmallVector IdxListCopy = IdxList; IdxListCopy.push_back(llvm::ConstantInt::get(IdxTy, I)); - WorkList.insert(WorkList.end(), {CAT->getElementType(), IdxListCopy}); + WorkList.emplace_back(CAT->getElementType(), IdxListCopy); } } else if (const auto *RT = dyn_cast(T)) { const RecordDecl *Record = RT->getDecl(); @@ -6419,8 +6413,8 @@ void CodeGenFunction::FlattenAccessAndType( CharUnits Align = getContext().getTypeAlignInChars(T); Address GEP = Builder.CreateInBoundsGEP(Addr, IdxList, LLVMT, Align, "vector.gep"); - for (unsigned i = 0; i < VT->getNumElements(); i++) { - llvm::Value *Idx = 
llvm::ConstantInt::get(IdxTy, i); + for (unsigned I = 0, E = VT->getNumElements(); I < E; I++) { + llvm::Value *Idx = llvm::ConstantInt::get(IdxTy, I); // gep on vector fields is not recommended so combine gep with // extract/insert AccessList.push_back({GEP, Idx}); @@ -6432,7 +6426,7 @@ void CodeGenFunction::FlattenAccessAndType( CharUnits Align = getContext().getTypeAlignInChars(T); Address GEP = Builder.CreateInBoundsGEP(Addr, IdxList, LLVMT, Align, "gep"); - AccessList.push_back({GEP, NULL}); + AccessList.emplace_back(GEP, nullptr); FlatTypes.push_back(T); } } diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index bc8e1a80331de..8755d231a0972 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -508,19 +508,19 @@ static void EmitHLSLScalarFlatCast(CodeGenFunction &CGF, Address DestVal, "Cannot perform HLSL flat cast when vector source \ object has less elements than flattened destination \ object."); - for (unsigned i = 0; i < StoreGEPList.size(); i++) { - llvm::Value *Load = CGF.Builder.CreateExtractElement(SrcVal, i, "vec.load"); + for (unsigned I = 0, Size = StoreGEPList.size(); I < Size; I++) { + llvm::Value *Load = CGF.Builder.CreateExtractElement(SrcVal, I, "vec.load"); llvm::Value *Cast = - CGF.EmitScalarConversion(Load, SrcTy, DestTypes[i], Loc); + CGF.EmitScalarConversion(Load, SrcTy, DestTypes[I], Loc); // store back - llvm::Value *Idx = StoreGEPList[i].second; + llvm::Value *Idx = StoreGEPList[I].second; if (Idx) { llvm::Value *V = - CGF.Builder.CreateLoad(StoreGEPList[i].first, "load.for.insert"); + CGF.Builder.CreateLoad(StoreGEPList[I].first, "load.for.insert"); Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx); } - CGF.Builder.CreateStore(Cast, StoreGEPList[i].first); + CGF.Builder.CreateStore(Cast, StoreGEPList[I].first); } return; } @@ -974,7 +974,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { if (RV.isScalar()) { llvm::Value *SrcVal = RV.getScalarVal(); 
EmitHLSLScalarFlatCast(CGF, DestVal, DestTy, SrcVal, SrcTy, Loc); - } else { // RHS is an aggregate + } else { assert(RV.isAggregate() && "Can't perform HLSL Aggregate cast on a complex type."); Address SrcVal = RV.getAggregateAddress(); diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index e7f5a4f06f9bc..e7136ed11f3c1 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2272,24 +2272,25 @@ static Value *EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address RHSVal, // LHS is either a vector or a builtin? // if its a vector create a temp alloca to store into and return that if (auto *VecTy = LHSTy->getAs()) { + assert(SrcTypes.size() >= VecTy->getNumElements() && + "Flattened type on RHS must have more elements than vector on LHS."); llvm::Value *V = CGF.Builder.CreateLoad(CGF.CreateIRTemp(LHSTy, "flatcast.tmp")); // write to V. - for (unsigned i = 0; i < VecTy->getNumElements(); i++) { - llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[i].first, "load"); - llvm::Value *Idx = LoadGEPList[i].second; + for (unsigned I = 0, E = VecTy->getNumElements(); I < E; I++) { + llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[I].first, "load"); + llvm::Value *Idx = LoadGEPList[I].second; Load = Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract") : Load; llvm::Value *Cast = CGF.EmitScalarConversion( - Load, SrcTypes[i], VecTy->getElementType(), Loc); - V = CGF.Builder.CreateInsertElement(V, Cast, i); + Load, SrcTypes[I], VecTy->getElementType(), Loc); + V = CGF.Builder.CreateInsertElement(V, Cast, I); } return V; } // i its a builtin just do an extract element or load. 
assert(LHSTy->isBuiltinType() && "Destination type must be a vector or builtin type."); - // TODO add asserts about things being long enough llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[0].first, "load"); llvm::Value *Idx = LoadGEPList[0].second; Load = From a7252d9be2ff961729cb9380a7b04cbd68c64088 Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Wed, 5 Feb 2025 09:53:24 -0800 Subject: [PATCH 15/18] self review of loops --- clang/lib/CodeGen/CGExpr.cpp | 6 +++--- clang/lib/CodeGen/CGExprAgg.cpp | 14 +++++++------- clang/lib/Sema/SemaHLSL.cpp | 6 ++++-- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 401e7b04a7dd7..5247db116de2d 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -6403,10 +6403,10 @@ void CodeGenFunction::FlattenAccessAndType( for (auto *FD : Record->fields()) FieldTypes.push_back(FD->getType()); - for (int64_t i = FieldTypes.size() - 1; i > -1; i--) { + for (int64_t I = FieldTypes.size() - 1; I > -1; I--) { llvm::SmallVector IdxListCopy = IdxList; - IdxListCopy.push_back(llvm::ConstantInt::get(IdxTy, i)); - WorkList.insert(WorkList.end(), {FieldTypes[i], IdxListCopy}); + IdxListCopy.push_back(llvm::ConstantInt::get(IdxTy, I)); + WorkList.insert(WorkList.end(), {FieldTypes[I], IdxListCopy}); } } else if (const auto *VT = dyn_cast(T)) { llvm::Type *LLVMT = ConvertTypeForMem(T); diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index 8755d231a0972..dac2af4f023c7 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -545,22 +545,22 @@ static void EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address DestVal, has less elements than flattened destination object."); // apply casts to what we load from LoadGEPList // and store result in Dest - for (unsigned i = 0; i < StoreGEPList.size(); i++) { - llvm::Value *Idx = LoadGEPList[i].second; - llvm::Value *Load = 
CGF.Builder.CreateLoad(LoadGEPList[i].first, "load"); + for (unsigned I = 0, E = StoreGEPList.size(); I < E; I++) { + llvm::Value *Idx = LoadGEPList[I].second; + llvm::Value *Load = CGF.Builder.CreateLoad(LoadGEPList[I].first, "load"); Load = Idx ? CGF.Builder.CreateExtractElement(Load, Idx, "vec.extract") : Load; llvm::Value *Cast = - CGF.EmitScalarConversion(Load, SrcTypes[i], DestTypes[i], Loc); + CGF.EmitScalarConversion(Load, SrcTypes[I], DestTypes[I], Loc); // store back - Idx = StoreGEPList[i].second; + Idx = StoreGEPList[I].second; if (Idx) { llvm::Value *V = - CGF.Builder.CreateLoad(StoreGEPList[i].first, "load.for.insert"); + CGF.Builder.CreateLoad(StoreGEPList[I].first, "load.for.insert"); Cast = CGF.Builder.CreateInsertElement(V, Cast, Idx); } - CGF.Builder.CreateStore(Cast, StoreGEPList[i].first); + CGF.Builder.CreateStore(Cast, StoreGEPList[I].first); } } diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 56013b1ff1566..5bf58535bc022 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2500,8 +2500,10 @@ bool SemaHLSL::CanPerformAggregateCast(Expr *Src, QualType DestTy) { if (SrcTypes.size() < DestTypes.size()) return false; + unsigned SrcSize = SrcTypes.size(); + unsigned DstSize = DestTypes.size(); unsigned I; - for (I = 0; I < DestTypes.size() && I < SrcTypes.size(); I++) { + for (I = 0; I < DstSize && I < SrcSize; I++) { if (SrcTypes[I]->isUnionType() || DestTypes[I]->isUnionType()) return false; if (!CanPerformScalarCast(SrcTypes[I], DestTypes[I])) { @@ -2510,7 +2512,7 @@ bool SemaHLSL::CanPerformAggregateCast(Expr *Src, QualType DestTy) { } // check the rest of the source type for unions. 
- for (; I < SrcTypes.size(); I++) { + for (; I < SrcSize; I++) { if (SrcTypes[I]->isUnionType()) return false; } From 87d09f82e2c24915bee5718b7b12e9a61ebc3f6f Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Wed, 5 Feb 2025 16:44:23 -0800 Subject: [PATCH 16/18] replace HLSLAggregateCast with HLSLElementwiseCast --- clang/include/clang/AST/OperationKinds.def | 2 +- clang/lib/AST/Expr.cpp | 2 +- clang/lib/AST/ExprConstant.cpp | 4 ++-- clang/lib/CodeGen/CGExpr.cpp | 2 +- clang/lib/CodeGen/CGExprAgg.cpp | 12 ++++++------ clang/lib/CodeGen/CGExprComplex.cpp | 2 +- clang/lib/CodeGen/CGExprConstant.cpp | 2 +- clang/lib/CodeGen/CGExprScalar.cpp | 10 +++++----- clang/lib/Edit/RewriteObjCFoundationAPI.cpp | 2 +- clang/lib/Sema/SemaCast.cpp | 2 +- clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp | 2 +- clang/test/SemaHLSL/Language/FlatCasts.hlsl | 4 ++-- 12 files changed, 23 insertions(+), 23 deletions(-) diff --git a/clang/include/clang/AST/OperationKinds.def b/clang/include/clang/AST/OperationKinds.def index 9323d4e861a73..b3dc7c3d8dc77 100644 --- a/clang/include/clang/AST/OperationKinds.def +++ b/clang/include/clang/AST/OperationKinds.def @@ -368,7 +368,7 @@ CAST_OPERATION(HLSLVectorTruncation) CAST_OPERATION(HLSLArrayRValue) // Aggregate by Value cast (HLSL only). -CAST_OPERATION(HLSLAggregateCast) +CAST_OPERATION(HLSLElementwiseCast) //===- Binary Operations -------------------------------------------------===// // Operators listed in order of precedence. 
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 4764bc84ce498..3129727703ef6 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -1942,7 +1942,7 @@ bool CastExpr::CastConsistency() const { case CK_FixedPointToBoolean: case CK_HLSLArrayRValue: case CK_HLSLVectorTruncation: - case CK_HLSLAggregateCast: + case CK_HLSLElementwiseCast: CheckNoBasePath: assert(path_empty() && "Cast kind should not have a base path!"); break; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index a3f8e26b9a782..068d7333212f6 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -14875,7 +14875,7 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) { case CK_NoOp: case CK_LValueToRValueBitCast: case CK_HLSLArrayRValue: - case CK_HLSLAggregateCast: + case CK_HLSLElementwiseCast: return ExprEvaluatorBaseTy::VisitCastExpr(E); case CK_MemberPointerToBoolean: @@ -15734,7 +15734,7 @@ bool ComplexExprEvaluator::VisitCastExpr(const CastExpr *E) { case CK_IntegralToFixedPoint: case CK_MatrixCast: case CK_HLSLVectorTruncation: - case CK_HLSLAggregateCast: + case CK_HLSLElementwiseCast: llvm_unreachable("invalid cast kind for complex value"); case CK_LValueToRValue: diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 5247db116de2d..24cb3b9ccb55c 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5320,7 +5320,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { case CK_MatrixCast: case CK_HLSLVectorTruncation: case CK_HLSLArrayRValue: - case CK_HLSLAggregateCast: + case CK_HLSLElementwiseCast: return EmitUnsupportedLValue(E, "unexpected cast lvalue"); case CK_Dependent: diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index dac2af4f023c7..c3f1cbed6b39f 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -526,9 +526,9 @@ static void EmitHLSLScalarFlatCast(CodeGenFunction 
&CGF, Address DestVal, } // emit a flat cast where the RHS is an aggregate -static void EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address DestVal, - QualType DestTy, Address SrcVal, - QualType SrcTy, SourceLocation Loc) { +static void EmitHLSLElementwiseCast(CodeGenFunction &CGF, Address DestVal, + QualType DestTy, Address SrcVal, + QualType SrcTy, SourceLocation Loc) { // Flatten our destination SmallVector DestTypes; // Flattened type SmallVector, 16> StoreGEPList; @@ -963,7 +963,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { case CK_HLSLArrayRValue: Visit(E->getSubExpr()); break; - case CK_HLSLAggregateCast: { + case CK_HLSLElementwiseCast: { Expr *Src = E->getSubExpr(); QualType SrcTy = Src->getType(); RValue RV = CGF.EmitAnyExpr(Src); @@ -978,7 +978,7 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { assert(RV.isAggregate() && "Can't perform HLSL Aggregate cast on a complex type."); Address SrcVal = RV.getAggregateAddress(); - EmitHLSLAggregateFlatCast(CGF, DestVal, DestTy, SrcVal, SrcTy, Loc); + EmitHLSLElementwiseCast(CGF, DestVal, DestTy, SrcVal, SrcTy, Loc); } break; } @@ -1552,7 +1552,7 @@ static bool castPreservesZero(const CastExpr *CE) { case CK_NonAtomicToAtomic: case CK_AtomicToNonAtomic: case CK_HLSLVectorTruncation: - case CK_HLSLAggregateCast: + case CK_HLSLElementwiseCast: return true; case CK_BaseToDerivedMemberPointer: diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index 05680d36aa2bd..c2679ea92dc97 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -610,7 +610,7 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op, case CK_MatrixCast: case CK_HLSLVectorTruncation: case CK_HLSLArrayRValue: - case CK_HLSLAggregateCast: + case CK_HLSLElementwiseCast: llvm_unreachable("invalid cast kind for complex value"); case CK_FloatingRealToComplex: diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index 
6d15bc9058e45..ef11798869d3b 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -1335,7 +1335,7 @@ class ConstExprEmitter case CK_MatrixCast: case CK_HLSLVectorTruncation: case CK_HLSLArrayRValue: - case CK_HLSLAggregateCast: + case CK_HLSLElementwiseCast: return nullptr; } llvm_unreachable("Invalid CastKind"); diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index e7136ed11f3c1..d099f3bdfa18a 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2263,9 +2263,9 @@ bool CodeGenFunction::ShouldNullCheckClassCastValue(const CastExpr *CE) { } // RHS is an aggregate type -static Value *EmitHLSLAggregateFlatCast(CodeGenFunction &CGF, Address RHSVal, - QualType RHSTy, QualType LHSTy, - SourceLocation Loc) { +static Value *EmitHLSLElementwiseCast(CodeGenFunction &CGF, Address RHSVal, + QualType RHSTy, QualType LHSTy, + SourceLocation Loc) { SmallVector, 16> LoadGEPList; SmallVector SrcTypes; // Flattened type CGF.FlattenAccessAndType(RHSVal, RHSTy, LoadGEPList, SrcTypes); @@ -2788,7 +2788,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { llvm::Value *Zero = llvm::Constant::getNullValue(CGF.SizeTy); return Builder.CreateExtractElement(Vec, Zero, "cast.vtrunc"); } - case CK_HLSLAggregateCast: { + case CK_HLSLElementwiseCast: { RValue RV = CGF.EmitAnyExpr(E); SourceLocation Loc = CE->getExprLoc(); QualType SrcTy = E->getType(); @@ -2796,7 +2796,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { assert(RV.isAggregate() && "Not a valid HLSL Flat Cast."); // RHS is an aggregate Address SrcVal = RV.getAggregateAddress(); - return EmitHLSLAggregateFlatCast(CGF, SrcVal, SrcTy, DestTy, Loc); + return EmitHLSLElementwiseCast(CGF, SrcVal, SrcTy, DestTy, Loc); } } // end of switch diff --git a/clang/lib/Edit/RewriteObjCFoundationAPI.cpp b/clang/lib/Edit/RewriteObjCFoundationAPI.cpp index 63308319a78d1..32f5ebb55155e 100644 --- 
a/clang/lib/Edit/RewriteObjCFoundationAPI.cpp +++ b/clang/lib/Edit/RewriteObjCFoundationAPI.cpp @@ -1085,7 +1085,7 @@ static bool rewriteToNumericBoxedExpression(const ObjCMessageExpr *Msg, llvm_unreachable("OpenCL-specific cast in Objective-C?"); case CK_HLSLVectorTruncation: - case CK_HLSLAggregateCast: + case CK_HLSLElementwiseCast: llvm_unreachable("HLSL-specific cast in Objective-C?"); break; diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index 35a9afc0d1607..2befb6d6f748a 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -2781,7 +2781,7 @@ void CastOperation::CheckCXXCStyleCast(bool FunctionalStyle, SrcExpr = Self.ImpCastExprToType( SrcExpr.get(), Self.Context.getArrayParameterType(SrcTy), CK_HLSLArrayRValue, VK_PRValue, nullptr, CCK); - Kind = CK_HLSLAggregateCast; + Kind = CK_HLSLElementwiseCast; return; } diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp index b105c196fc3bf..3a983421358c7 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp @@ -522,7 +522,7 @@ void ExprEngine::VisitCast(const CastExpr *CastE, const Expr *Ex, case CK_ToUnion: case CK_MatrixCast: case CK_VectorSplat: - case CK_HLSLAggregateCast: + case CK_HLSLElementwiseCast: case CK_HLSLVectorTruncation: { QualType resultType = CastE->getType(); if (CastE->isGLValue()) diff --git a/clang/test/SemaHLSL/Language/FlatCasts.hlsl b/clang/test/SemaHLSL/Language/FlatCasts.hlsl index c869b32f0276c..563d3f02a1485 100644 --- a/clang/test/SemaHLSL/Language/FlatCasts.hlsl +++ b/clang/test/SemaHLSL/Language/FlatCasts.hlsl @@ -2,7 +2,7 @@ // truncation // CHECK-LABEL: call1 -// CHECK: CStyleCastExpr {{.*}} 'int[1]' +// CHECK: CStyleCastExpr {{.*}} 'int[1]' // CHECK-NEXT: ImplicitCastExpr {{.*}} 'int[2]' part_of_explicit_cast // CHECK-NEXT: DeclRefExpr {{.*}} 'int[2]' lvalue Var {{.*}} 'A' 'int[2]' export void call1() { @@ -13,7 +13,7 @@ 
export void call1() { // flat cast of equal size // CHECK-LABEL: call2 -// CHECK: CStyleCastExpr {{.*}} 'float[1]' <HLSLAggregateCast> +// CHECK: CStyleCastExpr {{.*}} 'float[1]' <HLSLElementwiseCast> // CHECK-NEXT: ImplicitCastExpr {{.*}} 'int[1]' <HLSLArrayRValue> part_of_explicit_cast // CHECK-NEXT: DeclRefExpr {{.*}} 'int[1]' lvalue Var {{.*}} 'A' 'int[1]' export void call2() { From 03ed41750c78c9086d078121085ad03aef6f9ede Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Wed, 5 Feb 2025 16:54:20 -0800 Subject: [PATCH 17/18] rename test files --- .../{ArrayFlatCast.hlsl => ArrayElementwiseCast.hlsl} | 0 .../{StructFlatCast.hlsl => StructElementwiseCast.hlsl} | 0 .../{VectorFlatCast.hlsl => VectorElementwiseCast.hlsl} | 0 .../{FlatCast-errors.hlsl => ElementwiseCast-errors.hlsl} | 0 .../SemaHLSL/Language/{FlatCasts.hlsl => ElementwiseCasts.hlsl} | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename clang/test/CodeGenHLSL/BasicFeatures/{ArrayFlatCast.hlsl => ArrayElementwiseCast.hlsl} (100%) rename clang/test/CodeGenHLSL/BasicFeatures/{StructFlatCast.hlsl => StructElementwiseCast.hlsl} (100%) rename clang/test/CodeGenHLSL/BasicFeatures/{VectorFlatCast.hlsl => VectorElementwiseCast.hlsl} (100%) rename clang/test/SemaHLSL/Language/{FlatCast-errors.hlsl => ElementwiseCast-errors.hlsl} (100%) rename clang/test/SemaHLSL/Language/{FlatCasts.hlsl => ElementwiseCasts.hlsl} (100%) diff --git a/clang/test/CodeGenHLSL/BasicFeatures/ArrayFlatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl similarity index 100% rename from clang/test/CodeGenHLSL/BasicFeatures/ArrayFlatCast.hlsl rename to clang/test/CodeGenHLSL/BasicFeatures/ArrayElementwiseCast.hlsl diff --git a/clang/test/CodeGenHLSL/BasicFeatures/StructFlatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl similarity index 100% rename from clang/test/CodeGenHLSL/BasicFeatures/StructFlatCast.hlsl rename to clang/test/CodeGenHLSL/BasicFeatures/StructElementwiseCast.hlsl diff --git
a/clang/test/CodeGenHLSL/BasicFeatures/VectorFlatCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl similarity index 100% rename from clang/test/CodeGenHLSL/BasicFeatures/VectorFlatCast.hlsl rename to clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl diff --git a/clang/test/SemaHLSL/Language/FlatCast-errors.hlsl b/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl similarity index 100% rename from clang/test/SemaHLSL/Language/FlatCast-errors.hlsl rename to clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl diff --git a/clang/test/SemaHLSL/Language/FlatCasts.hlsl b/clang/test/SemaHLSL/Language/ElementwiseCasts.hlsl similarity index 100% rename from clang/test/SemaHLSL/Language/FlatCasts.hlsl rename to clang/test/SemaHLSL/Language/ElementwiseCasts.hlsl From d23411adf0fede190cc68015e0547c766e18069c Mon Sep 17 00:00:00 2001 From: Sarah Spall Date: Wed, 5 Feb 2025 18:58:29 -0800 Subject: [PATCH 18/18] error for bitfields. tests for bitfield errors. 
minor pr suggestion change --- clang/include/clang/Sema/SemaHLSL.h | 3 +- clang/lib/CodeGen/CGExpr.cpp | 2 +- clang/lib/Sema/SemaCast.cpp | 2 +- clang/lib/Sema/SemaHLSL.cpp | 41 ++++++++++++++++++- .../Language/ElementwiseCast-errors.hlsl | 21 ++++++++++ 5 files changed, 64 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h index 6bda1e8ce0ea5..a2ad7c133c794 100644 --- a/clang/include/clang/Sema/SemaHLSL.h +++ b/clang/include/clang/Sema/SemaHLSL.h @@ -141,7 +141,8 @@ class SemaHLSL : public SemaBase { bool diagnoseInputIDType(QualType T, const ParsedAttr &AL); bool CanPerformScalarCast(QualType SrcTy, QualType DestTy); - bool CanPerformAggregateCast(Expr *Src, QualType DestType); + bool ContainsBitField(QualType BaseTy); + bool CanPerformElementwiseCast(Expr *Src, QualType DestType); ExprResult ActOnOutParamExpr(ParmVarDecl *Param, Expr *Arg); QualType getInoutParameterType(QualType Ty); diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 24cb3b9ccb55c..df25abfd84ac0 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -6417,7 +6417,7 @@ void CodeGenFunction::FlattenAccessAndType( llvm::Value *Idx = llvm::ConstantInt::get(IdxTy, I); // gep on vector fields is not recommended so combine gep with // extract/insert - AccessList.push_back({GEP, Idx}); + AccessList.emplace_back(GEP, Idx); FlatTypes.push_back(VT->getElementType()); } } else { diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp index 2befb6d6f748a..9a7a94a8fe432 100644 --- a/clang/lib/Sema/SemaCast.cpp +++ b/clang/lib/Sema/SemaCast.cpp @@ -2776,7 +2776,7 @@ void CastOperation::CheckCXXCStyleCast(bool FunctionalStyle, // vector cast, vector truncation, or special hlsl splat cases QualType SrcTy = SrcExpr.get()->getType(); if (Self.getLangOpts().HLSL && - Self.HLSL().CanPerformAggregateCast(SrcExpr.get(), DestType)) { + 
Self.HLSL().CanPerformElementwiseCast(SrcExpr.get(), DestType)) { if (SrcTy->isConstantArrayType()) SrcExpr = Self.ImpCastExprToType( SrcExpr.get(), Self.Context.getArrayParameterType(SrcTy), diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 5bf58535bc022..33416072c59b4 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -2476,9 +2476,43 @@ bool SemaHLSL::CanPerformScalarCast(QualType SrcTy, QualType DestTy) { llvm_unreachable("Unhandled scalar cast"); } -// Can we perform an HLSL Flattened cast? +// Detect if a type contains a bitfield. Will be removed when +// bitfield support is added to HLSLElementwiseCast +bool SemaHLSL::ContainsBitField(QualType BaseTy) { + llvm::SmallVector<QualType, 16> WorkList; + WorkList.push_back(BaseTy); + while (!WorkList.empty()) { + QualType T = WorkList.pop_back_val(); + T = T.getCanonicalType().getUnqualifiedType(); + // only check aggregate types + if (const auto *AT = dyn_cast<ConstantArrayType>(T)) { + WorkList.push_back(AT->getElementType()); + continue; + } + if (const auto *RT = dyn_cast<RecordType>(T)) { + const RecordDecl *RD = RT->getDecl(); + if (RD->isUnion()) + continue; + + const CXXRecordDecl *CXXD = dyn_cast<CXXRecordDecl>(RD); + + if (CXXD && CXXD->isStandardLayout()) + RD = CXXD->getStandardLayoutBaseWithFields(); + + for (const auto *FD : RD->fields()) { + if (FD->isBitField()) + return true; + WorkList.push_back(FD->getType()); + } + continue; + } + } + return false; +} + +// Can we perform an HLSL Elementwise cast?
// TODO: update this code when matrices are added; see issue #88060 -bool SemaHLSL::CanPerformAggregateCast(Expr *Src, QualType DestTy) { +bool SemaHLSL::CanPerformElementwiseCast(Expr *Src, QualType DestTy) { // Don't handle casts where LHS and RHS are any combination of scalar/vector // There must be an aggregate somewhere @@ -2490,6 +2524,9 @@ bool SemaHLSL::CanPerformAggregateCast(Expr *Src, QualType DestTy) { (DestTy->isScalarType() || DestTy->isVectorType())) return false; + if (ContainsBitField(DestTy) || ContainsBitField(SrcTy)) + return false; + llvm::SmallVector<QualType> DestTypes; BuildFlattenedTypeList(DestTy, DestTypes); llvm::SmallVector<QualType> SrcTypes; diff --git a/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl b/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl index b5f482940dbdd..c900c83a063a0 100644 --- a/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl +++ b/clang/test/SemaHLSL/Language/ElementwiseCast-errors.hlsl @@ -6,3 +6,24 @@ export void cantCast() { B = (int[4])A; // expected-error@-1 {{C-style cast from 'int *' to 'int[4]' is not allowed}} } + +struct S { +// expected-note@-1 {{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'int2' (aka 'vector<int, 2>') to 'const S' for 1st argument}} +// expected-note@-2 {{candidate constructor (the implicit move constructor) not viable: no known conversion from 'int2' (aka 'vector<int, 2>') to 'S' for 1st argument}} +// expected-note@-3 {{candidate constructor (the implicit default constructor) not viable: requires 0 arguments, but 1 was provided}} + int A : 8; + int B; +}; + +// casting types which contain bitfields is not yet supported. +export void cantCast2() { + S s = {1,2}; + int2 C = (int2)s; + // expected-error@-1 {{cannot convert 'S' to 'int2' (aka 'vector<int, 2>') without a conversion operator}} +} + +export void cantCast3() { + int2 C = {1,2}; + S s = (S)C; + // expected-error@-1 {{no matching conversion for C-style cast from 'int2' (aka 'vector<int, 2>') to 'S'}} +}