Add an "aggressive-reassociation" optimization toggle (default: on).

The toggle is declared in DxcOptToggles.h, copied into
PassManagerBuilder::HLSLEnableAggressiveReassociation by
EmitAssemblyHelper::CreatePasses(), and handed to the Reassociate pass through
the new createReassociatePass(bool) overload. Inside the pass it gates
BuildPairMap() and the pair-map based operand reordering in
ReassociateExpression(). The same switch is registered in hctdb.py as the
"EnableAggressiveReassociation" option of the "reassociate" pass. The two new
tests exercise the dxc flag and the dxopt pass option respectively.

NOTE(review): line breaks and indentation in this patch were reconstructed
from a whitespace-collapsed copy (hunk line counts were checked against the
hunk headers). Verify the context lines against the tree, or apply with
whitespace-tolerant matching.
NOTE(review): the template argument on ReversePostOrderTraversal was restored
by inference; confirm it matches the existing source line.
NOTE(review): the blob ids on the index lines are carried over unchanged and
do not describe the edited post-images.

diff --git a/include/dxc/Support/DxcOptToggles.h b/include/dxc/Support/DxcOptToggles.h
index cf57b6fa77..04c4c68496 100644
--- a/include/dxc/Support/DxcOptToggles.h
+++ b/include/dxc/Support/DxcOptToggles.h
@@ -38,6 +38,8 @@ enum {
 static const Toggle TOGGLE_GVN = {"gvn", DEFAULT_ON};
 static const Toggle TOGGLE_LICM = {"licm", DEFAULT_ON};
 static const Toggle TOGGLE_SINK = {"sink", DEFAULT_ON};
+static const Toggle TOGGLE_ENABLE_AGGRESSIVE_REASSOCIATION = {
+    "aggressive-reassociation", DEFAULT_ON};
 static const Toggle TOGGLE_LIFETIME_MARKERS = {"lifetime-markers", DEFAULT_ON};
 static const Toggle TOGGLE_PARTIAL_LIFETIME_MARKERS = {
     "partial-lifetime-markers", DEFAULT_OFF};
diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h
index 9726c1aa16..2f342d9412 100644
--- a/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -134,6 +134,7 @@ class PassManagerBuilder {
   unsigned ScanLimit = 0; // HLSL Change
   bool EnableGVN = true; // HLSL Change
   bool StructurizeLoopExitsForUnroll = false; // HLSL Change
+  bool HLSLEnableAggressiveReassociation = true; // HLSL Change
   bool HLSLEnableLifetimeMarkers = false; // HLSL Change
   bool HLSLEnablePartialLifetimeMarkers = false; // HLSL Change
   bool HLSLEnableDebugNops = false; // HLSL Change
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index a7c3eae65a..37bb662fb9 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -331,6 +331,8 @@ extern char &DemoteRegisterToMemoryHlslID;
 // For example: 4 + (x + 5) -> x + (4 + 5)
 //
 FunctionPass *createReassociatePass();
+FunctionPass *
+createReassociatePass(bool HLSLEnableAggressiveReassociation); // HLSL Change
 
 //===----------------------------------------------------------------------===//
 //
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 4b31542df6..0495340dc6 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -469,7 +469,8 @@ void PassManagerBuilder::populateModulePassManager(
   //MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
   // HLSL Change Ends.
   MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
-  MPM.add(createReassociatePass()); // Reassociate expressions
+  MPM.add(createReassociatePass(
+      HLSLEnableAggressiveReassociation)); // Reassociate expressions
   // Rotate Loop - disable header duplication at -Oz
   MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
   // HLSL Change - disable LICM in frontend for not consider register pressure.
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index b5b0f7fa04..9f2b32f852 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -176,6 +176,22 @@ namespace {
       initializeReassociatePass(*PassRegistry::getPassRegistry());
     }
 
+    // HLSL Change - begin
+    // When true (the default), build the pair map and use it to reorder
+    // operands so that frequently occurring pairs can later be CSE'd. When
+    // false, that pair-map step is skipped.
+    bool HLSLEnableAggressiveReassociation = true;
+    explicit Reassociate(bool EnableAggressive) : Reassociate() {
+      HLSLEnableAggressiveReassociation = EnableAggressive;
+    }
+
+    void applyOptions(PassOptions O) override {
+      GetPassOptionBool(O, "EnableAggressiveReassociation",
+                        &HLSLEnableAggressiveReassociation,
+                        /*defaultValue*/ true);
+    }
+    // HLSL Change - end
+
     bool runOnFunction(Function &F) override;
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -242,6 +258,13 @@ INITIALIZE_PASS(Reassociate, "reassociate",
 // Public interface to the Reassociate pass
 FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }
 
+// HLSL Change - begin
+FunctionPass *
+llvm::createReassociatePass(bool HLSLEnableAggressiveReassociation) {
+  return new Reassociate(HLSLEnableAggressiveReassociation);
+}
+// HLSL Change - end
+
 /// Return true if V is an instruction of the specified opcode and if it
 /// only has one use.
 static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
@@ -2243,7 +2266,8 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {
     return;
   }
 
-  if (Ops.size() > 2 && Ops.size() <= GlobalReassociateLimit) {
+  if (HLSLEnableAggressiveReassociation && // HLSL Change
+      (Ops.size() > 2 && Ops.size() <= GlobalReassociateLimit)) {
     // Find the pair with the highest count in the pairmap and move it to the
     // back of the list so that it can later be CSE'd.
     // example:
@@ -2347,22 +2371,24 @@ bool Reassociate::runOnFunction(Function &F) {
   // Calculate the rank map for F
   BuildRankMap(F);
 
-  // Build the pair map before running reassociate.
-  // Technically this would be more accurate if we did it after one round
-  // of reassociation, but in practice it doesn't seem to help much on
-  // real-world code, so don't waste the compile time running reassociate
-  // twice.
-  // If a user wants, they could expicitly run reassociate twice in their
-  // pass pipeline for further potential gains.
-  // It might also be possible to update the pair map during runtime, but the
-  // overhead of that may be large if there's many reassociable chains.
-  // TODO: RPOT
-  // Get the functions basic blocks in Reverse Post Order. This order is used by
-  // BuildRankMap to pre calculate ranks correctly. It also excludes dead basic
-  // blocks (it has been seen that the analysis in this pass could hang when
-  // analysing dead basic blocks).
-  ReversePostOrderTraversal<Function *> RPOT(&F);
-  BuildPairMap(RPOT);
+  if (HLSLEnableAggressiveReassociation) { // HLSL Change
+    // Build the pair map before running reassociate.
+    // Technically this would be more accurate if we did it after one round
+    // of reassociation, but in practice it doesn't seem to help much on
+    // real-world code, so don't waste the compile time running reassociate
+    // twice.
+    // If a user wants, they could explicitly run reassociate twice in their
+    // pass pipeline for further potential gains.
+    // It might also be possible to update the pair map during runtime, but the
+    // overhead of that may be large if there's many reassociable chains.
+    // TODO: RPOT
+    // Get the functions basic blocks in Reverse Post Order. This order is used
+    // by BuildRankMap to pre calculate ranks correctly. It also excludes dead
+    // basic blocks (it has been seen that the analysis in this pass could hang
+    // when analysing dead basic blocks).
+    ReversePostOrderTraversal<Function *> RPOT(&F);
+    BuildPairMap(RPOT);
+  } // HLSL Change
 
   MadeChange = false;
   for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
@@ -2389,8 +2415,10 @@ bool Reassociate::runOnFunction(Function &F) {
   // We are done with the rank map and pair map.
   RankMap.clear();
   ValueRankMap.clear();
-  for (auto &Entry : PairMap)
-    Entry.clear();
+  if (HLSLEnableAggressiveReassociation) { // HLSL Change
+    for (auto &Entry : PairMap)
+      Entry.clear();
+  } // HLSL Change
 
   return MadeChange;
 }
diff --git a/tools/clang/lib/CodeGen/BackendUtil.cpp b/tools/clang/lib/CodeGen/BackendUtil.cpp
index 294ca05946..1013d3149c 100644
--- a/tools/clang/lib/CodeGen/BackendUtil.cpp
+++ b/tools/clang/lib/CodeGen/BackendUtil.cpp
@@ -357,6 +357,8 @@ void EmitAssemblyHelper::CreatePasses() {
       OptToggles.IsEnabled(hlsl::options::TOGGLE_LIFETIME_MARKERS);
   PMBuilder.HLSLEnablePartialLifetimeMarkers =
       OptToggles.IsEnabled(hlsl::options::TOGGLE_PARTIAL_LIFETIME_MARKERS);
+  PMBuilder.HLSLEnableAggressiveReassociation = OptToggles.IsEnabled(
+      hlsl::options::TOGGLE_ENABLE_AGGRESSIVE_REASSOCIATION);
   // HLSL Change - end
 
   PMBuilder.DisableUnitAtATime = !CodeGenOpts.UnitAtATime;
diff --git a/tools/clang/test/DXC/Passes/reassociate/reassociation-flag.hlsl b/tools/clang/test/DXC/Passes/reassociate/reassociation-flag.hlsl
new file mode 100644
index 0000000000..b84498b5a0
--- /dev/null
+++ b/tools/clang/test/DXC/Passes/reassociate/reassociation-flag.hlsl
@@ -0,0 +1,31 @@
+// RUN: %dxc -T cs_6_3 -E cs_main %s -opt-enable aggressive-reassociation | FileCheck %s -check-prefixes=CHECK,COMMON_FACTOR
+// RUN: %dxc -T cs_6_3 -E cs_main %s -opt-disable aggressive-reassociation | FileCheck %s -check-prefixes=CHECK,NO_COMMON_FACTOR
+
+// Make sure DXC recognizes the common factor and generates optimized DXIL when aggressive-reassociation is enabled.
+
+// CHECK: [[FACTOR_SRC1:%.*]] = call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96)
+// CHECK: [[FACTOR_SRC0:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 0)
+
+// COMMON_FACTOR: [[FACTOR:%.*]] = mul i32 [[FACTOR_SRC0]], [[FACTOR_SRC1]]
+// COMMON_FACTOR: mul i32 [[FACTOR]],
+// COMMON_FACTOR: mul i32 [[FACTOR]],
+
+// NO_COMMON_FACTOR: [[EXPRESSION_0:%.*]] = mul i32 [[FACTOR_SRC1]],
+// NO_COMMON_FACTOR: mul i32 [[EXPRESSION_0]], [[FACTOR_SRC0]]
+// NO_COMMON_FACTOR: [[EXPRESSION_1:%.*]] = mul i32 [[FACTOR_SRC0]], [[FACTOR_SRC1]]
+// NO_COMMON_FACTOR: mul i32 [[EXPRESSION_1]],
+
+
+RWTexture1D < float2 > outColorBuffer : register ( u0 ) ;
+
+[ numthreads ( 8 , 8 , 1 ) ]
+void cs_main ( uint3 GroupID : SV_GroupID , uint GroupIndex : SV_GroupIndex , uint3 GTID : SV_GroupThreadID , uint3 DispatchThreadID : SV_DispatchThreadID )
+{
+    // DXC should recognize (GroupIndex * GTID.x) as a common factor
+    uint a = GroupIndex * GroupID.x;
+    uint b = GroupIndex * DispatchThreadID.x;
+    uint c = a * GTID.x;
+    uint d = b * GTID.x;
+
+    outColorBuffer [ DispatchThreadID.y ] = float2(c, d);
+}
diff --git a/tools/clang/test/DXC/Passes/reassociate/reassociation-flag.ll b/tools/clang/test/DXC/Passes/reassociate/reassociation-flag.ll
new file mode 100644
index 0000000000..d8276014e4
--- /dev/null
+++ b/tools/clang/test/DXC/Passes/reassociate/reassociation-flag.ll
@@ -0,0 +1,24 @@
+; RUN: %dxopt %s -reassociate,EnableAggressiveReassociation=1 -gvn -S | FileCheck %s -check-prefixes=CHECK,COMMON_FACTOR
+; RUN: %dxopt %s -reassociate,EnableAggressiveReassociation=0 -gvn -S | FileCheck %s -check-prefixes=CHECK,NO_COMMON_FACTOR
+
+; CHECK: @test1
+
+; COMMON_FACTOR: %[[FACTOR:.*]] = mul i32 %X4, %X3
+; COMMON_FACTOR-NEXT: %[[C:.*]] = mul i32 %[[FACTOR]], %X1
+; COMMON_FACTOR-NEXT: %[[D:.*]] = mul i32 %[[FACTOR]], %X2
+
+; NO_COMMON_FACTOR: %[[A:.*]] = mul i32 %X3, %X1
+; NO_COMMON_FACTOR: %[[B:.*]] = mul i32 %X3, %X2
+; NO_COMMON_FACTOR: %[[C:.*]] = mul i32 %[[A]], %X4
+; NO_COMMON_FACTOR: %[[D:.*]] = mul i32 %[[B]], %X4
+
+; CHECK: %[[E:.*]] = xor i32 %[[C]], %[[D]]
+; CHECK: ret i32 %[[E]]
+define i32 @test1(i32 %X1, i32 %X2, i32 %X3, i32 %X4) {
+  %A = mul i32 %X3, %X1
+  %B = mul i32 %X3, %X2
+  %C = mul i32 %A, %X4
+  %D = mul i32 %B, %X4
+  %E = xor i32 %C, %D
+  ret i32 %E
+}
diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py
index 3e31837901..8d285d6e8b 100644
--- a/utils/hct/hctdb.py
+++ b/utils/hct/hctdb.py
@@ -6525,7 +6525,14 @@ def add_pass(name, type_name, doc, opts):
             [],
         )
         # createTailCallEliminationPass is removed - but is this checked before?
-        add_pass("reassociate", "Reassociate", "Reassociate expressions", [])
+        add_pass(
+            "reassociate",
+            "Reassociate",
+            "Reassociate expressions",
+            [
+                {"n": "EnableAggressiveReassociation", "t": "bool", "c": 1},
+            ],
+        )
         add_pass(
             "loop-rotate",
             "LoopRotate",