Skip to content

Commit

Permalink
Add a flag for the upstream global reassociation algorithm change (#6625
Browse files Browse the repository at this point in the history
)

This PR (#6598)
pulls the upstream global reassociation algorithm change in DXC and can
reduce redundant calculations obviously.

However, from the testing result of a large offline suite of shaders,
some shaders got worse compilation results and couldn't benefit from
this upstream change.

This PR adds a flag for the upstream global reassociation change. It
would be easier to roll back if a shader get worse compilation result
due to this upstream change.

This is part 2 of the fix for #6593.
  • Loading branch information
lizhengxing authored May 21, 2024
1 parent 6a34e29 commit 1ee70fd
Show file tree
Hide file tree
Showing 9 changed files with 119 additions and 21 deletions.
2 changes: 2 additions & 0 deletions include/dxc/Support/DxcOptToggles.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ enum {
static const Toggle TOGGLE_GVN = {"gvn", DEFAULT_ON};
static const Toggle TOGGLE_LICM = {"licm", DEFAULT_ON};
static const Toggle TOGGLE_SINK = {"sink", DEFAULT_ON};
static const Toggle TOGGLE_ENABLE_AGGRESSIVE_REASSOCIATION = {
"aggressive-reassociation", DEFAULT_ON};
static const Toggle TOGGLE_LIFETIME_MARKERS = {"lifetime-markers", DEFAULT_ON};
static const Toggle TOGGLE_PARTIAL_LIFETIME_MARKERS = {
"partial-lifetime-markers", DEFAULT_OFF};
Expand Down
1 change: 1 addition & 0 deletions include/llvm/Transforms/IPO/PassManagerBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ class PassManagerBuilder {
unsigned ScanLimit = 0; // HLSL Change
bool EnableGVN = true; // HLSL Change
bool StructurizeLoopExitsForUnroll = false; // HLSL Change
bool HLSLEnableAggressiveReassociation = true; // HLSL Change
bool HLSLEnableLifetimeMarkers = false; // HLSL Change
bool HLSLEnablePartialLifetimeMarkers = false; // HLSL Change
bool HLSLEnableDebugNops = false; // HLSL Change
Expand Down
2 changes: 2 additions & 0 deletions include/llvm/Transforms/Scalar.h
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,8 @@ extern char &DemoteRegisterToMemoryHlslID;
// For example: 4 + (x + 5) -> x + (4 + 5)
//
FunctionPass *createReassociatePass();
FunctionPass *
createReassociatePass(bool HLSLEnableAggressiveReassociation); // HLSL Change

//===----------------------------------------------------------------------===//
//
Expand Down
3 changes: 2 additions & 1 deletion lib/Transforms/IPO/PassManagerBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,8 @@ void PassManagerBuilder::populateModulePassManager(
//MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
// HLSL Change Ends.
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createReassociatePass()); // Reassociate expressions
MPM.add(createReassociatePass(
HLSLEnableAggressiveReassociation)); // Reassociate expressions
// Rotate Loop - disable header duplication at -Oz
MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));
// HLSL Change - disable LICM in frontend for not consider register pressure.
Expand Down
66 changes: 47 additions & 19 deletions lib/Transforms/Scalar/Reassociate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,22 @@ namespace {
initializeReassociatePass(*PassRegistry::getPassRegistry());
}

// HLSL Change - begin
// Enable global reassociation when HLSLEnableAggressiveReassociation is
// set
bool HLSLEnableAggressiveReassociation = true;
Reassociate(bool HLSLEnableAggressiveReassociation) : Reassociate() {
this->HLSLEnableAggressiveReassociation =
HLSLEnableAggressiveReassociation;
}

void applyOptions(PassOptions O) override {
GetPassOptionBool(O, "EnableAggressiveReassociation",
&HLSLEnableAggressiveReassociation,
/*defaultValue*/ true);
}
// HLSL Change - end

bool runOnFunction(Function &F) override;

void getAnalysisUsage(AnalysisUsage &AU) const override {
Expand Down Expand Up @@ -242,6 +258,13 @@ INITIALIZE_PASS(Reassociate, "reassociate",
// Public interface to the Reassociate pass
FunctionPass *llvm::createReassociatePass() { return new Reassociate(); }

// HLSL Change - begin
FunctionPass *
llvm::createReassociatePass(bool HLSLEnableAggressiveReassociation) {
return new Reassociate(HLSLEnableAggressiveReassociation);
}
// HLSL Change - end

/// Return true if V is an instruction of the specified opcode and if it
/// only has one use.
static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
Expand Down Expand Up @@ -2243,7 +2266,8 @@ void Reassociate::ReassociateExpression(BinaryOperator *I) {
return;
}

if (Ops.size() > 2 && Ops.size() <= GlobalReassociateLimit) {
if (HLSLEnableAggressiveReassociation && // HLSL Change
(Ops.size() > 2 && Ops.size() <= GlobalReassociateLimit)) {
// Find the pair with the highest count in the pairmap and move it to the
// back of the list so that it can later be CSE'd.
// example:
Expand Down Expand Up @@ -2347,22 +2371,24 @@ bool Reassociate::runOnFunction(Function &F) {
// Calculate the rank map for F
BuildRankMap(F);

// Build the pair map before running reassociate.
// Technically this would be more accurate if we did it after one round
// of reassociation, but in practice it doesn't seem to help much on
// real-world code, so don't waste the compile time running reassociate
// twice.
// If a user wants, they could expicitly run reassociate twice in their
// pass pipeline for further potential gains.
// It might also be possible to update the pair map during runtime, but the
// overhead of that may be large if there's many reassociable chains.
// TODO: RPOT
// Get the functions basic blocks in Reverse Post Order. This order is used by
// BuildRankMap to pre calculate ranks correctly. It also excludes dead basic
// blocks (it has been seen that the analysis in this pass could hang when
// analysing dead basic blocks).
ReversePostOrderTraversal<Function *> RPOT(&F);
BuildPairMap(RPOT);
if (HLSLEnableAggressiveReassociation) { // HLSL Change
// Build the pair map before running reassociate.
// Technically this would be more accurate if we did it after one round
// of reassociation, but in practice it doesn't seem to help much on
// real-world code, so don't waste the compile time running reassociate
// twice.
// If a user wants, they could expicitly run reassociate twice in their
// pass pipeline for further potential gains.
// It might also be possible to update the pair map during runtime, but the
// overhead of that may be large if there's many reassociable chains.
// TODO: RPOT
// Get the functions basic blocks in Reverse Post Order. This order is used
// by BuildRankMap to pre calculate ranks correctly. It also excludes dead
// basic blocks (it has been seen that the analysis in this pass could hang
// when analysing dead basic blocks).
ReversePostOrderTraversal<Function *> RPOT(&F);
BuildPairMap(RPOT);
} // HLSL Change

MadeChange = false;
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
Expand All @@ -2389,8 +2415,10 @@ bool Reassociate::runOnFunction(Function &F) {
// We are done with the rank map and pair map.
RankMap.clear();
ValueRankMap.clear();
for (auto &Entry : PairMap)
Entry.clear();
if (HLSLEnableAggressiveReassociation) { // HLSL Change
for (auto &Entry : PairMap)
Entry.clear();
} // HLSL Change

return MadeChange;
}
2 changes: 2 additions & 0 deletions tools/clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,8 @@ void EmitAssemblyHelper::CreatePasses() {
OptToggles.IsEnabled(hlsl::options::TOGGLE_LIFETIME_MARKERS);
PMBuilder.HLSLEnablePartialLifetimeMarkers =
OptToggles.IsEnabled(hlsl::options::TOGGLE_PARTIAL_LIFETIME_MARKERS);
PMBuilder.HLSLEnableAggressiveReassociation = OptToggles.IsEnabled(
hlsl::options::TOGGLE_ENABLE_AGGRESSIVE_REASSOCIATION);
// HLSL Change - end

PMBuilder.DisableUnitAtATime = !CodeGenOpts.UnitAtATime;
Expand Down
31 changes: 31 additions & 0 deletions tools/clang/test/DXC/Passes/reassociate/reassociation-flag.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// RUN: %dxc -T cs_6_3 -E cs_main %s -opt-enable aggressive-reassociation | FileCheck %s -check-prefixes=CHECK,COMMON_FACTOR
// RUN: %dxc -T cs_6_3 -E cs_main %s -opt-disable aggressive-reassociation | FileCheck %s -check-prefixes=CHECK,NO_COMMON_FACTOR

// Make sure DXC recognize the common factor and generate optimized dxils if the enable-aggressive-reassociation is true.

// CHECK: [[FACTOR_SRC1:%.*]] = call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96)
// CHECK: [[FACTOR_SRC0:%.*]] = call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 0)

// COMMON_FACTOR: [[FACTOR:%.*]] = mul i32 [[FACTOR_SRC0]], [[FACTOR_SRC1]]
// COMMON_FACTOR: mul i32 [[FACTOR]],
// COMMON_FACTOR: mul i32 [[FACTOR]],

// NO_COMMON_FACTOR: [[EXPRESSION_0:%.*]] = mul i32 [[FACTOR_SRC1]],
// NO_COMMON_FACTOR: mul i32 [[EXPRESSION_0]], [[FACTOR_SRC0]]
// NO_COMMON_FACTOR: [[EXPRESSION_1:%.*]] = mul i32 [[FACTOR_SRC0]], [[FACTOR_SRC1]]
// NO_COMMON_FACTOR: mul i32 [[EXPRESSION_1]],


RWTexture1D < float2 > outColorBuffer : register ( u0 ) ;

[ numthreads ( 8 , 8 , 1 ) ]
void cs_main ( uint3 GroupID : SV_GroupID , uint GroupIndex : SV_GroupIndex , uint3 GTID : SV_GroupThreadID , uint3 DispatchThreadID : SV_DispatchThreadID )
{
// DXC should recognize (GroupIndex * GTID.x) is a common factor
uint a = GroupIndex * GroupID.x;
uint b = GroupIndex * DispatchThreadID.x;
uint c = a * GTID.x;
uint d = b * GTID.x;

outColorBuffer [ DispatchThreadID.y ] = float2(c, d);
}
24 changes: 24 additions & 0 deletions tools/clang/test/DXC/Passes/reassociate/reassociation-flag.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
; RUN: %dxopt %s -reassociate,EnableAggressiveReassociation=1 -gvn -S | FileCheck %s -check-prefixes=CHECK,COMMON_FACTOR
; RUN: %dxopt %s -reassociate,EnableAggressiveReassociation=0 -gvn -S | FileCheck %s -check-prefixes=CHECK,NO_COMMON_FACTOR

; CHECK: @test1

; COMMON_FACTOR: %[[FACTOR:.*]] = mul i32 %X4, %X3
; COMMON_FACTOR-NEXT: %[[C:.*]] = mul i32 %[[FACTOR]], %X1
; COMMON_FACTOR-NEXT: %[[D:.*]] = mul i32 %[[FACTOR]], %X2

; NO_COMMON_FACTOR: %[[A:.*]] = mul i32 %X3, %X1
; NO_COMMON_FACTOR: %[[B:.*]] = mul i32 %X3, %X2
; NO_COMMON_FACTOR: %[[C:.*]] = mul i32 %[[A]], %X4
; NO_COMMON_FACTOR: %[[D:.*]] = mul i32 %[[B]], %X4

; CHECK: %[[E:.*]] = xor i32 %[[C]], %[[D]]
; CHECK: ret i32 %[[E]]
define i32 @test1(i32 %X1, i32 %X2, i32 %X3, i32 %X4) {
%A = mul i32 %X3, %X1
%B = mul i32 %X3, %X2
%C = mul i32 %A, %X4
%D = mul i32 %B, %X4
%E = xor i32 %C, %D
ret i32 %E
}
9 changes: 8 additions & 1 deletion utils/hct/hctdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -6525,7 +6525,14 @@ def add_pass(name, type_name, doc, opts):
[],
)
# createTailCallEliminationPass is removed - but is this checked before?
add_pass("reassociate", "Reassociate", "Reassociate expressions", [])
add_pass(
"reassociate",
"Reassociate",
"Reassociate expressions",
[
{"n": "EnableAggressiveReassociation", "t": "bool", "c": 1},
],
)
add_pass(
"loop-rotate",
"LoopRotate",
Expand Down

0 comments on commit 1ee70fd

Please sign in to comment.