-
Notifications
You must be signed in to change notification settings - Fork 13k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[CodeGen][StaticDataSplitter]Support constant pool partitioning #129781
base: users/mingmingl-llvm/spr/globalvariables
Are you sure you want to change the base?
[CodeGen][StaticDataSplitter]Support constant pool partitioning #129781
Conversation
cc @Colibrow |
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-backend-aarch64 Author: Mingming Liu (mingmingl-llvm) ChangesThis is a follow-up patch of #125756 In this PR, static-data-splitter pass produces the aggregated profile counts of constants for constant pools in a global state ( This implementation covers both x86 and aarch64 asm printer. Patch is 25.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/129781.diff 11 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index 3da63af5ba571..2018f411be796 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -18,6 +18,8 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/StaticDataProfileInfo.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/DwarfStringPoolEntry.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -132,6 +134,12 @@ class AsmPrinter : public MachineFunctionPass {
/// default, this is equal to CurrentFnSym.
MCSymbol *CurrentFnSymForSize = nullptr;
+ /// Provides the profile information for constants.
+ const StaticDataProfileInfo *SDPI = nullptr;
+
+ /// The profile summary information.
+ const ProfileSummaryInfo *PSI = nullptr;
+
/// Map a basic block section ID to the begin and end symbols of that section
/// which determine the section's range.
struct MBBSectionRange {
diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 10f0594c267ae..563980fb24ab8 100644
--- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -68,6 +68,12 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile {
const Constant *C,
Align &Alignment) const override;
+ /// Similar to the function above, but append \p SectionSuffix to the section
+ /// name.
+ MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
+ const Constant *C, Align &Alignment,
+ StringRef SectionSuffix) const override;
+
MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
const TargetMachine &TM) const override;
diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
index a5ed1b29dc1bc..1956748b8058b 100644
--- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h
@@ -104,6 +104,13 @@ class TargetLoweringObjectFile : public MCObjectFileInfo {
SectionKind Kind, const Constant *C,
Align &Alignment) const;
+ /// Similar to the function above, but append \p SectionSuffix to the section
+ /// name.
+ virtual MCSection *getSectionForConstant(const DataLayout &DL,
+ SectionKind Kind, const Constant *C,
+ Align &Alignment,
+ StringRef SectionSuffix) const;
+
virtual MCSection *
getSectionForMachineBasicBlock(const Function &F,
const MachineBasicBlock &MBB,
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 3c4280333e76d..60018afe2f8a7 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -2791,8 +2791,26 @@ void AsmPrinter::emitConstantPool() {
if (!CPE.isMachineConstantPoolEntry())
C = CPE.Val.ConstVal;
- MCSection *S = getObjFileLowering().getSectionForConstant(
- getDataLayout(), Kind, C, Alignment);
+ MCSection *S = nullptr;
+ if (TM.Options.EnableStaticDataPartitioning) {
+ SmallString<8> SectionNameSuffix;
+ if (C && SDPI && PSI) {
+ auto Count = SDPI->getConstantProfileCount(C);
+ if (Count) {
+ if (PSI->isHotCount(*Count)) {
+ SectionNameSuffix.append("hot");
+ } else if (PSI->isColdCount(*Count) && !SDPI->hasUnknownCount(C)) {
+ SectionNameSuffix.append("unlikely");
+ }
+ }
+ }
+
+ S = getObjFileLowering().getSectionForConstant(
+ getDataLayout(), Kind, C, Alignment, SectionNameSuffix);
+ } else {
+ S = getObjFileLowering().getSectionForConstant(getDataLayout(), Kind, C,
+ Alignment);
+ }
// The number of sections are small, just do a linear search from the
// last section to the first.
diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp
index c647c3075d79c..4768c0829ea49 100644
--- a/llvm/lib/CodeGen/StaticDataSplitter.cpp
+++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp
@@ -10,7 +10,7 @@
// for the following types of static data:
// - Jump tables
// - Module-internal global variables
-// - Constant pools (TODO)
+// - Constant pools
//
// For the original RFC of this pass please see
// https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744
@@ -117,16 +117,17 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
const TargetMachine &TM = MF.getTarget();
MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ const MachineConstantPool *MCP = MF.getConstantPool();
// Jump table could be used by either terminating instructions or
// non-terminating ones, so we walk all instructions and use
// `MachineOperand::isJTI()` to identify jump table operands.
- // Similarly, `MachineOperand::isCPI()` can identify constant pool usages
- // in the same loop.
+ // Similarly, `MachineOperand::isCPI()` is used to identify constant pool
+ // usages in the same loop.
for (const auto &MBB : MF) {
for (const MachineInstr &I : MBB) {
for (const MachineOperand &Op : I.operands()) {
- if (!Op.isJTI() && !Op.isGlobal())
+ if (!Op.isJTI() && !Op.isGlobal() && !Op.isCPI())
continue;
std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
@@ -148,7 +149,7 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
if (MJTI->updateJumpTableEntryHotness(JTI, Hotness))
++NumChangedJumpTables;
- } else {
+ } else if (Op.isGlobal()) {
// Find global variables with local linkage.
const GlobalVariable *GV =
getLocalLinkageGlobalVariable(Op.getGlobal());
@@ -159,6 +160,20 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) {
!inStaticDataSection(GV, TM))
continue;
SDPI->addConstantProfileCount(GV, Count);
+ } else {
+ assert(Op.isCPI() && "Op must be constant pool index in this branch");
+ int CPI = Op.getIndex();
+ if (CPI == -1)
+ continue;
+
+ assert(MCP != nullptr && "Constant pool info is not available.");
+ const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
+
+ if (CPE.isMachineConstantPoolEntry())
+ continue;
+
+ const Constant *C = CPE.Val.ConstVal;
+ SDPI->addConstantProfileCount(C, Count);
}
}
}
@@ -203,17 +218,34 @@ void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) {
void StaticDataSplitter::annotateStaticDataWithoutProfiles(
const MachineFunction &MF) {
+ const MachineConstantPool *MCP = MF.getConstantPool();
for (const auto &MBB : MF) {
for (const MachineInstr &I : MBB) {
for (const MachineOperand &Op : I.operands()) {
- if (!Op.isGlobal())
- continue;
- const GlobalVariable *GV =
- getLocalLinkageGlobalVariable(Op.getGlobal());
- if (!GV || GV->getName().starts_with("llvm.") ||
- !inStaticDataSection(GV, MF.getTarget()))
+ if (!Op.isGlobal() && !Op.isCPI())
continue;
- SDPI->addConstantProfileCount(GV, std::nullopt);
+ if (Op.isGlobal()) {
+ const GlobalVariable *GV =
+ getLocalLinkageGlobalVariable(Op.getGlobal());
+ if (!GV || GV->getName().starts_with("llvm.") ||
+ !inStaticDataSection(GV, MF.getTarget()))
+ continue;
+ SDPI->addConstantProfileCount(GV, std::nullopt);
+ } else {
+ assert(Op.isCPI() && "Op must be constant pool index in this branch");
+ int CPI = Op.getIndex();
+ if (CPI == -1)
+ continue;
+
+ assert(MCP != nullptr && "Constant pool info is not available.");
+ const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI];
+
+ if (CPE.isMachineConstantPoolEntry())
+ continue;
+
+ const Constant *C = CPE.Val.ConstVal;
+ SDPI->addConstantProfileCount(C, std::nullopt);
+ }
}
}
}
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index be2f5fb0b4a79..6cf8a0e9d211f 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -1072,6 +1072,41 @@ MCSection *TargetLoweringObjectFileELF::getSectionForConstant(
return DataRelROSection;
}
+MCSection *TargetLoweringObjectFileELF::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C, Align &Alignment,
+ StringRef SectionPrefix) const {
+ // TODO: Share code between this function and
+ // MCObjectInfo::initELFMCObjectFileInfo.
+ if (SectionPrefix.empty())
+ return getSectionForConstant(DL, Kind, C, Alignment);
+
+ auto &Context = getContext();
+ if (Kind.isMergeableConst4() && MergeableConst4Section)
+ return Context.getELFSection(".rodata.cst4." + SectionPrefix,
+ ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE, 4);
+ if (Kind.isMergeableConst8() && MergeableConst8Section)
+ return Context.getELFSection(".rodata.cst8." + SectionPrefix,
+ ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE, 8);
+ if (Kind.isMergeableConst16() && MergeableConst16Section)
+ return Context.getELFSection(".rodata.cst16." + SectionPrefix,
+ ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE, 16);
+ if (Kind.isMergeableConst32() && MergeableConst32Section)
+ return Context.getELFSection(".rodata.cst32." + SectionPrefix,
+ ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_MERGE, 32);
+ if (Kind.isReadOnly())
+ return Context.getELFSection(".rodata" + SectionPrefix, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC);
+
+ assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
+ return Context.getELFSection(".data.rel.ro" + SectionPrefix,
+ ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE);
+}
+
/// Returns a unique section for the given machine basic block.
MCSection *TargetLoweringObjectFileELF::getSectionForMachineBasicBlock(
const Function &F, const MachineBasicBlock &MBB,
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index fc38bfe93c1e0..74a78457e42ec 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -226,6 +226,16 @@ class AArch64AsmPrinter : public AsmPrinter {
}
bool runOnMachineFunction(MachineFunction &MF) override {
+ auto *PSIW = getAnalysisIfAvailable<ProfileSummaryInfoWrapperPass>();
+ if (PSIW) {
+ PSI = &PSIW->getPSI();
+ }
+
+ auto *SDPIW = getAnalysisIfAvailable<StaticDataProfileInfoWrapperPass>();
+ if (SDPIW) {
+ SDPI = &SDPIW->getStaticDataProfileInfo();
+ }
+
AArch64FI = MF.getInfo<AArch64FunctionInfo>();
STI = &MF.getSubtarget<AArch64Subtarget>();
diff --git a/llvm/lib/Target/TargetLoweringObjectFile.cpp b/llvm/lib/Target/TargetLoweringObjectFile.cpp
index 02c101055d9f3..07f5532bee17e 100644
--- a/llvm/lib/Target/TargetLoweringObjectFile.cpp
+++ b/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -386,6 +386,16 @@ MCSection *TargetLoweringObjectFile::getSectionForConstant(
return DataSection;
}
+MCSection *TargetLoweringObjectFile::getSectionForConstant(
+ const DataLayout &DL, SectionKind Kind, const Constant *C, Align &Alignment,
+ StringRef SectionPrefix) const {
+ // Fallback to `getSectionForConstant` without `SectionPrefix` parameter if it
+ // is empty.
+ if (SectionPrefix.empty())
+ return getSectionForConstant(DL, Kind, C, Alignment);
+ report_fatal_error("Unimplemented");
+}
+
MCSection *TargetLoweringObjectFile::getSectionForMachineBasicBlock(
const Function &F, const MachineBasicBlock &MBB,
const TargetMachine &TM) const {
diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 79aa898e18bfa..f58974e79efb9 100644
--- a/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -20,6 +20,7 @@
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
+#include "llvm/Analysis/StaticDataProfileInfo.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -61,6 +62,15 @@ X86AsmPrinter::X86AsmPrinter(TargetMachine &TM,
/// runOnMachineFunction - Emit the function body.
///
bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
+ auto *PSIW = getAnalysisIfAvailable<ProfileSummaryInfoWrapperPass>();
+ if (PSIW) {
+ PSI = &PSIW->getPSI();
+ }
+
+ auto *SDPIW = getAnalysisIfAvailable<StaticDataProfileInfoWrapperPass>();
+ if (SDPIW) {
+ SDPI = &SDPIW->getStaticDataProfileInfo();
+ }
Subtarget = &MF.getSubtarget<X86Subtarget>();
SMShadowTracker.startFunction(MF);
diff --git a/llvm/test/CodeGen/AArch64/constant-pool-partition.ll b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll
new file mode 100644
index 0000000000000..5d2df59d34317
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/constant-pool-partition.ll
@@ -0,0 +1,141 @@
+; RUN: llc -mtriple=aarch64 -enable-split-machine-functions \
+; RUN: -partition-static-data-sections=true -function-sections=true \
+; RUN: -unique-section-names=false \
+; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always
+
+; Repeat the RUN command above for big-endian systems.
+; RUN: llc -mtriple=aarch64_be -enable-split-machine-functions \
+; RUN: -partition-static-data-sections=true -function-sections=true \
+; RUN: -unique-section-names=false \
+; RUN: %s -o - 2>&1 | FileCheck %s --dump-input=always
+
+; Tests that constant pool hotness is aggregated across the module. The
+; static-data-splitter processes data from cold_func first, unprofiled_func
+; secondly, and then hot_func. Specifically, tests that
+; - If a constant is accessed by hot functions, all constant pools for this
+; constant (e.g., from an unprofiled function, or cold function) should have
+; `.hot` suffix.
+; - Similarly if a constant is accessed by both cold function and un-profiled
+; function, constant pools for this constant should not have `.unlikely` suffix.
+
+; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8
+; CHECK: .LCPI0_0:
+; CHECK: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005
+; CHECK: .section .rodata.cst8.unlikely,"aM",@progbits,8
+; CHECK: .LCPI0_1:
+; CHECK: .xword 0x3fe5eb851eb851ec // double 0.68500000000000005
+; CHECK: .section .rodata.cst8,"aM",@progbits,8
+; CHECK: .LCPI0_2:
+; CHECK: .byte 0 // 0x0
+; CHECK: .byte 4 // 0x4
+; CHECK: .byte 8 // 0x8
+; CHECK: .byte 12 // 0xc
+; CHECK: .byte 255 // 0xff
+; CHECK: .byte 255 // 0xff
+; CHECK: .byte 255 // 0xff
+; CHECK: .byte 255 // 0xff
+
+; CHECK: .section .rodata.cst8,"aM",@progbits,8
+; CHECK: .LCPI1_0:
+; CHECK: .byte 0 // 0x0
+; CHECK: .byte 4 // 0x4
+; CHECK: .byte 8 // 0x8
+; CHECK: .byte 12 // 0xc
+; CHECK: .byte 255 // 0xff
+; CHECK: .byte 255 // 0xff
+; CHECK: .byte 255 // 0xff
+; CHECK: .byte 255 // 0xff
+; CHECK: .section .rodata.cst16.hot,"aM",@progbits,16
+; CHECK: .LCPI1_1:
+; CHECK: .word 442 // 0x1ba
+; CHECK: .word 100 // 0x64
+; CHECK: .word 0 // 0x0
+; CHECK: .word 0 // 0x0
+
+; CHECK: .section .rodata.cst8.hot,"aM",@progbits,8
+; CHECK: .LCPI2_0:
+; CHECK: .xword 0x3fe5c28f5c28f5c3 // double 0.68000000000000005
+; CHECK: .section .rodata.cst16.hot,"aM",@progbits,16
+; CHECK: .LCPI2_1:
+; CHECK: .word 442 // 0x1ba
+; CHECK: .word 100 // 0x64
+; CHECK: .word 0 // 0x0
+; CHECK: .word 0 // 0x0
+
+; CHECK: .section .rodata.cst32,"aM",@progbits,32
+; CHECK: .globl val
+
+define i32 @cold_func(double %x, <16 x i8> %a, <16 x i8> %b) !prof !16 {
+ %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
+ %num = tail call i32 (...) @func_taking_arbitrary_param(double 6.8500000e-01)
+ %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %t2 = bitcast <8 x i8> %t1 to <2 x i32>
+ %3 = extractelement <2 x i32> %t2, i32 1
+ %sum = add i32 %2, %3
+ %ret = add i32 %sum, %num
+ ret i32 %ret
+}
+
+declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>)
+declare i32 @func_taking_arbitrary_param(...)
+
+define <4 x i1> @unprofiled_func(<16 x i8> %a, <16 x i8> %b) {
+ %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %t2 = bitcast <8 x i8> %t1 to <4 x i16>
+ %t3 = zext <4 x i16> %t2 to <4 x i32>
+ %cmp = icmp ule <4 x i32> <i32 442, i32 100, i32 0, i32 0>, %t3
+ ret <4 x i1> %cmp
+}
+
+define <4 x i1> @hot_func(i32 %0, <4 x i32> %a) !prof !17 {
+ %2 = tail call i32 (...) @func_taking_arbitrary_param(double 6.800000e-01)
+ %b = icmp ule <4 x i32> %a, <i32 442, i32 100, i32 0, i32 0>
+ ret <4 x i1> %b
+}
+
+@val = unnamed_addr constant i256 1
+
+define i32 @main(i32 %0, ptr %1) !prof !16 {
+ br label %7
+
+5: ; preds = %7
+ %x = call double @double_func()
+ %a = call <16 x i8> @vector_func_16i8()
+ %b = call <16 x i8> @vector_func_16i8()
+ call void @cold_func(double %x, <16 x i8> %a, <16 x i8> %b)
+ ret i32 0
+
+7: ; preds = %7, %2
+ %8 = phi i32 [ 0, %2 ], [ %10, %7 ]
+ %9 = call i32 @rand()
+ call void @hot_func(i32 %9)
+ %10 = add i32 %8, 1
+ %11 = icmp eq i32 %10, 100000
+ br i1 %11, label %5, label %7, !prof !18
+}
+
+declare i32 @rand()
+declare double @double_func()
+declare <4 x i32> @vector_func()
+declare <16 x i8> @vector_func_16i8()
+
+!llvm.module.flags = !{!1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 1460617}
+!5 = !{!"MaxCount", i64 849536}
+!6 = !{!"MaxInternalCount", i64 32769}
+!7 = !{!"MaxFunctionCount", i64 849536}
+!8 = !{!"NumCounts", i64 23784}
+!9 = !{!"NumFunctions", i64 3301}
+!10 = !{!"IsPartialProfile", i64 0}
+!11 = !{!"PartialProfileRatio", double 0.000000e+00}
+!12 = !{!"DetailedSummary", !13}
+!13 = !{!14, !15}
+!14 = !{i32 990000, i64 166, i32 73}
+!15 = !{i32 999999, i64 3, i32 1463}
+!16 = !{!"function_entry_count", i64 1}
+!17 = !{!"function_entry_count", i64 100000}
+!18 = !{!"br...
[truncated]
|
@@ -1072,6 +1072,41 @@ MCSection *TargetLoweringObjectFileELF::getSectionForConstant( | |||
return DataRelROSection; | |||
} | |||
|
|||
MCSection *TargetLoweringObjectFileELF::getSectionForConstant( | |||
const DataLayout &DL, SectionKind Kind, const Constant *C, Align &Alignment, | |||
StringRef SectionPrefix) const { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should it be suffix?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done.
@@ -2791,8 +2791,26 @@ void AsmPrinter::emitConstantPool() { | |||
if (!CPE.isMachineConstantPoolEntry()) | |||
C = CPE.Val.ConstVal; | |||
|
|||
MCSection *S = getObjFileLowering().getSectionForConstant( | |||
getDataLayout(), Kind, C, Alignment); | |||
MCSection *S = nullptr; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Suggest refactoring this code into:
auto SectionSuffix = getSectionSuffix(...);
S = ... getSectionForConstant(..., SectionSuffix);
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done.
continue; | ||
SDPI->addConstantProfileCount(GV, std::nullopt); | ||
} else { | ||
assert(Op.isCPI() && "Op must be constant pool index in this branch"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Some refactoring can probably be done to share code
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added getConstant
helper function to share code between profile and non-profile path.
StringRef SectionPrefix) const { | ||
// Fallback to `getSectionForConstant` without `SectionPrefix` parameter if it | ||
// is empty. | ||
if (SectionPrefix.empty()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
possible to just assert SectionPrefix is empty?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you mean something like assert(!SectionPrefix.empty() && "Call another method if section prefix is empty
here?
I think with the refactor suggested above (https://github.com/llvm/llvm-project/pull/129781/files#r1980454779), we can allow the new interface to handle empty section prefix by falling back to the original interface. What do you think about it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
current implementation is fine. Perhaps improve the message in report_fatal_error like "Not implemented for the object format".
for (const auto &MBB : MF) { | ||
for (const MachineInstr &I : MBB) { | ||
for (const MachineOperand &Op : I.operands()) { | ||
if (!Op.isJTI() && !Op.isGlobal()) | ||
if (!Op.isJTI() && !Op.isGlobal() && !Op.isCPI()) | ||
continue; | ||
|
||
std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
line 168 can be hoisted out to the outmost loop (i.e line 163).
SDPI->addConstantProfileCount(GV, Count); | ||
} else if (const Constant *C = | ||
getConstant(Op, MF.getTarget(), MF.getConstantPool())) { | ||
SDPI->addConstantProfileCount(C, Count); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes, we don't need to update "NumChangedJumpTables" here. but why do we need to return "true" or "false" for this function?
if (!GV || GV->getName().starts_with("llvm.") || | ||
!inStaticDataSection(GV, TM)) | ||
return nullptr; | ||
return GV; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's the motivation to handle "GlobalVariable" here? Any test to cover it?
} | ||
} | ||
} | ||
return SectionNameSuffix.str(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is returning a StringRef whose underlying memory is stack allocated. I don't think you need a SmallString here, just return std::string or c-string and convert it at the callsite?
@@ -2769,6 +2769,23 @@ namespace { | |||
|
|||
} // end anonymous namespace | |||
|
|||
StringRef AsmPrinter::getConstantSectionSuffix(const Constant *C) const { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Consider refactoring a bit to reduce nesting --
if(!TM.Options.EnableStaticDataPartitioning || C == nullptr || SDPI == nullptr || PSI == nullptr) return "";
auto Count = SDPI->getConstantProfileCount(C);
if(!Count.has_value()) return "";
if (PSI->isHotCount(*Count)) {
return "hot";
} else if (PSI->isColdCount(*Count) && !SDPI->hasUnknownCount(C)) {
return "unlikely";
}
return "";
!inStaticDataSection(GV, MF.getTarget())) | ||
continue; | ||
SDPI->addConstantProfileCount(GV, std::nullopt); | ||
const Constant *C = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: Can be combined with the code below --
if(auto *C = getConstant(....); C) {
SDPI->addConstantProfileCount(C, std::nullopt);
}
Same for PSIW
etc below.
This is a follow-up patch of #125756
In this PR, static-data-splitter pass produces the aggregated profile counts of constants for constant pools in a global state (
StateDataProfileInfo
), and asm printer consumes the profile counts to produce.hot
or.unlikely
prefixes.This implementation covers both x86 and aarch64 asm printer.