diff --git a/llvm/include/llvm/Analysis/StaticDataProfileInfo.h b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h new file mode 100644 index 0000000000000..4220f7d820db9 --- /dev/null +++ b/llvm/include/llvm/Analysis/StaticDataProfileInfo.h @@ -0,0 +1,68 @@ +#ifndef LLVM_ANALYSIS_STATICDATAPROFILEINFO_H +#define LLVM_ANALYSIS_STATICDATAPROFILEINFO_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/IR/Constant.h" +#include "llvm/Pass.h" + +namespace llvm { + +/// A class that holds the constants that represent static data and their +/// profile information and provides methods to operate on them. +class StaticDataProfileInfo { +public: + /// Accumulate the profile count of a constant that will be lowered to static + /// data sections. + DenseMap ConstantProfileCounts; + + /// Keeps track of the constants that are seen at least once without profile + /// counts. + DenseSet ConstantWithoutCounts; + +public: + StaticDataProfileInfo() = default; + + /// If \p Count is not nullopt, add it to the profile count of the constant \p + /// C in a saturating way, and clamp the count to \p getInstrMaxCountValue if + /// the result exceeds it. Otherwise, mark the constant as having no profile + /// count. + void addConstantProfileCount(const Constant *C, + std::optional Count); + + /// If \p C has a count, return it. Otherwise, return std::nullopt. + std::optional getConstantProfileCount(const Constant *C) const; + + /// Return true if the constant \p C is seen at least once without profiles. + bool hasUnknownCount(const Constant *C) const { + return ConstantWithoutCounts.count(C); + } +}; + +/// This wraps the StaticDataProfileInfo object as an immutable pass, for a +/// backend pass to operate on.
+class StaticDataProfileInfoWrapperPass : public ImmutablePass { +public: + static char ID; + StaticDataProfileInfoWrapperPass(); + bool doInitialization(Module &M) override; + bool doFinalization(Module &M) override; + + StaticDataProfileInfo &getStaticDataProfileInfo() { return *Info; } + const StaticDataProfileInfo &getStaticDataProfileInfo() const { + return *Info; + } + + /// This pass provides StaticDataProfileInfo for reads/writes but does not + /// modify \p M or other analyses. All analyses are preserved. + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + +private: + std::unique_ptr Info; +}; + +} // namespace llvm + +#endif // LLVM_ANALYSIS_STATICDATAPROFILEINFO_H diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index 0182f21bee5f5..d528bb8d3ca6c 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -71,10 +71,15 @@ namespace llvm { /// using profile information. MachineFunctionPass *createMachineFunctionSplitterPass(); - /// createStaticDataSplitterPass - This pass partitions a static data section - /// into a hot and cold section using profile information. + /// createStaticDataSplitterPass - This is a machine-function pass that + /// categorizes static data hotness using profile information. MachineFunctionPass *createStaticDataSplitterPass(); + /// createStaticDataAnnotatorPass - This is a module pass that reads from + /// StaticDataProfileInfoWrapperPass and annotates the section prefix of + /// global variables. + ModulePass *createStaticDataAnnotatorPass(); + /// MachineFunctionPrinter pass - This pass prints out the machine function to /// the given stream as a debugging tool.
MachineFunctionPass * diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 2232b8b6f55e5..a2adb6c358d5b 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -203,6 +203,8 @@ void initializeMachineLoopInfoWrapperPassPass(PassRegistry &); void initializeMachineModuleInfoWrapperPassPass(PassRegistry &); void initializeMachineOptimizationRemarkEmitterPassPass(PassRegistry &); void initializeMachineOutlinerPass(PassRegistry &); +void initializeStaticDataProfileInfoWrapperPassPass(PassRegistry &); +void initializeStaticDataAnnotatorPass(PassRegistry &); void initializeMachinePipelinerPass(PassRegistry &); void initializeMachinePostDominatorTreeWrapperPassPass(PassRegistry &); void initializeMachineRegionInfoPassPass(PassRegistry &); diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def index f78e5d6b7d0d4..fad52c55548cc 100644 --- a/llvm/include/llvm/Passes/MachinePassRegistry.def +++ b/llvm/include/llvm/Passes/MachinePassRegistry.def @@ -220,6 +220,7 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS( #define DUMMY_MACHINE_MODULE_PASS(NAME, PASS_NAME) #endif DUMMY_MACHINE_MODULE_PASS("machine-outliner", MachineOutlinerPass) +DUMMY_MACHINE_MODULE_PASS("static-data-annotator", StaticDataAnnotator) DUMMY_MACHINE_MODULE_PASS("pseudo-probe-inserter", PseudoProbeInserterPass) DUMMY_MACHINE_MODULE_PASS("mir-debugify", DebugifyMachineModule) DUMMY_MACHINE_MODULE_PASS("mir-check-debugify", CheckDebugMachineModulePass) diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index a44f6c6a135ef..fb2d7a82f670b 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -126,6 +126,7 @@ add_llvm_component_library(LLVMAnalysis ScalarEvolutionAliasAnalysis.cpp ScalarEvolutionDivision.cpp ScalarEvolutionNormalization.cpp + StaticDataProfileInfo.cpp StackLifetime.cpp StackSafetyAnalysis.cpp 
StructuralHash.cpp diff --git a/llvm/lib/Analysis/StaticDataProfileInfo.cpp b/llvm/lib/Analysis/StaticDataProfileInfo.cpp new file mode 100644 index 0000000000000..b124e101f8cdf --- /dev/null +++ b/llvm/lib/Analysis/StaticDataProfileInfo.cpp @@ -0,0 +1,50 @@ +#include "llvm/Analysis/StaticDataProfileInfo.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/InitializePasses.h" +#include "llvm/ProfileData/InstrProf.h" +#include + +using namespace llvm; +void StaticDataProfileInfo::addConstantProfileCount( + const Constant *C, std::optional Count) { + if (!Count) { + ConstantWithoutCounts.insert(C); + return; + } + uint64_t &OriginalCount = ConstantProfileCounts[C]; + OriginalCount = llvm::SaturatingAdd(*Count, OriginalCount); + // Clamp the count to getInstrMaxCountValue. InstrFDO reserves a few + // large values for special use. + if (OriginalCount > getInstrMaxCountValue()) + OriginalCount = getInstrMaxCountValue(); +} + +std::optional +StaticDataProfileInfo::getConstantProfileCount(const Constant *C) const { + auto I = ConstantProfileCounts.find(C); + if (I == ConstantProfileCounts.end()) + return std::nullopt; + return I->second; +} + +bool StaticDataProfileInfoWrapperPass::doInitialization(Module &M) { + Info.reset(new StaticDataProfileInfo()); + return false; +} + +bool StaticDataProfileInfoWrapperPass::doFinalization(Module &M) { + Info.reset(); + return false; +} + +INITIALIZE_PASS(StaticDataProfileInfoWrapperPass, "static-data-profile-info", + "Static Data Profile Info", false, true) + +StaticDataProfileInfoWrapperPass::StaticDataProfileInfoWrapperPass() + : ImmutablePass(ID) { + initializeStaticDataProfileInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); +} + +char StaticDataProfileInfoWrapperPass::ID = 0; diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt index 23ec3310079d3..9af1162be745e 100644 --- a/llvm/lib/CodeGen/CMakeLists.txt +++ b/llvm/lib/CodeGen/CMakeLists.txt @@ -228,6 
+228,7 @@ add_llvm_component_library(LLVMCodeGen StackProtector.cpp StackSlotColoring.cpp StaticDataSplitter.cpp + StaticDataAnnotator.cpp SwiftErrorValueTracking.cpp SwitchLoweringUtils.cpp TailDuplication.cpp diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp index 6311ec2b666e6..e740caf1e87de 100644 --- a/llvm/lib/CodeGen/CodeGen.cpp +++ b/llvm/lib/CodeGen/CodeGen.cpp @@ -132,6 +132,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeStackProtectorPass(Registry); initializeStackSlotColoringLegacyPass(Registry); initializeStaticDataSplitterPass(Registry); + initializeStaticDataAnnotatorPass(Registry); initializeStripDebugMachineModulePass(Registry); initializeTailDuplicateLegacyPass(Registry); initializeTargetPassConfigPass(Registry); diff --git a/llvm/lib/CodeGen/StaticDataAnnotator.cpp b/llvm/lib/CodeGen/StaticDataAnnotator.cpp new file mode 100644 index 0000000000000..04d918585f8af --- /dev/null +++ b/llvm/lib/CodeGen/StaticDataAnnotator.cpp @@ -0,0 +1,119 @@ +//===- StaticDataAnnotator - Annotate static data's section prefix --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// To reason about module-wide data hotness in a module granularity, this file +// implements a module pass StaticDataAnnotator to work coordinately with the +// StaticDataSplitter pass. +// +// The StaticDataSplitter pass is a machine function pass. It analyzes data +// hotness based on code and adds counters in the StaticDataProfileInfo. +// The StaticDataAnnotator pass is a module pass. It iterates global variables +// in the module, looks up counters from StaticDataProfileInfo and sets the +// section prefix based on profiles. 
+// +// The three-pass structure is implemented for practical reasons, to work around +// the limitation that a module pass based on legacy pass manager cannot make +// use of MachineBlockFrequencyInfo analysis. In the future, we can consider +// porting the StaticDataSplitter pass to a module-pass using the new pass +// manager framework. That way, analyses are lazily computed as opposed to +// eagerly scheduled, and a module pass can use MachineBlockFrequencyInfo. +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/StaticDataProfileInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Analysis.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "static-data-annotator" + +using namespace llvm; + +class StaticDataAnnotator : public ModulePass { +public: + static char ID; + + StaticDataProfileInfo *SDPI = nullptr; + const ProfileSummaryInfo *PSI = nullptr; + + StaticDataAnnotator() : ModulePass(ID) { + initializeStaticDataAnnotatorPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.setPreservesAll(); + ModulePass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { return "Static Data Annotator"; } + + bool runOnModule(Module &M) override; +}; + +// Returns true if the global variable already has a section prefix that is the +// same as `Prefix`.
+static bool alreadyHasSectionPrefix(const GlobalVariable &GV, + StringRef Prefix) { + std::optional SectionPrefix = GV.getSectionPrefix(); + return SectionPrefix && (*SectionPrefix == Prefix); +} + +bool StaticDataAnnotator::runOnModule(Module &M) { + SDPI = &getAnalysis() + .getStaticDataProfileInfo(); + PSI = &getAnalysis().getPSI(); + + if (!PSI->hasProfileSummary()) + return false; + + bool Changed = false; + for (auto &GV : M.globals()) { + if (GV.isDeclarationForLinker()) + continue; + + // Skip global variables without profile counts. The module may not be + // profiled or instrumented. + auto Count = SDPI->getConstantProfileCount(&GV); + if (!Count) + continue; + + if (PSI->isHotCount(*Count) && !alreadyHasSectionPrefix(GV, "hot")) { + // The variable counter is hot, set 'hot' section prefix if the section + // prefix isn't hot already. + GV.setSectionPrefix("hot"); + Changed = true; + } else if (PSI->isColdCount(*Count) && !SDPI->hasUnknownCount(&GV) && + !alreadyHasSectionPrefix(GV, "unlikely")) { + // The variable counter is cold, set 'unlikely' section prefix when + // 1) the section prefix isn't unlikely already, and + // 2) the variable is not seen without profile counts. The reason is that + // a variable without profile counts doesn't have all its uses profiled, + // for example when a function is not instrumented, or not sampled (new + // code paths). 
+ GV.setSectionPrefix("unlikely"); + Changed = true; + } + } + + return Changed; +} + +char StaticDataAnnotator::ID = 0; + +INITIALIZE_PASS(StaticDataAnnotator, DEBUG_TYPE, "Static Data Annotator", false, + false) + +ModulePass *llvm::createStaticDataAnnotatorPass() { + return new StaticDataAnnotator(); +} diff --git a/llvm/lib/CodeGen/StaticDataSplitter.cpp b/llvm/lib/CodeGen/StaticDataSplitter.cpp index 0965fe85acfc7..c647c3075d79c 100644 --- a/llvm/lib/CodeGen/StaticDataSplitter.cpp +++ b/llvm/lib/CodeGen/StaticDataSplitter.cpp @@ -9,15 +9,15 @@ // The pass uses branch profile data to assign hotness based section qualifiers // for the following types of static data: // - Jump tables +// - Module-internal global variables // - Constant pools (TODO) -// - Other module-internal data (TODO) // // For the original RFC of this pass please see // https://discourse.llvm.org/t/rfc-profile-guided-static-data-partitioning/83744 -#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/StaticDataProfileInfo.h" #include "llvm/CodeGen/MBFIWrapper.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" @@ -27,9 +27,12 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetLoweringObjectFile.h" using namespace llvm; @@ -45,15 +48,27 @@ class StaticDataSplitter : public MachineFunctionPass { const MachineBranchProbabilityInfo *MBPI = nullptr; const MachineBlockFrequencyInfo *MBFI = nullptr; const ProfileSummaryInfo *PSI = nullptr; + StaticDataProfileInfo *SDPI = nullptr; - // Update LLVM statistics for a machine function without profiles. 
- void updateStatsWithoutProfiles(const MachineFunction &MF); - // Update LLVM statistics for a machine function with profiles. - void updateStatsWithProfiles(const MachineFunction &MF); + // If the global value is a local linkage global variable, return it. + // Otherwise, return nullptr. + const GlobalVariable *getLocalLinkageGlobalVariable(const GlobalValue *GV); + + // Returns true if the global variable is in one of {.rodata, .bss, .data, + // .data.rel.ro} sections. + bool inStaticDataSection(const GlobalVariable *GV, const TargetMachine &TM); // Use profiles to partition static data. bool partitionStaticDataWithProfiles(MachineFunction &MF); + // Update LLVM statistics for a machine function with profiles. + void updateStatsWithProfiles(const MachineFunction &MF); + + // Update LLVM statistics for a machine function without profiles. + void updateStatsWithoutProfiles(const MachineFunction &MF); + + void annotateStaticDataWithoutProfiles(const MachineFunction &MF); + public: static char ID; @@ -68,6 +83,7 @@ class StaticDataSplitter : public MachineFunctionPass { AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); } bool runOnMachineFunction(MachineFunction &MF) override; @@ -78,10 +94,14 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) { MBFI = &getAnalysis().getMBFI(); PSI = &getAnalysis().getPSI(); + SDPI = &getAnalysis() + .getStaticDataProfileInfo(); + const bool ProfileAvailable = PSI && PSI->hasProfileSummary() && MBFI && MF.getFunction().hasProfileData(); if (!ProfileAvailable) { + annotateStaticDataWithoutProfiles(MF); updateStatsWithoutProfiles(MF); return false; } @@ -95,6 +115,7 @@ bool StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) { bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) { int NumChangedJumpTables = 0; + const TargetMachine &TM = MF.getTarget(); MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); // Jump table could be used by either terminating 
instructions or @@ -105,6 +126,11 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) { for (const auto &MBB : MF) { for (const MachineInstr &I : MBB) { for (const MachineOperand &Op : I.operands()) { + if (!Op.isJTI() && !Op.isGlobal()) + continue; + + std::optional Count = MBFI->getBlockProfileCount(&MBB); + if (Op.isJTI()) { assert(MJTI != nullptr && "Jump table info is not available."); const int JTI = Op.getIndex(); @@ -117,11 +143,22 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) { // Hotness is based on source basic block hotness. // TODO: PSI APIs are about instruction hotness. Introduce API for // data access hotness. - if (PSI->isColdBlock(&MBB, MBFI)) + if (Count && PSI->isColdCount(*Count)) Hotness = MachineFunctionDataHotness::Cold; if (MJTI->updateJumpTableEntryHotness(JTI, Hotness)) ++NumChangedJumpTables; + } else { + // Find global variables with local linkage. + const GlobalVariable *GV = + getLocalLinkageGlobalVariable(Op.getGlobal()); + // Skip 'special' global variables conservatively because they are + // often handled specially, and skip those not in static data + // sections. + if (!GV || GV->getName().starts_with("llvm.") || + !inStaticDataSection(GV, TM)) + continue; + SDPI->addConstantProfileCount(GV, Count); } } } @@ -129,6 +166,23 @@ bool StaticDataSplitter::partitionStaticDataWithProfiles(MachineFunction &MF) { return NumChangedJumpTables > 0; } +const GlobalVariable * +StaticDataSplitter::getLocalLinkageGlobalVariable(const GlobalValue *GV) { + // LLVM IR Verifier requires that a declaration must have valid declaration + // linkage, and local linkages are not among the valid ones. So there is no + // need to check GV is not a declaration here. + return (GV && GV->hasLocalLinkage()) ? 
dyn_cast(GV) : nullptr; +} + +bool StaticDataSplitter::inStaticDataSection(const GlobalVariable *GV, + const TargetMachine &TM) { + assert(GV && "Caller guaranteed"); + + SectionKind Kind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); + return Kind.isData() || Kind.isReadOnly() || Kind.isReadOnlyWithRel() || + Kind.isBSS(); +} + void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) { if (!AreStatisticsEnabled()) return; @@ -147,6 +201,24 @@ void StaticDataSplitter::updateStatsWithProfiles(const MachineFunction &MF) { } } +void StaticDataSplitter::annotateStaticDataWithoutProfiles( + const MachineFunction &MF) { + for (const auto &MBB : MF) { + for (const MachineInstr &I : MBB) { + for (const MachineOperand &Op : I.operands()) { + if (!Op.isGlobal()) + continue; + const GlobalVariable *GV = + getLocalLinkageGlobalVariable(Op.getGlobal()); + if (!GV || GV->getName().starts_with("llvm.") || + !inStaticDataSection(GV, MF.getTarget())) + continue; + SDPI->addConstantProfileCount(GV, std::nullopt); + } + } + } +} + void StaticDataSplitter::updateStatsWithoutProfiles(const MachineFunction &MF) { if (!AreStatisticsEnabled()) return; @@ -163,6 +235,7 @@ INITIALIZE_PASS_BEGIN(StaticDataSplitter, DEBUG_TYPE, "Split static data", INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(StaticDataProfileInfoWrapperPass) INITIALIZE_PASS_END(StaticDataSplitter, DEBUG_TYPE, "Split static data", false, false) diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index 5d9da9df9092a..7f89043c0b20c 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -1257,8 +1257,13 @@ void TargetPassConfig::addMachinePasses() { } } addPass(createMachineFunctionSplitterPass()); - if (SplitStaticData || 
TM->Options.EnableStaticDataPartitioning) + if (SplitStaticData || TM->Options.EnableStaticDataPartitioning) { + // The static data splitter pass is a machine function pass, and + // static data annotator pass is a module-wide pass. See the file comment + // in StaticDataAnnotator.cpp for the motivation. addPass(createStaticDataSplitterPass()); + addPass(createStaticDataAnnotatorPass()); + } } // We run the BasicBlockSections pass if either we need BB sections or BB // address map (or both). diff --git a/llvm/test/CodeGen/X86/global-variable-partition.ll b/llvm/test/CodeGen/X86/global-variable-partition.ll new file mode 100644 index 0000000000000..b216047a5ea66 --- /dev/null +++ b/llvm/test/CodeGen/X86/global-variable-partition.ll @@ -0,0 +1,198 @@ +; The static-data-splitter processes data from @cold_func first, +; @unprofiled_func secondly, and @hot_func after the two functions above. +; Tests that data hotness is based on aggregated module-wide profile +; information. This way linker-mergeable data is emitted once per module. + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; The three RUN commands set `-relocation-model=pic` so `hot_relro_array` and +; `cold_relro_array` are placed in the .data.rel.ro-prefixed section. + +; This RUN command sets `-data-sections=true -unique-section-names=true` so data +; sections are uniquified by variable names. +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -data-sections=true \ +; RUN: -unique-section-names=true -relocation-model=pic \ +; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=SYM,DATA + +; This RUN command sets `-data-sections=true -unique-section-names=false` so +; data sections are uniquified by numbers.
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -data-sections=true \ +; RUN: -unique-section-names=false -relocation-model=pic \ +; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=UNIQ,DATA + +; This RUN command sets `-data-sections=false -unique-section-names=false`. +; RUN: llc -mtriple=x86_64-unknown-linux-gnu -enable-split-machine-functions \ +; RUN: -partition-static-data-sections=true -data-sections=false \ +; RUN: -unique-section-names=false -relocation-model=pic \ +; RUN: %s -o - 2>&1 | FileCheck %s --check-prefixes=AGG,DATA + +; For @.str and @.str.1 +; SYM: .section .rodata.str1.1.hot. +; UNIQ: .section .rodata.str1.1.hot.,"aMS",@progbits,1 +; AGG: .section .rodata.str1.1.hot +; DATA: .L.str +; DATA: "hot\t" +; DATA: .L.str.1 +; DATA: "%d\t%d\t%d\n" + +; For @hot_relro_array +; SYM: .section .data.rel.ro.hot.hot_relro_array +; UNIQ: .section .data.rel.ro.hot.,"aw",@progbits,unique,3 +; AGG: .section .data.rel.ro.hot.,"aw",@progbits + +; For @hot_data, which is accessed by {cold_func, unprofiled_func, hot_func}. +; SYM: .section .data.hot.hot_data,"aw",@progbits +; UNIQ: .section .data.hot.,"aw",@progbits,unique,4 +; AGG: .section .data.hot.,"aw",@progbits + +; For @hot_bss, which is accessed by {unprofiled_func, hot_func}. 
+; SYM: .section .bss.hot.hot_bss,"aw",@nobits +; UNIQ: .section .bss.hot.,"aw",@nobits,unique,5 +; AGG: .section .bss.hot.,"aw",@nobits + +; For @.str.2 +; SYM: .section .rodata.str1.1.unlikely.,"aMS",@progbits,1 +; UNIQ: section .rodata.str1.1.unlikely.,"aMS",@progbits,1 +; AGG: .section .rodata.str1.1.unlikely.,"aMS",@progbits,1 +; DATA: .L.str.2: +; DATA: "cold%d\t%d\t%d\n" + +; For @cold_bss +; SYM: .section .bss.unlikely.cold_bss,"aw",@nobits +; UNIQ: .section .bss.unlikely.,"aw",@nobits,unique,6 +; AGG: .section .bss.unlikely.,"aw",@nobits + +; For @cold_data +; SYM: .section .data.unlikely.cold_data,"aw",@progbits +; UNIQ: .section .data.unlikely.,"aw",@progbits,unique,7 +; AGG: .section .data.unlikely.,"aw",@progbits + +; For @cold_relro_array +; SYM: .section .data.rel.ro.unlikely.cold_relro_array,"aw",@progbits +; UNIQ: .section .data.rel.ro.unlikely.,"aw",@progbits,unique,8 +; AGG: .section .data.rel.ro.unlikely.,"aw",@progbits + +; Currently static-data-splitter only analyzes access from code. +; @bss2 and @data3 are indirectly accessed by code through @hot_relro_array +; and @cold_relro_array. A follow-up item is to analyze indirect access via data +; and prune the unlikely list. +; For @bss2 +; SYM: .section .bss.unlikely.bss2,"aw",@nobits +; UNIQ: .section .bss.unlikely.,"aw",@nobits,unique,9 +; AGG: .section .bss.unlikely.,"aw",@nobits + +; For @data3 +; SYM: .section .data.unlikely.data3,"aw",@progbits +; UNIQ: .section .data.unlikely.,"aw",@progbits,unique,10 +; AGG: .section .data.unlikely.,"aw",@progbits + +; For @data_with_unknown_hotness +; SYM: .type .Ldata_with_unknown_hotness,@object # @data_with_unknown_hotness +; SYM: .section .data..Ldata_with_unknown_hotness,"aw",@progbits +; UNIQ: .section .data,"aw",@progbits,unique,11 +; The `.section` directive is omitted for .data with -unique-section-names=false. +; See MCSectionELF::shouldOmitSectionDirective for the implementation details. 
+; AGG: .data +; DATA: .Ldata_with_unknown_hotness: + +@.str = private unnamed_addr constant [5 x i8] c"hot\09\00", align 1 +@.str.1 = private unnamed_addr constant [10 x i8] c"%d\09%d\09%d\0A\00", align 1 +@hot_relro_array = internal constant [2 x ptr] [ptr @bss2, ptr @data3] +@hot_data = internal global i32 5 +@hot_bss = internal global i32 0 +@.str.2 = private unnamed_addr constant [14 x i8] c"cold%d\09%d\09%d\0A\00", align 1 +@cold_bss = internal global i32 0 +@cold_data = internal global i32 4 +@cold_relro_array = internal constant [2 x ptr] [ptr @data3, ptr @bss2] +@bss2 = internal global i32 0 +@data3 = internal global i32 3 +@data_with_unknown_hotness = private global i32 5 + +define void @cold_func(i32 %0) !prof !15 { + %2 = load i32, ptr @cold_bss + %3 = load i32, ptr @cold_data + %4 = srem i32 %0, 2 + %5 = sext i32 %4 to i64 + %6 = getelementptr inbounds [2 x ptr], ptr @cold_relro_array, i64 0, i64 %5 + %7 = load ptr, ptr %6 + %8 = load i32, ptr %7 + %9 = load i32, ptr @data_with_unknown_hotness + %11 = load i32, ptr @hot_data + %12 = call i32 (...) @func_taking_arbitrary_param(ptr @.str.2, i32 %2, i32 %3, i32 %8, i32 %9, i32 %11) + ret void +} + +define i32 @unprofiled_func() { + %a = load i32, ptr @data_with_unknown_hotness + %b = load i32, ptr @hot_data + %c = load i32, ptr @hot_bss + %ret = call i32 (...) @func_taking_arbitrary_param(i32 %a, i32 %b, i32 %c) + ret i32 %ret +} + +define void @hot_func(i32 %0) !prof !14 { + %2 = call i32 (...) @func_taking_arbitrary_param(ptr @.str) + %3 = srem i32 %0, 2 + %4 = sext i32 %3 to i64 + %5 = getelementptr inbounds [2 x ptr], ptr @hot_relro_array, i64 0, i64 %4 + %6 = load ptr, ptr %5 + %7 = load i32, ptr %6 + %8 = load i32, ptr @hot_data + %9 = load i32, ptr @hot_bss + %10 = call i32 (...) 
@func_taking_arbitrary_param(ptr @.str.1, i32 %7, i32 %8, i32 %9) + ret void +} + +define i32 @main(i32 %0, ptr %1) !prof !15 { + br label %11 + +5: ; preds = %11 + %6 = call i32 @rand() + store i32 %6, ptr @cold_bss + store i32 %6, ptr @cold_data + store i32 %6, ptr @bss2 + store i32 %6, ptr @data3 + call void @cold_func(i32 %6) + ret i32 0 + +11: ; preds = %11, %2 + %12 = phi i32 [ 0, %2 ], [ %19, %11 ] + %13 = call i32 @rand() + %14 = srem i32 %13, 2 + %15 = sext i32 %14 to i64 + %16 = getelementptr inbounds [2 x ptr], ptr @hot_relro_array, i64 0, i64 %15 + %17 = load ptr, ptr %16 + store i32 %13, ptr %17 + store i32 %13, ptr @hot_data + %18 = add i32 %13, 1 + store i32 %18, ptr @hot_bss + call void @hot_func(i32 %12) + %19 = add i32 %12, 1 + %20 = icmp eq i32 %19, 100000 + br i1 %20, label %5, label %11, !prof !16 +} + +declare i32 @rand() +declare i32 @func_taking_arbitrary_param(...) + +!llvm.module.flags = !{!1} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 1460183} +!5 = !{!"MaxCount", i64 849024} +!6 = !{!"MaxInternalCount", i64 32769} +!7 = !{!"MaxFunctionCount", i64 849024} +!8 = !{!"NumCounts", i64 23627} +!9 = !{!"NumFunctions", i64 3271} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13} +!12 = !{i32 990000, i64 166, i32 73} +!13 = !{i32 999999, i64 3, i32 1443} +!14 = !{!"function_entry_count", i64 100000} +!15 = !{!"function_entry_count", i64 1} +!16 = !{!"branch_weights", i32 1, i32 99999}