-
Notifications
You must be signed in to change notification settings - Fork 13k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SampleFDO][NFC] Refactoring sample reader to support on-demand read profiles for given functions #104654
Conversation
@llvm/pr-subscribers-pgo Author: Lei Wang (wlei-llvm) ChangesCurrently in extended binary format, sample reader only read the profiles when the function are in the current module at initialization time, this extends the support to read the arbitrary profiles for given input functions in later stage. It's used for #101053. Full diff: https://github.com/llvm/llvm-project/pull/104654.diff 2 Files Affected:
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index f4bdc6525308d2..a5b53f79ab55df 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -380,6 +380,22 @@ class SampleProfileReader {
return sampleprof_error::success;
}
+ /// Read sample profiles for the given functions. Currently it's only used for
+ /// extended binary format to load the profiles on-demand.
+ std::error_code read(const DenseSet<StringRef> &FuncsToUse) {
+ if (std::error_code EC = read(FuncsToUse, Profiles))
+ return EC;
+ return sampleprof_error::success;
+ };
+
+ /// Read sample profiles for the given functions and write them to the given
+ /// profile map. Currently it's only used for extended binary format to load
+ /// the profiles on-demand.
+ virtual std::error_code read(const DenseSet<StringRef> &FuncsToUse,
+ SampleProfileMap &Profiles) {
+ return sampleprof_error::not_implemented;
+ };
+
/// The implementaion to read sample profiles from the associated file.
virtual std::error_code readImpl() = 0;
@@ -522,6 +538,16 @@ class SampleProfileReader {
std::unique_ptr<SampleProfileReaderItaniumRemapper> Remapper;
+ // A map from a function's context hash to its meta data section range, used
+ // for on-demand read function profile metadata.
+ std::unordered_map<uint64_t, std::pair<const uint8_t *, const uint8_t *>>
+ FuncMetadataIndex;
+
+ std::pair<const uint8_t *, const uint8_t *> LBRProfileSecRange;
+
+ /// Whether the profile has attribute metadata.
+ bool ProfileHasAttribute = false;
+
/// \brief Whether samples are collected based on pseudo probes.
bool ProfileIsProbeBased = false;
@@ -621,6 +647,8 @@ class SampleProfileReaderBinary : public SampleProfileReader {
/// Read the next function profile instance.
std::error_code readFuncProfile(const uint8_t *Start);
+ std::error_code readFuncProfile(const uint8_t *Start,
+ SampleProfileMap &Profiles);
/// Read the contents of the given profile instance.
std::error_code readProfile(FunctionSamples &FProfile);
@@ -720,11 +748,15 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
std::error_code readSecHdrTableEntry(uint64_t Idx);
std::error_code readSecHdrTable();
+ std::error_code readFuncMetadata(bool ProfileHasAttribute,
+ SampleProfileMap &Profiles);
std::error_code readFuncMetadata(bool ProfileHasAttribute);
std::error_code readFuncMetadata(bool ProfileHasAttribute,
FunctionSamples *FProfile);
std::error_code readFuncOffsetTable();
std::error_code readFuncProfiles();
+ std::error_code readFuncProfiles(const DenseSet<StringRef> &FuncsToUse,
+ SampleProfileMap &Profiles);
std::error_code readNameTableSec(bool IsMD5, bool FixedLengthMD5);
std::error_code readCSNameTableSec();
std::error_code readProfileSymbolList();
@@ -776,6 +808,13 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
/// the reader has been given a module.
bool collectFuncsFromModule() override;
+ /// Read the profiles on-demand for the given functions. This is used after
+ /// stale call graph matching finds new functions whose profiles aren't loaded
+ /// at the beginning and we need to loaded the profiles explicitly for
+ /// potential matching.
+ std::error_code read(const DenseSet<StringRef> &FuncsToUse,
+ SampleProfileMap &Profiles) override;
+
std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override {
return std::move(ProfSymList);
};
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 4752465fc072e0..bac7791f202ef7 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -653,7 +653,8 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
}
std::error_code
-SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
+SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start,
+ SampleProfileMap &Profiles) {
Data = Start;
auto NumHeadSamples = readNumber<uint64_t>();
if (std::error_code EC = NumHeadSamples.getError())
@@ -678,6 +679,11 @@ SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
return sampleprof_error::success;
}
+std::error_code
+SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
+ return readFuncProfile(Start, Profiles);
+}
+
std::error_code SampleProfileReaderBinary::readImpl() {
ProfileIsFS = ProfileIsFSDisciminator;
FunctionSamples::ProfileIsFS = ProfileIsFS;
@@ -725,6 +731,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
break;
}
case SecLBRProfile:
+ LBRProfileSecRange = std::make_pair(Data, End);
if (std::error_code EC = readFuncProfiles())
return EC;
break;
@@ -745,9 +752,9 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
ProfileIsProbeBased =
hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
- bool HasAttribute =
+ ProfileHasAttribute =
hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute);
- if (std::error_code EC = readFuncMetadata(HasAttribute))
+ if (std::error_code EC = readFuncMetadata(ProfileHasAttribute))
return EC;
break;
}
@@ -791,6 +798,19 @@ bool SampleProfileReaderExtBinaryBase::useFuncOffsetList() const {
return false;
}
+std::error_code
+SampleProfileReaderExtBinaryBase::read(const DenseSet<StringRef> &FuncsToUse,
+ SampleProfileMap &Profiles) {
+ Data = LBRProfileSecRange.first;
+ End = LBRProfileSecRange.second;
+ if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles))
+ return EC;
+ End = Data;
+
+ if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, Profiles))
+ return EC;
+ return sampleprof_error::success;
+}
bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() {
if (!M)
@@ -838,6 +858,97 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
return sampleprof_error::success;
}
+std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles(
+ const DenseSet<StringRef> &FuncsToUse, SampleProfileMap &Profiles) {
+ const uint8_t *Start = Data;
+
+ if (Remapper) {
+ for (auto Name : FuncsToUse) {
+ Remapper->insert(Name);
+ }
+ }
+
+ if (ProfileIsCS) {
+ assert(useFuncOffsetList());
+ DenseSet<uint64_t> FuncGuidsToUse;
+ if (useMD5()) {
+ for (auto Name : FuncsToUse)
+ FuncGuidsToUse.insert(Function::getGUID(Name));
+ }
+
+ // For each function in current module, load all context profiles for
+ // the function as well as their callee contexts which can help profile
+ // guided importing for ThinLTO. This can be achieved by walking
+ // through an ordered context container, where contexts are laid out
+ // as if they were walked in preorder of a context trie. While
+ // traversing the trie, a link to the highest common ancestor node is
+ // kept so that all of its decendants will be loaded.
+ const SampleContext *CommonContext = nullptr;
+ for (const auto &NameOffset : FuncOffsetList) {
+ const auto &FContext = NameOffset.first;
+ FunctionId FName = FContext.getFunction();
+ StringRef FNameString;
+ if (!useMD5())
+ FNameString = FName.stringRef();
+
+ // For function in the current module, keep its farthest ancestor
+ // context. This can be used to load itself and its child and
+ // sibling contexts.
+ if ((useMD5() && FuncGuidsToUse.count(FName.getHashCode())) ||
+ (!useMD5() && (FuncsToUse.count(FNameString) ||
+ (Remapper && Remapper->exist(FNameString))))) {
+ if (!CommonContext || !CommonContext->isPrefixOf(FContext))
+ CommonContext = &FContext;
+ }
+
+ if (CommonContext == &FContext ||
+ (CommonContext && CommonContext->isPrefixOf(FContext))) {
+ // Load profile for the current context which originated from
+ // the common ancestor.
+ const uint8_t *FuncProfileAddr = Start + NameOffset.second;
+ if (std::error_code EC = readFuncProfile(FuncProfileAddr))
+ return EC;
+ }
+ }
+ } else if (useMD5()) {
+ assert(!useFuncOffsetList());
+ for (auto Name : FuncsToUse) {
+ auto GUID = MD5Hash(Name);
+ auto iter = FuncOffsetTable.find(GUID);
+ if (iter == FuncOffsetTable.end())
+ continue;
+ const uint8_t *FuncProfileAddr = Start + iter->second;
+ if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles))
+ return EC;
+ }
+ } else if (Remapper) {
+ assert(useFuncOffsetList());
+ for (auto NameOffset : FuncOffsetList) {
+ SampleContext FContext(NameOffset.first);
+ auto FuncName = FContext.getFunction();
+ StringRef FuncNameStr = FuncName.stringRef();
+ if (!FuncsToUse.count(FuncNameStr) && !Remapper->exist(FuncNameStr))
+ continue;
+ const uint8_t *FuncProfileAddr = Start + NameOffset.second;
+ if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles))
+ return EC;
+ }
+ } else {
+ assert(!useFuncOffsetList());
+ for (auto Name : FuncsToUse) {
+
+ auto iter = FuncOffsetTable.find(MD5Hash(Name));
+ if (iter == FuncOffsetTable.end())
+ continue;
+ const uint8_t *FuncProfileAddr = Start + iter->second;
+ if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles))
+ return EC;
+ }
+ }
+
+ return sampleprof_error::success;
+}
+
std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
// Collect functions used by current module if the Reader has been
// given a module.
@@ -858,88 +969,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
assert(Data == End && "More data is read than expected");
} else {
// Load function profiles on demand.
- if (Remapper) {
- for (auto Name : FuncsToUse) {
- Remapper->insert(Name);
- }
- }
-
- if (ProfileIsCS) {
- assert(useFuncOffsetList());
- DenseSet<uint64_t> FuncGuidsToUse;
- if (useMD5()) {
- for (auto Name : FuncsToUse)
- FuncGuidsToUse.insert(Function::getGUID(Name));
- }
-
- // For each function in current module, load all context profiles for
- // the function as well as their callee contexts which can help profile
- // guided importing for ThinLTO. This can be achieved by walking
- // through an ordered context container, where contexts are laid out
- // as if they were walked in preorder of a context trie. While
- // traversing the trie, a link to the highest common ancestor node is
- // kept so that all of its decendants will be loaded.
- const SampleContext *CommonContext = nullptr;
- for (const auto &NameOffset : FuncOffsetList) {
- const auto &FContext = NameOffset.first;
- FunctionId FName = FContext.getFunction();
- StringRef FNameString;
- if (!useMD5())
- FNameString = FName.stringRef();
-
- // For function in the current module, keep its farthest ancestor
- // context. This can be used to load itself and its child and
- // sibling contexts.
- if ((useMD5() && FuncGuidsToUse.count(FName.getHashCode())) ||
- (!useMD5() && (FuncsToUse.count(FNameString) ||
- (Remapper && Remapper->exist(FNameString))))) {
- if (!CommonContext || !CommonContext->isPrefixOf(FContext))
- CommonContext = &FContext;
- }
-
- if (CommonContext == &FContext ||
- (CommonContext && CommonContext->isPrefixOf(FContext))) {
- // Load profile for the current context which originated from
- // the common ancestor.
- const uint8_t *FuncProfileAddr = Start + NameOffset.second;
- if (std::error_code EC = readFuncProfile(FuncProfileAddr))
- return EC;
- }
- }
- } else if (useMD5()) {
- assert(!useFuncOffsetList());
- for (auto Name : FuncsToUse) {
- auto GUID = MD5Hash(Name);
- auto iter = FuncOffsetTable.find(GUID);
- if (iter == FuncOffsetTable.end())
- continue;
- const uint8_t *FuncProfileAddr = Start + iter->second;
- if (std::error_code EC = readFuncProfile(FuncProfileAddr))
- return EC;
- }
- } else if (Remapper) {
- assert(useFuncOffsetList());
- for (auto NameOffset : FuncOffsetList) {
- SampleContext FContext(NameOffset.first);
- auto FuncName = FContext.getFunction();
- StringRef FuncNameStr = FuncName.stringRef();
- if (!FuncsToUse.count(FuncNameStr) && !Remapper->exist(FuncNameStr))
- continue;
- const uint8_t *FuncProfileAddr = Start + NameOffset.second;
- if (std::error_code EC = readFuncProfile(FuncProfileAddr))
- return EC;
- }
- } else {
- assert(!useFuncOffsetList());
- for (auto Name : FuncsToUse) {
- auto iter = FuncOffsetTable.find(MD5Hash(Name));
- if (iter == FuncOffsetTable.end())
- continue;
- const uint8_t *FuncProfileAddr = Start + iter->second;
- if (std::error_code EC = readFuncProfile(FuncProfileAddr))
- return EC;
- }
- }
+ if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles))
+ return EC;
Data = End;
}
assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
@@ -1245,6 +1276,27 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
return sampleprof_error::success;
}
+std::error_code
+SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
+ SampleProfileMap &Profiles) {
+ if (FuncMetadataIndex.empty())
+ return sampleprof_error::success;
+
+ for (auto &I : Profiles) {
+ FunctionSamples *FProfile = &I.second;
+ auto R = FuncMetadataIndex.find(FProfile->getContext().getHashCode());
+ if (R == FuncMetadataIndex.end())
+ continue;
+
+ Data = R->second.first;
+ End = R->second.second;
+ if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile))
+ return EC;
+ assert(Data == End && "More data is read than expected");
+ }
+ return sampleprof_error::success;
+}
+
std::error_code
SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
while (Data < End) {
@@ -1257,8 +1309,11 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
if (It != Profiles.end())
FProfile = &It->second;
+ const uint8_t *Start = Data;
if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile))
return EC;
+
+ FuncMetadataIndex[FContext.getHashCode()] = {Start, Data};
}
assert(Data == End && "More data is read than expected");
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
8cece4d
to
97f715c
Compare
…ofiles for given functions
97f715c
to
db555d5
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm, thanks.
Currently in extended binary format, sample reader only read the profiles when the function are in the current module at initialization time, this extends the support to read the arbitrary profiles for given input functions in later stage. It's used for #101053.