mirror of
https://github.com/intel/llvm.git
synced 2026-02-09 01:52:26 +08:00
[CSSPGO] Fix the issue of missing callee profile matches (#85715)
Two fixes related to the callee/inlinee profile: 1. Fix the bug that the matching results are missing to distribute to the callee profiles (should be pass-by-reference). 2. Narrow imported function matching to checksum mismatched functions. More context: before we run matchings for all imported functions even checksums are matched, however, after we fix 1), we got a regression, it's likely due to the matching is not no-op for checksum matched function, so we want to make it consistent to only run matching for checksum mismatched (imported)functions. Since the metadata(pseudo_probe_desc) are dropped for imported function, we leverage the function attribute mechanism and add a new function attribute(`profile-checksum-mismatch`) to transfer the info from pre-link to post-link.
This commit is contained in:
@@ -86,9 +86,12 @@ template <> struct IRTraits<BasicBlock> {
|
||||
// SampleProfileProber.
|
||||
class PseudoProbeManager {
|
||||
DenseMap<uint64_t, PseudoProbeDescriptor> GUIDToProbeDescMap;
|
||||
const ThinOrFullLTOPhase LTOPhase;
|
||||
|
||||
public:
|
||||
PseudoProbeManager(const Module &M) {
|
||||
PseudoProbeManager(const Module &M,
|
||||
ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None)
|
||||
: LTOPhase(LTOPhase) {
|
||||
if (NamedMDNode *FuncInfo =
|
||||
M.getNamedMetadata(PseudoProbeDescMetadataName)) {
|
||||
for (const auto *Operand : FuncInfo->operands()) {
|
||||
@@ -126,17 +129,15 @@ public:
|
||||
|
||||
bool profileIsValid(const Function &F, const FunctionSamples &Samples) const {
|
||||
const auto *Desc = getDesc(F);
|
||||
if (!Desc) {
|
||||
LLVM_DEBUG(dbgs() << "Probe descriptor missing for Function "
|
||||
<< F.getName() << "\n");
|
||||
return false;
|
||||
}
|
||||
if (Desc->getFunctionHash() != Samples.getFunctionHash()) {
|
||||
LLVM_DEBUG(dbgs() << "Hash mismatch for Function " << F.getName()
|
||||
<< "\n");
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
assert((LTOPhase != ThinOrFullLTOPhase::ThinLTOPostLink || !Desc ||
|
||||
profileIsHashMismatched(*Desc, Samples) ==
|
||||
F.hasFnAttribute("profile-checksum-mismatch")) &&
|
||||
"In post-link, profile checksum matching state doesn't match "
|
||||
"function 'profile-checksum-mismatch' attribute.");
|
||||
// The desc for import function is unavailable. Check the function attribute
|
||||
// for mismatch.
|
||||
return (!Desc && !F.hasFnAttribute("profile-checksum-mismatch")) ||
|
||||
(Desc && !profileIsHashMismatched(*Desc, Samples));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -453,6 +453,7 @@ class SampleProfileMatcher {
|
||||
Module &M;
|
||||
SampleProfileReader &Reader;
|
||||
const PseudoProbeManager *ProbeManager;
|
||||
const ThinOrFullLTOPhase LTOPhase;
|
||||
SampleProfileMap FlattenedProfiles;
|
||||
// For each function, the matcher generates a map, of which each entry is a
|
||||
// mapping from the source location of current build to the source location in
|
||||
@@ -504,8 +505,9 @@ class SampleProfileMatcher {
|
||||
|
||||
public:
|
||||
SampleProfileMatcher(Module &M, SampleProfileReader &Reader,
|
||||
const PseudoProbeManager *ProbeManager)
|
||||
: M(M), Reader(Reader), ProbeManager(ProbeManager){};
|
||||
const PseudoProbeManager *ProbeManager,
|
||||
ThinOrFullLTOPhase LTOPhase)
|
||||
: M(M), Reader(Reader), ProbeManager(ProbeManager), LTOPhase(LTOPhase){};
|
||||
void runOnModule();
|
||||
void clearMatchingData() {
|
||||
// Do not clear FuncMappings, it stores IRLoc to ProfLoc remappings which
|
||||
@@ -521,7 +523,7 @@ private:
|
||||
return &It->second;
|
||||
return nullptr;
|
||||
}
|
||||
void runOnFunction(const Function &F);
|
||||
void runOnFunction(Function &F);
|
||||
void findIRAnchors(const Function &F,
|
||||
std::map<LineLocation, StringRef> &IRAnchors);
|
||||
void findProfileAnchors(
|
||||
@@ -1911,15 +1913,22 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
|
||||
bool Changed = false;
|
||||
|
||||
if (FunctionSamples::ProfileIsProbeBased) {
|
||||
if (!ProbeManager->profileIsValid(F, *Samples)) {
|
||||
LLVM_DEBUG({
|
||||
if (!ProbeManager->getDesc(F))
|
||||
dbgs() << "Probe descriptor missing for Function " << F.getName()
|
||||
<< "\n";
|
||||
});
|
||||
|
||||
if (ProbeManager->profileIsValid(F, *Samples)) {
|
||||
++NumMatchedProfile;
|
||||
} else {
|
||||
++NumMismatchedProfile;
|
||||
LLVM_DEBUG(
|
||||
dbgs() << "Profile is invalid due to CFG mismatch for Function "
|
||||
<< F.getName() << "\n");
|
||||
++NumMismatchedProfile;
|
||||
if (!SalvageStaleProfile)
|
||||
return false;
|
||||
}
|
||||
++NumMatchedProfile;
|
||||
} else {
|
||||
if (getFunctionLoc(F) == 0)
|
||||
return false;
|
||||
@@ -2185,7 +2194,7 @@ bool SampleProfileLoader::doInitialization(Module &M,
|
||||
|
||||
// Load pseudo probe descriptors for probe-based function samples.
|
||||
if (Reader->profileIsProbeBased()) {
|
||||
ProbeManager = std::make_unique<PseudoProbeManager>(M);
|
||||
ProbeManager = std::make_unique<PseudoProbeManager>(M, LTOPhase);
|
||||
if (!ProbeManager->moduleIsProbed(M)) {
|
||||
const char *Msg =
|
||||
"Pseudo-probe-based profile requires SampleProfileProbePass";
|
||||
@@ -2197,8 +2206,8 @@ bool SampleProfileLoader::doInitialization(Module &M,
|
||||
|
||||
if (ReportProfileStaleness || PersistProfileStaleness ||
|
||||
SalvageStaleProfile) {
|
||||
MatchingManager =
|
||||
std::make_unique<SampleProfileMatcher>(M, *Reader, ProbeManager.get());
|
||||
MatchingManager = std::make_unique<SampleProfileMatcher>(
|
||||
M, *Reader, ProbeManager.get(), LTOPhase);
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -2452,7 +2461,7 @@ void SampleProfileMatcher::runStaleProfileMatching(
|
||||
}
|
||||
}
|
||||
|
||||
void SampleProfileMatcher::runOnFunction(const Function &F) {
|
||||
void SampleProfileMatcher::runOnFunction(Function &F) {
|
||||
// We need to use flattened function samples for matching.
|
||||
// Unlike IR, which includes all callsites from the source code, the callsites
|
||||
// in profile only show up when they are hit by samples, i,e. the profile
|
||||
@@ -2481,8 +2490,16 @@ void SampleProfileMatcher::runOnFunction(const Function &F) {
|
||||
// support for pseudo-probe.
|
||||
if (SalvageStaleProfile && FunctionSamples::ProfileIsProbeBased &&
|
||||
!ProbeManager->profileIsValid(F, *FSFlattened)) {
|
||||
// The matching result will be saved to IRToProfileLocationMap, create a new
|
||||
// map for each function.
|
||||
// For imported functions, the checksum metadata(pseudo_probe_desc) are
|
||||
// dropped, so we leverage function attribute(profile-checksum-mismatch) to
|
||||
// transfer the info: add the attribute during pre-link phase and check it
|
||||
// during post-link phase(see "profileIsValid").
|
||||
if (FunctionSamples::ProfileIsProbeBased &&
|
||||
LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
|
||||
F.addFnAttr("profile-checksum-mismatch");
|
||||
|
||||
// The matching result will be saved to IRToProfileLocationMap, create a
|
||||
// new map for each function.
|
||||
auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
|
||||
runStaleProfileMatching(F, IRAnchors, ProfileAnchors,
|
||||
IRToProfileLocationMap);
|
||||
@@ -2758,8 +2775,9 @@ void SampleProfileMatcher::distributeIRToProfileLocationMap(
|
||||
FS.setIRToProfileLocationMap(&(ProfileMappings->second));
|
||||
}
|
||||
|
||||
for (auto &Inlinees : FS.getCallsiteSamples()) {
|
||||
for (auto FS : Inlinees.second) {
|
||||
for (auto &Callees :
|
||||
const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
|
||||
for (auto &FS : Callees.second) {
|
||||
distributeIRToProfileLocationMap(FS.second);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
main:252:0
|
||||
1: 0
|
||||
2: 50
|
||||
5: 50
|
||||
7: bar:102
|
||||
1: 51
|
||||
2: baz:51
|
||||
1: 51
|
||||
!CFGChecksum: 4294967295
|
||||
!Attributes: 3
|
||||
!CFGChecksum: 281479271677951
|
||||
!Attributes: 2
|
||||
!CFGChecksum: 281582081721716
|
||||
bar:1:1
|
||||
1: 1
|
||||
!CFGChecksum: 281479271677951
|
||||
@@ -0,0 +1,67 @@
|
||||
; REQUIRES: x86_64-linux
|
||||
; REQUIRES: asserts
|
||||
; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/pseudo-probe-callee-profile-mismatch.prof -pass-remarks=inline -S -o %t 2>&1 | FileCheck %s --check-prefix=INLINE
|
||||
; RUN: FileCheck %s < %t
|
||||
; RUN: FileCheck %s < %t --check-prefix=MERGE
|
||||
|
||||
|
||||
; Make sure bar is inlined into main for attr merging verification.
|
||||
; INLINE: 'bar' inlined into 'main'
|
||||
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define i32 @baz() #0 {
|
||||
entry:
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define i32 @bar() #0 !dbg !11 {
|
||||
; CHECK: define {{.*}} @bar() {{.*}} #[[#BAR_ATTR:]] !
|
||||
entry:
|
||||
%call = call i32 @baz()
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define i32 @main() #0 {
|
||||
; MERGE: define {{.*}} @main() {{.*}} #[[#MAIN_ATTR:]] !
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.cond, %entry
|
||||
%call = call i32 @bar(), !dbg !14
|
||||
br label %for.cond
|
||||
}
|
||||
|
||||
; CHECK: attributes #[[#BAR_ATTR]] = {{{.*}} "profile-checksum-mismatch" {{.*}}}
|
||||
|
||||
; Verify the attribute is not merged into the caller.
|
||||
; MERGE-NOT: attributes #[[#MAIN_ATTR]] = {{{.*}} "profile-checksum-mismatch" {{.*}}}
|
||||
|
||||
attributes #0 = { "use-sample-profile" }
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!7}
|
||||
!llvm.pseudo_probe_desc = !{!8, !9, !10}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !2, splitDebugInlining: false, nameTableKind: None)
|
||||
!1 = !DIFile(filename: "test.c", directory: "/home", checksumkind: CSK_MD5, checksum: "0df0c950a93a603a7d13f0a9d4623642")
|
||||
!2 = !{!3}
|
||||
!3 = !DIGlobalVariableExpression(var: !4, expr: !DIExpression())
|
||||
!4 = distinct !DIGlobalVariable(name: "x", scope: !0, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true)
|
||||
!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
|
||||
!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
|
||||
!7 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!8 = !{i64 7546896869197086323, i64 4294967295, !"baz"}
|
||||
!9 = !{i64 -2012135647395072713, i64 281530612780802, !"bar"}
|
||||
!10 = !{i64 -2624081020897602054, i64 281582081721716, !"main"}
|
||||
!11 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 5, type: !12, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !13)
|
||||
!12 = distinct !DISubroutineType(types: !13)
|
||||
!13 = !{}
|
||||
!14 = !DILocation(line: 15, column: 10, scope: !15)
|
||||
!15 = !DILexicalBlockFile(scope: !16, file: !1, discriminator: 186646591)
|
||||
!16 = distinct !DILexicalBlock(scope: !17, file: !1, line: 14, column: 40)
|
||||
!17 = distinct !DILexicalBlock(scope: !18, file: !1, line: 14, column: 3)
|
||||
!18 = distinct !DILexicalBlock(scope: !19, file: !1, line: 14, column: 3)
|
||||
!19 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 12, type: !20, scopeLine: 13, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !13)
|
||||
!20 = !DISubroutineType(types: !13)
|
||||
@@ -0,0 +1,63 @@
|
||||
; REQUIRES: x86_64-linux
|
||||
; REQUIRES: asserts
|
||||
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-callee-profile-mismatch.prof --salvage-stale-profile -S --debug-only=sample-profile,sample-profile-impl -pass-remarks=inline 2>&1 | FileCheck %s
|
||||
|
||||
|
||||
; CHECK: Run stale profile matching for bar
|
||||
; CHECK: Callsite with callee:baz is matched from 4 to 2
|
||||
; CHECK: 'baz' inlined into 'main' to match profiling context with (cost=always): preinliner at callsite bar:3:8.4 @ main:3:10.7
|
||||
|
||||
; CHECK: Probe descriptor missing for Function bar
|
||||
; CHECK: Profile is invalid due to CFG mismatch for Function bar
|
||||
|
||||
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define i32 @main() #0 {
|
||||
%1 = call i32 @bar(), !dbg !13
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define available_externally i32 @bar() #1 !dbg !21 {
|
||||
%1 = call i32 @baz(), !dbg !23
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define available_externally i32 @baz() #0 !dbg !25 {
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
attributes #0 = { "use-sample-profile" }
|
||||
attributes #1 = { "profile-checksum-mismatch" "use-sample-profile" }
|
||||
|
||||
!llvm.dbg.cu = !{!0, !7, !9}
|
||||
!llvm.module.flags = !{!11}
|
||||
!llvm.pseudo_probe_desc = !{!12}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !2, splitDebugInlining: false, nameTableKind: None)
|
||||
!1 = !DIFile(filename: "test.c", directory: "/home/test", checksumkind: CSK_MD5, checksum: "7220f1a2d70ff869f1a6ab7958e3c393")
|
||||
!2 = !{!3}
|
||||
!3 = !DIGlobalVariableExpression(var: !4, expr: !DIExpression())
|
||||
!4 = distinct !DIGlobalVariable(name: "x", scope: !0, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true)
|
||||
!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
|
||||
!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
|
||||
!7 = distinct !DICompileUnit(language: DW_LANG_C11, file: !8, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
|
||||
!8 = !DIFile(filename: "test1.v1.c", directory: "/home/test", checksumkind: CSK_MD5, checksum: "76696bd6bfe16a9f227fe03cfdb6a82c")
|
||||
!9 = distinct !DICompileUnit(language: DW_LANG_C11, file: !10, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
|
||||
!10 = !DIFile(filename: "test2.c", directory: "/home/test", checksumkind: CSK_MD5, checksum: "553093afc026f9c73562eb3b0c5b7532")
|
||||
!11 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!12 = !{i64 -2624081020897602054, i64 281582081721716, !"main"}
|
||||
!13 = !DILocation(line: 8, column: 10, scope: !14)
|
||||
!14 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 186646591)
|
||||
!15 = distinct !DILexicalBlock(scope: !16, file: !1, line: 7, column: 40)
|
||||
!16 = distinct !DILexicalBlock(scope: !17, file: !1, line: 7, column: 3)
|
||||
!17 = distinct !DILexicalBlock(scope: !18, file: !1, line: 7, column: 3)
|
||||
!18 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !19, scopeLine: 6, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !20)
|
||||
!19 = distinct !DISubroutineType(types: !20)
|
||||
!20 = !{}
|
||||
!21 = distinct !DISubprogram(name: "bar", scope: !8, file: !8, line: 3, type: !22, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !7, retainedNodes: !20)
|
||||
!22 = !DISubroutineType(types: !20)
|
||||
!23 = !DILocation(line: 6, column: 8, scope: !24)
|
||||
!24 = !DILexicalBlockFile(scope: !21, file: !8, discriminator: 186646567)
|
||||
!25 = distinct !DISubprogram(name: "baz", scope: !10, file: !10, line: 1, type: !22, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !9, retainedNodes: !20)
|
||||
@@ -106,7 +106,7 @@ define available_externally dso_local i32 @bar(i32 noundef %0) local_unnamed_add
|
||||
ret i32 %2, !dbg !132
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
|
||||
attributes #0 = { nounwind uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" "profile-checksum-mismatch"}
|
||||
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
|
||||
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
|
||||
attributes #3 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
|
||||
|
||||
@@ -48,6 +48,8 @@
|
||||
; }
|
||||
; }
|
||||
|
||||
; Verify not running profile matching for checksum matched function.
|
||||
; CHECK-NOT: Run stale profile matching for bar
|
||||
|
||||
; CHECK: Run stale profile matching for main
|
||||
|
||||
|
||||
Reference in New Issue
Block a user