This reverts commit 314e22bcab, reapplying
PR150735 with a fix for the unstable iteration order exposed by the new
tests (PR151039).
@@ -97,6 +97,8 @@ STATISTIC(MissingAllocForContextId,
           "Number of missing alloc nodes for context ids");
 STATISTIC(SkippedCallsCloning,
           "Number of calls skipped during cloning due to unexpected operand");
+STATISTIC(MismatchedCloneAssignments,
+          "Number of callsites assigned to call multiple non-matching clones");
 
 static cl::opt<std::string> DotFilePathPrefix(
     "memprof-dot-file-path-prefix", cl::init(""), cl::Hidden,
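The new counter follows the standard LLVM STATISTIC pattern: declare it under the pass's DEBUG_TYPE, bump it where the condition is detected, and read it back via -stats (the new tests note that -stats requires asserts). A minimal sketch of that pattern, with hypothetical pass and counter names not taken from this patch:

```cpp
// Minimal sketch of the LLVM STATISTIC pattern (hypothetical names, not part
// of this patch).
#include "llvm/ADT/Statistic.h"

#define DEBUG_TYPE "example-pass" // hypothetical debug type

STATISTIC(ExampleMismatches, "Number of example mismatches seen");

static void noteMismatch() {
  ++ExampleMismatches; // reported under "example-pass" in -stats output
}
```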
@@ -2060,6 +2062,20 @@ static bool isMemProfClone(const Function &F) {
   return F.getName().contains(MemProfCloneSuffix);
 }
 
+// Return the clone number of the given function by extracting it from the
+// memprof suffix. Assumes the caller has already confirmed it is a memprof
+// clone.
+static unsigned getMemProfCloneNum(const Function &F) {
+  assert(isMemProfClone(F));
+  auto Pos = F.getName().find_last_of('.');
+  assert(Pos > 0);
+  unsigned CloneNo;
+  bool Err = F.getName().drop_front(Pos + 1).getAsInteger(10, CloneNo);
+  assert(!Err);
+  (void)Err;
+  return CloneNo;
+}
+
 std::string ModuleCallsiteContextGraph::getLabel(const Function *Func,
                                                  const Instruction *Call,
                                                  unsigned CloneNo) const {
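getMemProfCloneNum relies on three StringRef operations: find_last_of to locate the final '.', drop_front to strip everything up to it, and getAsInteger(10, ...), which returns true on failure (hence the assert(!Err)). A standalone sketch of the same parsing, using a hypothetical helper name rather than anything from the patch:

```cpp
// Standalone sketch of the suffix parsing used above (hypothetical helper,
// not part of the patch): "C.memprof.1" -> 1.
#include "llvm/ADT/StringRef.h"
#include <cassert>

static unsigned parseCloneSuffix(llvm::StringRef Name) {
  size_t Pos = Name.find_last_of('.');
  assert(Pos != llvm::StringRef::npos && "expected a '.<N>' clone suffix");
  unsigned CloneNo = 0;
  // getAsInteger returns true on failure, so a false result means success.
  bool Err = Name.drop_front(Pos + 1).getAsInteger(10, CloneNo);
  assert(!Err && "clone suffix should be a decimal number");
  (void)Err;
  return CloneNo;
}
```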
@@ -3979,7 +3995,22 @@ IndexCallsiteContextGraph::getAllocationCallType(const CallInfo &Call) const {
 
 void ModuleCallsiteContextGraph::updateCall(CallInfo &CallerCall,
                                             FuncInfo CalleeFunc) {
-  if (CalleeFunc.cloneNo() > 0)
+  auto *CurF = cast<CallBase>(CallerCall.call())->getCalledFunction();
+  auto NewCalleeCloneNo = CalleeFunc.cloneNo();
+  if (isMemProfClone(*CurF)) {
+    // If we already assigned this callsite to call a specific non-default
+    // clone (i.e. not the original function which is clone 0), ensure that we
+    // aren't trying to now update it to call a different clone, which is
+    // indicative of a bug in the graph or function assignment.
+    auto CurCalleeCloneNo = getMemProfCloneNum(*CurF);
+    if (CurCalleeCloneNo != NewCalleeCloneNo) {
+      LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was "
+                        << CurCalleeCloneNo << " now " << NewCalleeCloneNo
+                        << "\n");
+      MismatchedCloneAssignments++;
+    }
+  }
+  if (NewCalleeCloneNo > 0)
     cast<CallBase>(CallerCall.call())->setCalledFunction(CalleeFunc.func());
   OREGetter(CallerCall.call()->getFunction())
       .emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CallerCall.call())
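The check added to ModuleCallsiteContextGraph::updateCall boils down to: if the call already targets a ".memprof." clone, a request to retarget it to a different clone number is counted as a mismatch in the new statistic (the retargeting below still happens). A simplified sketch of just that decision, with a hypothetical helper that is not the pass's code:

```cpp
// Simplified sketch of the decision above (hypothetical helper, not the
// pass's code): a callee already carrying a ".memprof.<N>" suffix is a
// non-default clone, so retargeting it to a different clone number counts
// as a mismatch.
#include "llvm/ADT/StringRef.h"

static bool isCloneAssignmentMismatch(llvm::StringRef CurrentCalleeName,
                                      unsigned NewCalleeCloneNo) {
  if (!CurrentCalleeName.contains(".memprof.")) // original function, clone 0
    return false;
  unsigned CurCloneNo = 0;
  // rsplit('.') splits at the last '.'; the tail is the clone number.
  CurrentCalleeName.rsplit('.').second.getAsInteger(10, CurCloneNo);
  return CurCloneNo != NewCalleeCloneNo;
}
```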
@@ -3995,7 +4026,19 @@ void IndexCallsiteContextGraph::updateCall(CallInfo &CallerCall,
   assert(CI &&
          "Caller cannot be an allocation which should not have profiled calls");
   assert(CI->Clones.size() > CallerCall.cloneNo());
-  CI->Clones[CallerCall.cloneNo()] = CalleeFunc.cloneNo();
+  auto NewCalleeCloneNo = CalleeFunc.cloneNo();
+  auto &CurCalleeCloneNo = CI->Clones[CallerCall.cloneNo()];
+  // If we already assigned this callsite to call a specific non-default
+  // clone (i.e. not the original function which is clone 0), ensure that we
+  // aren't trying to now update it to call a different clone, which is
+  // indicative of a bug in the graph or function assignment.
+  if (CurCalleeCloneNo != 0 && CurCalleeCloneNo != NewCalleeCloneNo) {
+    LLVM_DEBUG(dbgs() << "Mismatch in call clone assignment: was "
+                      << CurCalleeCloneNo << " now " << NewCalleeCloneNo
+                      << "\n");
+    MismatchedCloneAssignments++;
+  }
+  CurCalleeCloneNo = NewCalleeCloneNo;
 }
 
 // Update the debug information attached to NewFunc to use the clone Name. Note
@@ -4714,6 +4757,19 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
         // where the callers were assigned to different clones of a function.
       }
 
+      auto FindFirstAvailFuncClone = [&]() {
+        // Find first function in FuncCloneInfos without an assigned
+        // clone of this callsite Node. We should always have one
+        // available at this point due to the earlier cloning when the
+        // FuncCloneInfos size was smaller than the clone number.
+        for (auto &CF : FuncCloneInfos) {
+          if (!FuncCloneToCurNodeCloneMap.count(CF.FuncClone))
+            return CF.FuncClone;
+        }
+        assert(false &&
+               "Expected an available func clone for this callsite clone");
+      };
+
       // See if we can use existing function clone. Walk through
       // all caller edges to see if any have already been assigned to
       // a clone of this callsite's function. If we can use it, do so. If not,
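Hoisting this search into the FindFirstAvailFuncClone lambda lets the later hunks reuse it from two places. The selection itself is simple: walk the function clones in their stored order and take the first one not yet claimed by a clone of this callsite node; the commit message's iteration-order concern is exactly about keeping that walk deterministic. A generic sketch with hypothetical types, not the pass's real data structures:

```cpp
// Generic sketch of the "first available" selection (hypothetical types, not
// the pass's real data structures): walking candidates in a stable, stored
// order keeps the chosen clone the same from run to run, whereas walking a
// hash-keyed container could make the choice depend on iteration order.
#include <cassert>
#include <set>
#include <vector>

struct FuncClone {
  unsigned Id;
};

static const FuncClone *
findFirstAvailClone(const std::vector<FuncClone> &Candidates,
                    const std::set<unsigned> &UsedByThisNode) {
  for (const FuncClone &FC : Candidates)
    if (!UsedByThisNode.count(FC.Id))
      return &FC;
  assert(false && "expected an available clone");
  return nullptr;
}
```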
@@ -4830,16 +4886,7 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
           // clone of OrigFunc for another caller during this iteration over
           // its caller edges.
           if (!FuncCloneAssignedToCurCallsiteClone) {
-            // Find first function in FuncCloneInfos without an assigned
-            // clone of this callsite Node. We should always have one
-            // available at this point due to the earlier cloning when the
-            // FuncCloneInfos size was smaller than the clone number.
-            for (auto &CF : FuncCloneInfos) {
-              if (!FuncCloneToCurNodeCloneMap.count(CF.FuncClone)) {
-                FuncCloneAssignedToCurCallsiteClone = CF.FuncClone;
-                break;
-              }
-            }
+            FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
             assert(FuncCloneAssignedToCurCallsiteClone);
             // Assign Clone to FuncCloneAssignedToCurCallsiteClone
             AssignCallsiteCloneToFuncClone(
@@ -4853,6 +4900,31 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
               FuncCloneAssignedToCurCallsiteClone);
         }
       }
+      // If we didn't assign a function clone to this callsite clone yet, e.g.
+      // none of its callers has a non-null call, do the assignment here.
+      // We want to ensure that every callsite clone is assigned to some
+      // function clone, so that the call updates below work as expected.
+      // In particular if this is the original callsite, we want to ensure it
+      // is assigned to the original function, otherwise the original function
+      // will appear available for assignment to other callsite clones,
+      // leading to unintended effects. For one, the unknown and not updated
+      // callers will call into cloned paths leading to the wrong hints,
+      // because they still call the original function (clone 0). Also,
+      // because all callsites start out as being clone 0 by default, we can't
+      // easily distinguish between callsites explicitly assigned to clone 0
+      // vs those never assigned, which can lead to multiple updates of the
+      // calls when invoking updateCall below, with mismatched clone values.
+      // TODO: Add a flag to the callsite nodes or some other mechanism to
+      // better distinguish and identify callsite clones that are not getting
+      // assigned to function clones as expected.
+      if (!FuncCloneAssignedToCurCallsiteClone) {
+        FuncCloneAssignedToCurCallsiteClone = FindFirstAvailFuncClone();
+        assert(FuncCloneAssignedToCurCallsiteClone &&
+               "No available func clone for this callsite clone");
+        AssignCallsiteCloneToFuncClone(
+            FuncCloneAssignedToCurCallsiteClone, Call, Clone,
+            /*IsAlloc=*/AllocationCallToContextNodeMap.contains(Call));
+      }
     }
     if (VerifyCCG) {
       checkNode<DerivedCCG, FuncTy, CallTy>(Node);
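The long comment above is the heart of the fix: every callsite clone, including the original (clone 0), must end up mapped to some function clone before calls are rewritten, even when none of its callers forced a choice (for example because a caller lost its !callsite metadata, as in the new tests). A simplified, hypothetical model of that fallback, not the pass's real data structures:

```cpp
// Simplified model of the fallback above (hypothetical, not the pass's real
// data structures): callsite clones whose callers never forced a function
// clone take the first unused one, so the original callsite stays pinned to
// the original function instead of freeing it up for a later clone.
#include <cstdio>
#include <optional>
#include <vector>

int main() {
  // Function clones 0 (original), 1, 2; callsite clones 0..2.
  std::vector<std::optional<unsigned>> CallsiteToFuncClone(3);
  CallsiteToFuncClone[1] = 1; // forced by an already-assigned caller
  CallsiteToFuncClone[2] = 2; // forced by an already-assigned caller

  std::vector<bool> FuncCloneUsed(3, false);
  for (const auto &Assigned : CallsiteToFuncClone)
    if (Assigned)
      FuncCloneUsed[*Assigned] = true;

  // Fallback: clone 0 had no caller-driven assignment (e.g. its only caller
  // lacked !callsite metadata), so it takes the first unused function clone,
  // which is the original function (0).
  for (auto &Assigned : CallsiteToFuncClone) {
    if (Assigned)
      continue;
    for (unsigned F = 0; F < FuncCloneUsed.size(); ++F) {
      if (!FuncCloneUsed[F]) {
        Assigned = F;
        FuncCloneUsed[F] = true;
        break;
      }
    }
  }

  for (unsigned I = 0; I < CallsiteToFuncClone.size(); ++I)
    std::printf("callsite clone %u -> function clone %u\n", I,
                *CallsiteToFuncClone[I]);
  return 0;
}
```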
llvm/test/ThinLTO/X86/memprof_func_assign_fix.ll (new file, 145 lines)
@@ -0,0 +1,145 @@
;; Make sure we assign the original callsite to a function clone (which will be
;; the original function clone), even when we cannot update its caller (due to
;; missing metadata e.g. from mismatched profiles). Otherwise we will try to use
;; the original function for a different clone, leading to confusion later when
;; rewriting the calls.

;; -stats requires asserts
; REQUIRES: asserts

; RUN: opt -thinlto-bc %s >%t.o
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
; RUN:   -supports-hot-cold-new \
; RUN:   -r=%t.o,A,plx \
; RUN:   -r=%t.o,B,plx \
; RUN:   -r=%t.o,C,plx \
; RUN:   -r=%t.o,D,plx \
; RUN:   -r=%t.o,E,plx \
; RUN:   -r=%t.o,F,plx \
; RUN:   -r=%t.o,G,plx \
; RUN:   -r=%t.o,A1,plx \
; RUN:   -r=%t.o,B1,plx \
; RUN:   -r=%t.o,_Znwm, \
; RUN:   -memprof-verify-ccg -memprof-verify-nodes -debug-only=memprof-context-disambiguation \
; RUN:   -stats -pass-remarks=memprof-context-disambiguation -save-temps \
; RUN:   -o %t.out 2>&1 | FileCheck %s \
; RUN:   --implicit-check-not="Mismatch in call clone assignment" \
; RUN:   --implicit-check-not="Number of callsites assigned to call multiple non-matching clones"

; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR

; ModuleID = '<stdin>'
source_filename = "reduced.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; IR-LABEL: define dso_local void @A()
define void @A() #0 {
; IR:   call void @C()
  call void @C()
  ret void
}

; IR-LABEL: define dso_local void @B()
define void @B() #0 {
; IR:   call void @C.memprof.1()
  call void @C(), !callsite !1
  ret void
}

; IR-LABEL: define dso_local void @C()
define void @C() #0 {
; IR:   call void @F()
  call void @F(), !callsite !16
; IR:   call void @D()
  call void @D(), !callsite !2
  ret void
}

; IR-LABEL: define dso_local void @D()
define void @D() #0 {
; IR:   call void @E()
  call void @E(), !callsite !3
; IR:   call void @G()
  call void @G(), !callsite !17
  ret void
}

; IR-LABEL: define dso_local void @E()
define void @E() #0 {
; IR:   call ptr @_Znwm(i64 0) #[[NOTCOLD:[0-9]+]]
  %1 = call ptr @_Znwm(i64 0), !memprof !4, !callsite !9
  ret void
}

; IR-LABEL: define dso_local void @F()
define void @F() #0 {
; IR:   call void @G()
  call void @G(), !callsite !17
  ret void
}

; IR-LABEL: define dso_local void @G()
define void @G() #0 {
; IR:   call ptr @_Znwm(i64 0) #[[NOTCOLD]]
  %2 = call ptr @_Znwm(i64 0), !memprof !10, !callsite !15
  ret void
}

; IR-LABEL: define dso_local void @A1()
define void @A1() #0 {
; IR:   call void @C()
  call void @C(), !callsite !18
  ret void
}

; IR-LABEL: define dso_local void @B1()
define void @B1() #0 {
; IR:   call void @C.memprof.1()
  call void @C(), !callsite !19
  ret void
}

; IR-LABEL: define dso_local void @C.memprof.1()
; IR:   call void @F.memprof.1()
; IR:   call void @D.memprof.1()

; IR-LABEL: define dso_local void @D.memprof.1()
; IR:   call void @E.memprof.1()
; IR:   call void @G()

; IR-LABEL: define dso_local void @E.memprof.1()
; IR:   call ptr @_Znwm(i64 0) #[[COLD:[0-9]+]]

; IR-LABEL: define dso_local void @F.memprof.1()
; IR:   call void @G.memprof.1()

; IR-LABEL: define dso_local void @G.memprof.1()
; IR:   call ptr @_Znwm(i64 0) #[[COLD]]

declare ptr @_Znwm(i64)

attributes #0 = { noinline optnone }
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; IR: attributes #[[COLD]] = { "memprof"="cold" }

!0 = !{i64 123}
!1 = !{i64 234}
!2 = !{i64 345}
!3 = !{i64 456}
!4 = !{!5, !7}
!5 = !{!6, !"notcold"}
!6 = !{i64 567, i64 456, i64 345, i64 123}
!7 = !{!8, !"cold"}
!8 = !{i64 567, i64 456, i64 345, i64 234}
!9 = !{i64 567}
!10 = !{!11, !13}
!11 = !{!12, !"notcold"}
!12 = !{i64 678, i64 891, i64 789, i64 912}
!13 = !{!14, !"cold"}
!14 = !{i64 678, i64 891, i64 789, i64 812}
!15 = !{i64 678}
!16 = !{i64 789}
!17 = !{i64 891}
!18 = !{i64 912}
!19 = !{i64 812}

@@ -0,0 +1,130 @@
;; Make sure we assign the original callsite to a function clone (which will be
;; the original function clone), even when we cannot update its caller (due to
;; missing metadata e.g. from mismatched profiles). Otherwise we will try to use
;; the original function for a different clone, leading to confusion later when
;; rewriting the calls.

;; -stats requires asserts
; REQUIRES: asserts

; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
; RUN:   -memprof-verify-ccg -memprof-verify-nodes -stats -debug \
; RUN:   -pass-remarks=memprof-context-disambiguation %s -S 2>&1 | \
; RUN:   FileCheck %s --implicit-check-not="Mismatch in call clone assignment" \
; RUN:   --implicit-check-not="Number of callsites assigned to call multiple non-matching clones"

; ModuleID = '<stdin>'
source_filename = "reduced.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; CHECK-LABEL: define void @A()
define void @A() {
; CHECK:   call void @C()
  call void @C()
  ret void
}

; CHECK-LABEL: define void @B()
define void @B() {
; CHECK:   call void @C.memprof.1()
  call void @C(), !callsite !1
  ret void
}

; CHECK-LABEL: define void @C()
define void @C() {
; CHECK:   call void @F()
  call void @F(), !callsite !16
; CHECK:   call void @D()
  call void @D(), !callsite !2
  ret void
}

; CHECK-LABEL: define void @D()
define void @D() {
; CHECK:   call void @E()
  call void @E(), !callsite !3
; CHECK:   call void @G()
  call void @G(), !callsite !17
  ret void
}

; CHECK-LABEL: define void @E()
define void @E() {
; CHECK:   call ptr @_Znwm(i64 0) #[[NOTCOLD:[0-9]+]]
  %1 = call ptr @_Znwm(i64 0), !memprof !4, !callsite !9
  ret void
}

; CHECK-LABEL: define void @F()
define void @F() {
; CHECK:   call void @G()
  call void @G(), !callsite !17
  ret void
}

; CHECK-LABEL: define void @G()
define void @G() {
; CHECK:   call ptr @_Znwm(i64 0) #[[NOTCOLD]]
  %2 = call ptr @_Znwm(i64 0), !memprof !10, !callsite !15
  ret void
}

; CHECK-LABEL: define void @A1()
define void @A1() {
; CHECK:   call void @C()
  call void @C(), !callsite !18
  ret void
}

; CHECK-LABEL: define void @B1()
define void @B1() {
; CHECK:   call void @C.memprof.1()
  call void @C(), !callsite !19
  ret void
}

; CHECK-LABEL: define void @C.memprof.1()
; CHECK:   call void @F.memprof.1()
; CHECK:   call void @D.memprof.1()

; CHECK-LABEL: define void @D.memprof.1()
; CHECK:   call void @E.memprof.1()
; CHECK:   call void @G()

; CHECK-LABEL: define void @E.memprof.1()
; CHECK:   call ptr @_Znwm(i64 0) #[[COLD:[0-9]+]]

; CHECK-LABEL: define void @F.memprof.1()
; CHECK:   call void @G.memprof.1()

; CHECK-LABEL: define void @G.memprof.1()
; CHECK:   call ptr @_Znwm(i64 0) #[[COLD]]

declare ptr @_Znwm(i64)

; CHECK: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; CHECK: attributes #[[COLD]] = { "memprof"="cold" }

!0 = !{i64 123}
!1 = !{i64 234}
!2 = !{i64 345}
!3 = !{i64 456}
!4 = !{!5, !7}
!5 = !{!6, !"notcold"}
!6 = !{i64 567, i64 456, i64 345, i64 123}
!7 = !{!8, !"cold"}
!8 = !{i64 567, i64 456, i64 345, i64 234}
!9 = !{i64 567}
!10 = !{!11, !13}
!11 = !{!12, !"notcold"}
!12 = !{i64 678, i64 891, i64 789, i64 912}
!13 = !{!14, !"cold"}
!14 = !{i64 678, i64 891, i64 789, i64 812}
!15 = !{i64 678}
!16 = !{i64 789}
!17 = !{i64 891}
!18 = !{i64 912}
!19 = !{i64 812}