[MemProf] Context disambiguation cloning pass [patch 1a/3]

Support for building, printing, and displaying CallsiteContextGraph
which represents the MemProf metadata contexts. Uses CRTP to enable
support for both IR (regular LTO) and summary (ThinLTO). This patch
includes the support for building it in regular LTO mode (from
memprof and callsite metadata), and the next patch will add the
handling for building it from ThinLTO summaries.

Also includes support for dumping the graph to text and to dot files.

Follow-on patches will contain the support for cloning on the graph and
in the IR.

The graph represents the call contexts in all memprof metadata on
allocation calls, with nodes for the allocations themselves, as well as
for the calls in each context. The graph is initially built from the
allocation memprof metadata (or summary) MIBs. It is then updated to
match calls with callsite metadata onto the nodes, updating it to
reflect any inlining performed on those calls.

Each MIB (representing an allocation's call context with allocation
behavior) is assigned a unique context id during the graph build. The
edges and nodes in the graph are decorated with the context ids they
carry. This is used to correctly update the graph when cloning is
performed so that we can uniquify the context for a single (possibly
cloned) allocation.

Depends on D140786.

Differential Revision: https://reviews.llvm.org/D140908
This commit is contained in:
Teresa Johnson
2022-12-29 12:11:38 -08:00
parent ee5617dc71
commit d6ad4f01c3
14 changed files with 3037 additions and 184 deletions

View File

@@ -0,0 +1,38 @@
//==- MemProfContextDisambiguation.h - Context Disambiguation ----*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implements support for context disambiguation of allocation calls for profile
// guided heap optimization using memprof metadata. See implementation file for
// details.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H
#define LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
class Module;
/// Module pass implementing context disambiguation of allocation calls for
/// profile guided heap optimization using memprof metadata. It plugs into the
/// new pass manager via PassInfoMixin and the run() entry point below. See the
/// implementation file for details of the algorithm.
class MemProfContextDisambiguation
    : public PassInfoMixin<MemProfContextDisambiguation> {
  /// Run the context disambiguator on \p M, returns true if any changes made.
  bool processModule(Module &M);

public:
  /// The pass carries no state of its own, so the compiler-generated default
  /// constructor is sufficient.
  MemProfContextDisambiguation() = default;

  /// Pass manager entry point: runs the pass over module \p M. \p AM is the
  /// module analysis manager supplied by the pass manager.
  /// NOTE(review): the returned PreservedAnalyses is presumably derived from
  /// the result of processModule() -- confirm against the implementation file.
  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
} // end namespace llvm
#endif // LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H

View File

@@ -117,6 +117,7 @@
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/LoopExtractor.h"
#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
#include "llvm/Transforms/IPO/MergeFunctions.h"
#include "llvm/Transforms/IPO/ModuleInliner.h"
#include "llvm/Transforms/IPO/OpenMPOpt.h"

View File

@@ -57,6 +57,7 @@
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
#include "llvm/Transforms/IPO/MergeFunctions.h"
#include "llvm/Transforms/IPO/ModuleInliner.h"
#include "llvm/Transforms/IPO/OpenMPOpt.h"
@@ -271,6 +272,10 @@ static cl::opt<AttributorRunOption> AttributorRun(
clEnumValN(AttributorRunOption::NONE, "none",
"disable attributor runs")));
// Opt-in switch for the MemProf context disambiguation pass. It defaults to
// off (cl::init(false)) and is marked cl::Hidden. buildLTODefaultPipeline
// consults it before adding MemProfContextDisambiguation to the pipeline.
cl::opt<bool> EnableMemProfContextDisambiguation(
"enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation"));
PipelineTuningOptions::PipelineTuningOptions() {
LoopInterleaving = true;
LoopVectorization = true;
@@ -1709,6 +1714,12 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
InlineContext{ThinOrFullLTOPhase::FullLTOPostLink,
InlinePass::CGSCCInliner}));
// Perform context disambiguation after inlining, since that would reduce the
// amount of additional cloning required to distinguish the allocation
// contexts.
if (EnableMemProfContextDisambiguation)
MPM.addPass(MemProfContextDisambiguation());
// Optimize globals again after we ran the inliner.
MPM.addPass(GlobalOptPass());

View File

@@ -87,6 +87,7 @@ MODULE_PASS("name-anon-globals", NameAnonGlobalPass())
MODULE_PASS("no-op-module", NoOpModulePass())
MODULE_PASS("objc-arc-apelim", ObjCARCAPElimPass())
MODULE_PASS("partial-inliner", PartialInlinerPass())
MODULE_PASS("memprof-context-disambiguation", MemProfContextDisambiguation())
MODULE_PASS("pgo-icall-prom", PGOIndirectCallPromotion())
MODULE_PASS("pgo-instr-gen", PGOInstrumentationGen())
MODULE_PASS("pgo-instr-use", PGOInstrumentationUse())

View File

@@ -27,6 +27,7 @@ add_llvm_component_library(LLVMipo
Internalize.cpp
LoopExtractor.cpp
LowerTypeTests.cpp
MemProfContextDisambiguation.cpp
MergeFunctions.cpp
ModuleInliner.cpp
OpenMPOpt.cpp

File diff suppressed because it is too large Load Diff

View File

@@ -1,184 +0,0 @@
;; Check memprof summaries (per module, combined index, and distributed indexes)
; RUN: split-file %s %t
; RUN: opt -module-summary %t/a.ll -o %ta.bc
; RUN: opt -module-summary %t/b.ll -o %tb.bc
; RUN: llvm-dis -o - %ta.bc | FileCheck %s --check-prefix=PRELINKDISA
; PRELINKDISA: gv: (name: "main", {{.*}} callsites: ((callee: ^2, clones: (0), stackIds: (8632435727821051414)), (callee: ^2, clones: (0), stackIds: (15025054523792398438)))))) ; guid = 15822663052811949562
; RUN: llvm-dis -o - %tb.bc | FileCheck %s --check-prefix=PRELINKDISB
; PRELINKDISB: ^[[PLBAR:[0-9]+]] = gv: (name: "_Z3barv", {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438)))))))) ; guid = 4555904644815367798
; PRELINKDISB: ^[[PLFOO:[0-9]+]] = gv: (name: "_Z3foov", {{.*}} callsites: ((callee: ^[[PLBAZ:[0-9]+]], clones: (0), stackIds: (2732490490862098848)))))) ; guid = 9191153033785521275
; PRELINKDISB: ^[[PLBAZ]] = gv: (name: "_Z3bazv", {{.*}} callsites: ((callee: ^[[PLBAR]], clones: (0), stackIds: (12481870273128938184)))))) ; guid = 15176620447596392000
; RUN: llvm-bcanalyzer -dump %ta.bc | FileCheck %s --check-prefix=PRELINKBCANA
; PRELINKBCANA: <STACK_IDS abbrevid=4 op0=8632435727821051414 op1=-3421689549917153178/>
; RUN: llvm-bcanalyzer -dump %tb.bc | FileCheck %s --check-prefix=PRELINKBCANB
; PRELINKBCANB: <STACK_IDS abbrevid=4 op0=-5964873800580613432 op1=2732490490862098848 op2=8632435727821051414 op3=-3421689549917153178/>
; RUN: llvm-lto2 run %ta.bc %tb.bc -o %t -save-temps \
; RUN: -thinlto-distributed-indexes \
; RUN: -r=%ta.bc,main,plx \
; RUN: -r=%ta.bc,_Z3foov, \
; RUN: -r=%ta.bc,free, \
; RUN: -r=%ta.bc,sleep, \
; RUN: -r=%tb.bc,_Z3foov,pl \
; RUN: -r=%tb.bc,_Znam, \
; RUN: -r=%tb.bc,_Z3bazv,pl
; RUN: llvm-dis -o - %t.index.bc | FileCheck %s --check-prefix=COMBINEDDIS
; COMBINEDDIS: ^[[COMBBAR:[0-9]+]] = gv: (guid: 4555904644815367798, {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438))))))))
; COMBINEDDIS: ^[[COMBFOO:[0-9]+]] = gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: ^[[COMBBAZ:[0-9]+]], clones: (0), stackIds: (2732490490862098848))))))
; COMBINEDDIS: ^[[COMBBAZ]] = gv: (guid: 15176620447596392000, {{.*}} callsites: ((callee: ^[[COMBBAR]], clones: (0), stackIds: (12481870273128938184))))))
; COMBINEDDIS: ^[[COMBMAIN:[0-9]+]] = gv: (guid: 15822663052811949562, {{.*}} callsites: ((callee: ^[[COMBFOO]], clones: (0), stackIds: (8632435727821051414)), (callee: ^[[COMBFOO]], clones: (0), stackIds: (15025054523792398438))))))
; RUN: llvm-bcanalyzer -dump %t.index.bc | FileCheck %s --check-prefix=COMBINEDBCAN
; COMBINEDBCAN: <STACK_IDS abbrevid=4 op0=8632435727821051414 op1=-3421689549917153178 op2=-5964873800580613432 op3=2732490490862098848/>
; RUN: llvm-dis -o - %ta.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDDISA
; DISTRIBUTEDDISA: gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: null, clones: (0), stackIds: (2732490490862098848))))))
; DISTRIBUTEDDISA: gv: (guid: 15822663052811949562, {{.*}} callsites: ((callee: ^2, clones: (0), stackIds: (8632435727821051414)), (callee: ^2, clones: (0), stackIds: (15025054523792398438))))))
; RUN: llvm-dis -o - %tb.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDDISB
; DISTRIBUTEDDISB: ^[[DISTRBAR:[0-9]+]] = gv: (guid: 4555904644815367798, {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438))))))))
; DISTRIBUTEDDISB: ^[[DISTRFOO:[0-9]+]] = gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: ^[[DISTRBAZ:[0-9]+]], clones: (0), stackIds: (2732490490862098848))))))
; DISTRIBUTEDDISB: ^[[DISTRBAZ]] = gv: (guid: 15176620447596392000, {{.*}} callsites: ((callee: ^[[DISTRBAR]], clones: (0), stackIds: (12481870273128938184))))))
; RUN: llvm-bcanalyzer -dump %ta.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDBCANA
; DISTRIBUTEDBCANA: <STACK_IDS abbrevid=4 op0=8632435727821051414 op1=-3421689549917153178 op2=2732490490862098848/>
; RUN: llvm-bcanalyzer -dump %tb.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDBCANB
; DISTRIBUTEDBCANB: <STACK_IDS abbrevid=4 op0=8632435727821051414 op1=-3421689549917153178 op2=-5964873800580613432 op3=2732490490862098848/>
;--- a.ll
; ModuleID = 'a.cc'
source_filename = "a.cc"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: mustprogress norecurse uwtable
define dso_local noundef i32 @main(i32 noundef %argc, ptr nocapture noundef readnone %argv) local_unnamed_addr #0 !dbg !39 {
entry:
%call = call noundef ptr @_Z3foov(), !dbg !42, !callsite !43
%call1 = call noundef ptr @_Z3foov(), !dbg !44, !callsite !45
call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) %call, i8 0, i64 10, i1 false), !dbg !46
call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) %call1, i8 0, i64 10, i1 false), !dbg !47
call void @free(ptr noundef %call) #4, !dbg !48
%call2 = call i32 @sleep(i32 noundef 10), !dbg !49
call void @free(ptr noundef %call1) #4, !dbg !50
ret i32 0, !dbg !51
}
declare !dbg !52 noundef ptr @_Z3foov() local_unnamed_addr #1
; Function Attrs: argmemonly mustprogress nocallback nofree nounwind willreturn writeonly
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2
; Function Attrs: inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("free")
declare void @free(ptr allocptr nocapture noundef) local_unnamed_addr #3
declare !dbg !53 i32 @sleep(i32 noundef) local_unnamed_addr #1
attributes #0 = { mustprogress norecurse uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #2 = { argmemonly mustprogress nocallback nofree nounwind willreturn writeonly }
attributes #3 = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("free") "alloc-family"="malloc" "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #4 = { nounwind }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0 (git@github.com:llvm/llvm-project.git ffecb643ee2c49e55e0689339b6d5921b5e6ff8b)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
!1 = !DIFile(filename: "a.cc", directory: ".", checksumkind: CSK_MD5, checksum: "ebabd56909271a1d4a7cac81c10624d5")
!2 = !{i32 7, !"Dwarf Version", i32 5}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !{i32 1, !"wchar_size", i32 4}
!5 = !{i32 8, !"PIC Level", i32 2}
!6 = !{i32 7, !"PIE Level", i32 2}
!7 = !{i32 7, !"uwtable", i32 2}
!8 = !{i32 7, !"frame-pointer", i32 2}
!39 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !40, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41)
!40 = !DISubroutineType(types: !41)
!41 = !{}
!42 = !DILocation(line: 6, column: 13, scope: !39)
!43 = !{i64 8632435727821051414}
!44 = !DILocation(line: 7, column: 13, scope: !39)
!45 = !{i64 -3421689549917153178}
!46 = !DILocation(line: 8, column: 3, scope: !39)
!47 = !DILocation(line: 9, column: 3, scope: !39)
!48 = !DILocation(line: 10, column: 3, scope: !39)
!49 = !DILocation(line: 11, column: 3, scope: !39)
!50 = !DILocation(line: 12, column: 3, scope: !39)
!51 = !DILocation(line: 13, column: 3, scope: !39)
!52 = !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 4, type: !40, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !41)
!53 = !DISubprogram(name: "sleep", scope: !54, file: !54, line: 453, type: !40, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !41)
!54 = !DIFile(filename: "include/unistd.h", directory: "/usr", checksumkind: CSK_MD5, checksum: "ee8f41a17f563f029d0e930ad871815a")
;--- b.ll
; ModuleID = 'b.cc'
source_filename = "b.cc"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: mustprogress noinline uwtable
define internal noalias noundef nonnull ptr @_Z3barv() local_unnamed_addr #0 !dbg !39 {
entry:
%call = call noalias noundef nonnull dereferenceable(10) ptr @_Znam(i64 noundef 10) #2, !dbg !42, !memprof !43, !callsite !48
ret ptr %call, !dbg !49
}
; Function Attrs: nobuiltin allocsize(0)
declare noundef nonnull ptr @_Znam(i64 noundef) local_unnamed_addr #1
; Function Attrs: mustprogress noinline uwtable
define dso_local noalias noundef nonnull ptr @_Z3bazv() local_unnamed_addr #0 !dbg !50 {
entry:
%call = call noundef ptr @_Z3barv(), !dbg !51, !callsite !52
ret ptr %call, !dbg !53
}
; Function Attrs: mustprogress uwtable
define dso_local noalias noundef nonnull ptr @_Z3foov() local_unnamed_addr #3 !dbg !54 {
entry:
%call = call noundef ptr @_Z3bazv(), !dbg !55, !callsite !56
ret ptr %call, !dbg !57
}
attributes #0 = { mustprogress noinline uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { nobuiltin allocsize(0) "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #2 = { builtin allocsize(0) }
attributes #3 = { mustprogress uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8}
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0 (git@github.com:llvm/llvm-project.git ffecb643ee2c49e55e0689339b6d5921b5e6ff8b)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
!1 = !DIFile(filename: "b.cc", directory: ".", checksumkind: CSK_MD5, checksum: "335f81d275af57725cfc9ffc7be49bc2")
!2 = !{i32 7, !"Dwarf Version", i32 5}
!3 = !{i32 2, !"Debug Info Version", i32 3}
!4 = !{i32 1, !"wchar_size", i32 4}
!5 = !{i32 8, !"PIC Level", i32 2}
!6 = !{i32 7, !"PIE Level", i32 2}
!7 = !{i32 7, !"uwtable", i32 2}
!8 = !{i32 7, !"frame-pointer", i32 2}
!39 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 1, type: !40, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41)
!40 = !DISubroutineType(types: !41)
!41 = !{}
!42 = !DILocation(line: 2, column: 10, scope: !39)
!43 = !{!44, !46}
!44 = !{!45, !"notcold"}
!45 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
!46 = !{!47, !"cold"}
!47 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
!48 = !{i64 9086428284934609951}
!49 = !DILocation(line: 2, column: 3, scope: !39)
!50 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !1, file: !1, line: 5, type: !40, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41)
!51 = !DILocation(line: 6, column: 10, scope: !50)
!52 = !{i64 -5964873800580613432}
!53 = !DILocation(line: 6, column: 3, scope: !50)
!54 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 9, type: !40, scopeLine: 9, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41)
!55 = !DILocation(line: 10, column: 10, scope: !54)
!56 = !{i64 2732490490862098848}
!57 = !DILocation(line: 10, column: 3, scope: !54)

View File

@@ -0,0 +1,158 @@
;; Test callsite context graph generation for simple call graph with
;; two memprof contexts and no inlining.
;;
;; Original code looks like:
;;
;; char *bar() {
;; return new char[10];
;; }
;;
;; char *baz() {
;; return bar();
;; }
;;
;; char *foo() {
;; return baz();
;; }
;;
;; int main(int argc, char **argv) {
;; char *x = foo();
;; char *y = foo();
;; memset(x, 0, 10);
;; memset(y, 0, 10);
;; delete[] x;
;; sleep(10);
;; delete[] y;
;; return 0;
;; }
;;
;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
;; memory freed after sleep(10) results in cold lifetimes.
;;
;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
; RUN: opt -passes=memprof-context-disambiguation \
; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
;; Two calls to foo. Each !callsite id is the last stack id of one MIB on the
;; allocation in bar: !0 ends the notcold stack (!4) and !1 ends the cold
;; stack (!6). The expected graph therefore has one node per call, MAIN1
;; (NotCold, context 1) and MAIN2 (Cold, context 2).
define i32 @main() #0 {
entry:
%call = call noundef ptr @_Z3foov(), !callsite !0
%call1 = call noundef ptr @_Z3foov(), !callsite !1
ret i32 0
}
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1
; Function Attrs: nobuiltin
declare void @_ZdaPv() #2
;; The profiled allocation. The call to _Znam carries !memprof !2 with two
;; MIBs (notcold !3 and cold !5), and its !callsite !7 is the first stack id
;; of both MIB stacks.
define internal ptr @_Z3barv() #3 {
entry:
%call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !2, !callsite !7
ret ptr null
}
declare ptr @_Znam(i64)
;; Caller of bar. !callsite !8 is the second stack id in both MIB stacks, so
;; the expected BAZ node carries both context ids (1 2).
define internal ptr @_Z3bazv() #4 {
entry:
%call = call noundef ptr @_Z3barv(), !callsite !8
ret ptr null
}
;; Caller of baz. !callsite !9 is the third stack id in both MIB stacks, so
;; the expected FOO node carries both context ids (1 2). The two contexts only
;; diverge at the callers in main.
; Function Attrs: noinline
define internal ptr @_Z3foov() #5 {
entry:
%call = call noundef ptr @_Z3bazv(), !callsite !9
ret ptr null
}
; uselistorder directives
uselistorder ptr @_Z3foov, { 1, 0 }
attributes #0 = { "tune-cpu"="generic" }
attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #2 = { nobuiltin }
attributes #3 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
attributes #4 = { "stack-protector-buffer-size"="8" }
attributes #5 = { noinline }
attributes #6 = { builtin }
!0 = !{i64 8632435727821051414}
!1 = !{i64 -3421689549917153178}
!2 = !{!3, !5}
!3 = !{!4, !"notcold"}
!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
!5 = !{!6, !"cold"}
!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
!7 = !{i64 9086428284934609951}
!8 = !{i64 -5964873800580613432}
!9 = !{i64 2732490490862098848}
; DUMP: CCG before cloning:
; DUMP: Callsite Context Graph:
; DUMP: Node [[BAR:0x[a-z0-9]+]]
; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 1 2
; DUMP: CalleeEdges:
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2
; DUMP: Node [[BAZ]]
; DUMP: %call = call noundef ptr @_Z3barv() (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 1 2
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotColdCold ContextIds: 1 2
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2
; DUMP: Node [[FOO]]
; DUMP: %call = call noundef ptr @_Z3bazv() (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 1 2
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 1 2
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
; DUMP: Node [[MAIN1]]
; DUMP: %call = call noundef ptr @_Z3foov() (clone 0)
; DUMP: AllocTypes: NotCold
; DUMP: ContextIds: 1
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
; DUMP: CallerEdges:
; DUMP: Node [[MAIN2]]
; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0)
; DUMP: AllocTypes: Cold
; DUMP: ContextIds: 2
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
; DUMP: CallerEdges:
; DOT: digraph "postbuild" {
; DOT: label="postbuild";
; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
; DOT: Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"];
; DOT: Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"];
; DOT: Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
; DOT: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"];
; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
; DOT: Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 2",fillcolor="cyan"];
; DOT: }

View File

@@ -0,0 +1,232 @@
;; Test callsite context graph generation for call graph with MIBs
;; that have pruned contexts that partially match multiple inlined
;; callsite contexts, requiring duplication of context ids and nodes
;; while matching callsite nodes onto the graph.
;;
;; Original code looks like:
;;
;; char *D() {
;; return new char[10];
;; }
;;
;; char *F() {
;; return D();
;; }
;;
;; char *C() {
;; return D();
;; }
;;
;; char *B() {
;; return C();
;; }
;;
;; char *E() {
;; return C();
;; }
;; int main(int argc, char **argv) {
;; char *x = B(); // cold
;; char *y = E(); // cold
;; char *z = F(); // default
;; memset(x, 0, 10);
;; memset(y, 0, 10);
;; memset(z, 0, 10);
;; delete[] z;
;; sleep(10);
;; delete[] x;
;; delete[] y;
;; return 0;
;; }
;;
;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
;; memory freed after sleep(10) results in cold lifetimes.
;;
;; The code below was created by forcing inlining of C into both B and E.
;; Since both allocation contexts via C are cold, the matched memprof
;; metadata has the context pruned above C's callsite. This requires
;; matching the stack node for C to callsites where it was inlined (i.e.
;; the callsites in B and E that have callsite metadata that includes C's).
;; It also requires duplication of that node in the graph as well as the
;; duplication of the context ids along that path through the graph,
;; so that we can represent the duplicated (via inlining) C callsite.
;;
;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
; RUN: opt -passes=memprof-context-disambiguation \
; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
; RUN: cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE
; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
;; The profiled allocation. !memprof !0 holds two MIBs, each with a two-entry
;; stack: cold (!2) and notcold (!4). !callsite !5 is the first stack id of
;; both.
define internal ptr @_Z1Dv() {
entry:
%call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !0, !callsite !5
ret ptr null
}
declare ptr @_Znam(i64)
;; Caller of D on the notcold path: !callsite !6 equals the second stack id of
;; the notcold MIB (!4).
define internal ptr @_Z1Fv() #0 {
entry:
%call = call noundef ptr @_Z1Dv(), !callsite !6
ret ptr null
}
;; Out-of-line copy of C. Its single-entry !callsite !7 equals the second
;; stack id of the cold MIB (!2), which is where that MIB's context was pruned.
; Function Attrs: mustprogress noinline optnone uwtable
define internal ptr @_Z1Cv() #1 {
entry:
%call = call noundef ptr @_Z1Dv(), !callsite !7
ret ptr null
}
;; B after C was inlined into it (see the file header). !callsite !8 starts
;; with the same stack id as C's own callsite (!7), followed by one more id
;; (presumably B's original call to C -- not otherwise visible in this file).
;; Only the first id overlaps the pruned cold MIB stack.
; Function Attrs: mustprogress noinline optnone uwtable
define internal ptr @_Z1Bv() #1 {
entry:
%call.i = call noundef ptr @_Z1Dv(), !callsite !8
ret ptr null
}
;; E after C was inlined into it (see the file header). !callsite !9 starts
;; with the same stack id as C's own callsite (!7), followed by one more id
;; (presumably E's original call to C -- not otherwise visible in this file).
;; Only the first id overlaps the pruned cold MIB stack.
; Function Attrs: mustprogress noinline optnone uwtable
define internal ptr @_Z1Ev() #1 {
entry:
%call.i = call noundef ptr @_Z1Dv(), !callsite !9
ret ptr null
}
; Function Attrs: noinline
declare i32 @main() #2
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3
; Function Attrs: nounwind
declare void @_ZdaPv() #4
declare i32 @sleep() #5
attributes #0 = { "disable-tail-calls"="true" }
attributes #1 = { mustprogress noinline optnone uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #2 = { noinline }
attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #4 = { nounwind }
attributes #5 = { "no-trapping-math"="true" }
attributes #6 = { builtin }
!0 = !{!1, !3}
!1 = !{!2, !"cold"}
!2 = !{i64 6541423618768552252, i64 -6270142974039008131}
!3 = !{!4, !"notcold"}
!4 = !{i64 6541423618768552252, i64 -4903163940066524832}
!5 = !{i64 6541423618768552252}
!6 = !{i64 -4903163940066524832}
!7 = !{i64 -6270142974039008131}
!8 = !{i64 -6270142974039008131, i64 -184525619819294889}
!9 = !{i64 -6270142974039008131, i64 1905834578520680781}
;; After adding only the alloc node memprof metadata, we only have 2 contexts.
; DUMP: CCG before updating call stack chains:
; DUMP: Callsite Context Graph:
; DUMP: Node [[D:0x[a-z0-9]+]]
; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 1 2
; DUMP: CalleeEdges:
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[D]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
; DUMP: Edge from Callee [[D]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2
; DUMP: Node [[C]]
; DUMP: null Call
; DUMP: AllocTypes: Cold
; DUMP: ContextIds: 1
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[D]] to Caller: [[C]] AllocTypes: Cold ContextIds: 1
; DUMP: CallerEdges:
; DUMP: Node [[F]]
; DUMP: null Call
; DUMP: AllocTypes: NotCold
; DUMP: ContextIds: 2
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
; DUMP: CallerEdges:
;; After updating for callsite metadata, we should have generated context ids 3 and 4,
;; along with 2 new nodes for those callsites. All have the same allocation type
;; behavior as the original C node.
; DUMP: CCG before cloning:
; DUMP: Callsite Context Graph:
; DUMP: Node [[D]]
; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 1 2 3 4
; DUMP: CalleeEdges:
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
; DUMP: Edge from Callee [[D]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3
; DUMP: Edge from Callee [[D]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
; DUMP: Edge from Callee [[D]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
; DUMP: Node [[F]]
; DUMP: %call = call noundef ptr @_Z1Dv() (clone 0)
; DUMP: AllocTypes: NotCold
; DUMP: ContextIds: 2
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
; DUMP: CallerEdges:
; DUMP: Node [[C2]]
; DUMP: %call = call noundef ptr @_Z1Dv() (clone 0)
; DUMP: AllocTypes: Cold
; DUMP: ContextIds: 3
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[D]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
; DUMP: CallerEdges:
; DUMP: Node [[B]]
; DUMP: %call.i = call noundef ptr @_Z1Dv() (clone 0)
; DUMP: AllocTypes: Cold
; DUMP: ContextIds: 4
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[D]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
; DUMP: CallerEdges:
; DUMP: Node [[E]]
; DUMP: %call.i = call noundef ptr @_Z1Dv() (clone 0)
; DUMP: AllocTypes: Cold
; DUMP: ContextIds: 1
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[D]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
; DUMP: CallerEdges:
; DOTPRE: digraph "prestackupdate" {
; DOTPRE: label="prestackupdate";
; DOTPRE: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
; DOTPRE: Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12176601099670543485\nnull call (external)}"];
; DOTPRE: Node[[C]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
; DOTPRE: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\nnull call (external)}"];
; DOTPRE: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
; DOTPRE: }
; DOTPOST:digraph "postbuild" {
; DOTPOST: label="postbuild";
; DOTPOST: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
; DOTPOST: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
; DOTPOST: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
; DOTPOST: Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
; DOTPOST: Node[[C]] -> Node[[D]][tooltip="ContextIds: 3",fillcolor="cyan"];
; DOTPOST: Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
; DOTPOST: Node[[B]] -> Node[[D]][tooltip="ContextIds: 4",fillcolor="cyan"];
; DOTPOST: Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
; DOTPOST: Node[[E]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
; DOTPOST:}

View File

@@ -0,0 +1,386 @@
;; Test callsite context graph generation for call graph with MIBs
;; that have pruned contexts that partially match multiple inlined
;; callsite contexts, requiring duplication of context ids and nodes
;; while matching callsite nodes onto the graph. This test requires more
;; complex duplication due to multiple contexts for different allocations
;; that share some of the same callsite nodes.
;;
;; Original code looks like:
;;
;; char *D(bool Call1) {
;; if (Call1)
;; return new char[10];
;; else
;; return new char[10];
;; }
;;
;; char *C(bool Call1) {
;; return D(Call1);
;; }
;;
;; char *B(bool Call1) {
;; if (Call1)
;; return C(true);
;; else
;; return C(false);
;; }
;;
;; char *A(bool Call1) {
;; return B(Call1);
;; }
;;
;; char *A1() {
;; return A(true);
;; }
;;
;; char *A2() {
;; return A(true);
;; }
;;
;; char *A3() {
;; return A(false);
;; }
;;
;; char *A4() {
;; return A(false);
;; }
;;
;; char *E() {
;; return B(true);
;; }
;;
;; char *F() {
;; return B(false);
;; }
;;
;; int main(int argc, char **argv) {
;; char *a1 = A1(); // cold
;; char *a2 = A2(); // cold
;; char *e = E(); // default
;; char *a3 = A3(); // default
;; char *a4 = A4(); // default
;; char *f = F(); // cold
;; memset(a1, 0, 10);
;; memset(a2, 0, 10);
;; memset(e, 0, 10);
;; memset(a3, 0, 10);
;; memset(a4, 0, 10);
;; memset(f, 0, 10);
;; delete[] a3;
;; delete[] a4;
;; delete[] e;
;; sleep(10);
;; delete[] a1;
;; delete[] a2;
;; delete[] f;
;; return 0;
;; }
;;
;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
;; memory freed after sleep(10) results in cold lifetimes.
;;
;; The code below was created by forcing inlining of A into its callers,
;; without any other inlining or optimizations. Since both allocation contexts
;; via A for each allocation in D have the same allocation type (cold via
;; A1 and A2 for the first new in D, and non-cold via A3 and A4 for the second
;; new in D), the contexts for those respective allocations are pruned above A.
;; The allocations via E and F are to ensure we don't prune above B.
;;
;; The matching onto the inlined A[1234]->A sequences will require duplication
;; of the context id assigned to the context from A for each allocation in D.
;; This test ensures that we do this correctly in the presence of callsites
;; shared by the different duplicated context ids (i.e. callsite in C).
;;
;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
; RUN: opt -passes=memprof-context-disambiguation \
; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: mustprogress noinline uwtable
;; D(bool): holds both allocation calls. Each call to operator new[] (_Znam)
;; carries its own !memprof metadata (!0 and !6 respectively), and each of
;; those lists one notcold and one cold MIB. These two calls are the
;; allocation nodes captured as D1 and D2 in the checks below.
define ptr @_Z1Db(i1 %Call1) #0 {
entry:
;; First allocation: its stack id (!5) is the leading id of the MIB stacks
;; !2 and !4.
%call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !5
br label %return
;; This block has no predecessors in the reduced IR, but it still holds the
;; second allocation call with its metadata.
if.else: ; No predecessors!
;; Second allocation: its stack id (!11) is the leading id of the MIB stacks
;; !8 and !10.
%call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !6, !callsite !11
br label %return
return: ; preds = %if.else, %entry
ret ptr null
}
; Function Attrs: nobuiltin
declare ptr @_Znam(i64) #1
;; C(bool): a single call to D. Its stack id (!12) is the second id in every
;; MIB stack of both allocations (!2, !4, !8 and !10), so this one callsite is
;; shared by all of the contexts.
define ptr @_Z1Cb(i1 %Call1) {
entry:
%tobool = trunc i8 0 to i1
%call = call noundef ptr @_Z1Db(i1 noundef zeroext %tobool), !callsite !12
ret ptr null
}
; Function Attrs: mustprogress noinline uwtable
;; B(bool): two distinct calls to C. The stack id of the first call (!13)
;; appears only in the MIB stacks of the first allocation (!2 and !4); the
;; stack id of the second call (!14) appears only in the MIB stacks of the
;; second allocation (!8 and !10). They are captured as B1 and B2 in the
;; checks below.
define ptr @_Z1Bb(i1 %Call1) #0 {
entry:
%call = call noundef ptr @_Z1Cb(i1 noundef zeroext true), !callsite !13
br label %return
;; No predecessors in the reduced IR, but the callsite is still present.
if.else: ; No predecessors!
%call1 = call noundef ptr @_Z1Cb(i1 noundef zeroext false), !callsite !14
br label %return
return: ; preds = %if.else, %entry
ret ptr null
}
;; A(bool) itself: one call to B whose !callsite (!15) holds a single stack id.
;; That id is the last entry of the cold MIB stack of the first allocation (!4)
;; and of the notcold MIB stack of the second allocation (!8).
define ptr @_Z1Ab(i1 %tobool) #2 {
entry:
%call = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool), !callsite !15
ret ptr null
}
; Function Attrs: mustprogress noinline uwtable
;; A1 with A inlined: the call to B carries a two-entry !callsite (!16). The
;; first entry is the same stack id used by A's own callsite (!15); the second
;; entry does not appear in any MIB stack in this file.
define ptr @_Z2A1v(i1 %tobool.i) #0 {
entry:
%call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !16
ret ptr null
}
; Function Attrs: mustprogress noinline uwtable
;; A2 with A inlined: the call to B carries a two-entry !callsite (!17). The
;; first entry is the same stack id used by A's own callsite (!15); the second
;; entry does not appear in any MIB stack in this file.
define ptr @_Z2A2v(i1 %tobool.i) #0 {
entry:
%call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !17
ret ptr null
}
; Function Attrs: mustprogress noinline uwtable
;; A3 with A inlined: the call to B carries a two-entry !callsite (!18). The
;; first entry is the same stack id used by A's own callsite (!15); the second
;; entry does not appear in any MIB stack in this file.
define ptr @_Z2A3v(i1 %tobool.i) #0 {
entry:
%call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !18
ret ptr null
}
; Function Attrs: mustprogress noinline uwtable
;; A4 with A inlined: the call to B carries a two-entry !callsite (!19). The
;; first entry is the same stack id used by A's own callsite (!15); the second
;; entry does not appear in any MIB stack in this file.
define ptr @_Z2A4v(i1 %tobool.i) #0 {
entry:
%call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !19
ret ptr null
}
; Function Attrs: mustprogress noinline uwtable
;; E(): calls B(true). Its stack id (!20) is the last entry of the notcold MIB
;; stack of the first allocation (!2).
define ptr @_Z1Ev() #0 {
entry:
%call = call noundef ptr @_Z1Bb(i1 noundef zeroext true), !callsite !20
ret ptr null
}
; Function Attrs: mustprogress noinline uwtable
;; F(): calls B(false). Its stack id (!21) is the last entry of the cold MIB
;; stack of the second allocation (!10).
define ptr @_Z1Fv() #0 {
entry:
%call = call noundef ptr @_Z1Bb(i1 noundef zeroext false), !callsite !21
ret ptr null
}
; Function Attrs: noinline
declare i32 @main() #3
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4
declare void @_ZdaPv() #5
declare i32 @sleep() #6
; uselistorder directives
uselistorder ptr @_Znam, { 1, 0 }
attributes #0 = { mustprogress noinline uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { nobuiltin }
attributes #2 = { "tune-cpu"="generic" }
attributes #3 = { noinline }
attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #5 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
attributes #6 = { "disable-tail-calls"="true" }
attributes #7 = { builtin allocsize(0) }
!0 = !{!1, !3}
!1 = !{!2, !"notcold"}
!2 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 1905834578520680781}
!3 = !{!4, !"cold"}
!4 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 -6528110295079665978}
!5 = !{i64 4854880825882961848}
!6 = !{!7, !9}
!7 = !{!8, !"notcold"}
!8 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -6528110295079665978}
!9 = !{!10, !"cold"}
!10 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -4903163940066524832}
!11 = !{i64 -8775068539491628272}
!12 = !{i64 -904694911315397047}
!13 = !{i64 6532298921261778285}
!14 = !{i64 7859682663773658275}
!15 = !{i64 -6528110295079665978}
!16 = !{i64 -6528110295079665978, i64 5747919905719679568}
!17 = !{i64 -6528110295079665978, i64 -5753238080028016843}
!18 = !{i64 -6528110295079665978, i64 1794685869326395337}
!19 = !{i64 -6528110295079665978, i64 5462047985461644151}
!20 = !{i64 1905834578520680781}
!21 = !{i64 -4903163940066524832}
;; After adding only the alloc node memprof metadata, we only have 4 contexts (we only
;; match the interesting parts of the pre-update graph here).
; DUMP: CCG before updating call stack chains:
; DUMP: Callsite Context Graph:
; DUMP: Node [[D1:0x[a-z0-9]+]]
; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 1 2
; DUMP: Node [[C:0x[a-z0-9]+]]
; DUMP: null Call
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 1 2 3 4
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2
; DUMP: Edge from Callee [[D2:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4
; DUMP: Node [[D2]]
; DUMP: %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 3 4
;; After updating for callsite metadata, we should have duplicated the context
;; ids coming from node A (2 and 3) 4 times, for the 4 different callers of A,
;; and used those on new nodes for those callers. Note that while in reality
;; we only have cold edges coming from A1 and A2 and noncold from A3 and A4,
;; due to the pruning we have lost this information and thus end up duplicating
;; both of A's contexts to all of the new nodes (which could result in some
;; unnecessary cloning).
; DUMP: CCG before cloning:
; DUMP: Callsite Context Graph:
; DUMP: Node [[D1]]
; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 1 2 5 7 9 11
; DUMP: CalleeEdges:
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
; DUMP: Node [[C]]
; DUMP: %call = call noundef ptr @_Z1Db(i1 noundef zeroext %tobool) (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 1 2 3 4 5 6 7 8 9 10 11 12
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
; DUMP: Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[C]] to Caller: [[B1:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
; DUMP: Edge from Callee [[C]] to Caller: [[B2:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
; DUMP: Node [[B1]]
; DUMP: %call = call noundef ptr @_Z1Cb(i1 noundef zeroext true) (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 1 2 5 7 9 11
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[C]] to Caller: [[B1]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[B1]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
; DUMP: Edge from Callee [[B1]] to Caller: [[A2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 5
; DUMP: Edge from Callee [[B1]] to Caller: [[A3:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 7
; DUMP: Edge from Callee [[B1]] to Caller: [[A1:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 9
; DUMP: Edge from Callee [[B1]] to Caller: [[A4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 11
; DUMP: Edge from Callee [[B1]] to Caller: [[A:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
; DUMP: Node [[E]]
; DUMP: %call = call noundef ptr @_Z1Bb(i1 noundef zeroext true) (clone 0)
; DUMP: AllocTypes: NotCold
; DUMP: ContextIds: 1
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[B1]] to Caller: [[E]] AllocTypes: NotCold ContextIds: 1
; DUMP: CallerEdges:
; DUMP: Node [[D2]]
; DUMP: %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 3 4 6 8 10 12
; DUMP: CalleeEdges:
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
; DUMP: Node [[B2]]
; DUMP: %call1 = call noundef ptr @_Z1Cb(i1 noundef zeroext false) (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 3 4 6 8 10 12
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[C]] to Caller: [[B2]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[B2]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
; DUMP: Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6
; DUMP: Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8
; DUMP: Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10
; DUMP: Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12
; DUMP: Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3
; DUMP: Node [[F]]
; DUMP: %call = call noundef ptr @_Z1Bb(i1 noundef zeroext false) (clone 0)
; DUMP: AllocTypes: Cold
; DUMP: ContextIds: 4
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[B2]] to Caller: [[F]] AllocTypes: Cold ContextIds: 4
; DUMP: CallerEdges:
; DUMP: Node [[A2]]
; DUMP: %call = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool) (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 5 6
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[B1]] to Caller: [[A2]] AllocTypes: Cold ContextIds: 5
; DUMP: Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6
; DUMP: CallerEdges:
; DUMP: Node [[A3]]
; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 7 8
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[B1]] to Caller: [[A3]] AllocTypes: Cold ContextIds: 7
; DUMP: Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8
; DUMP: CallerEdges:
; DUMP: Node [[A1]]
; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 9 10
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[B1]] to Caller: [[A1]] AllocTypes: Cold ContextIds: 9
; DUMP: Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10
; DUMP: CallerEdges:
; DUMP: Node [[A4]]
; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 11 12
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[B1]] to Caller: [[A4]] AllocTypes: Cold ContextIds: 11
; DUMP: Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12
; DUMP: CallerEdges:
; DUMP: Node [[A]]
; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 2 3
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[B1]] to Caller: [[A]] AllocTypes: Cold ContextIds: 2
; DUMP: Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3
; DUMP: CallerEdges:

View File

@@ -0,0 +1,261 @@
;; Tests callsite context graph generation for call graph containing indirect
;; calls. Currently this should result in conservative behavior, such that the
;; indirect call receives a null call in its graph node, to prevent subsequent
;; cloning.
;;
;; Original code looks like:
;;
;; char *foo() {
;; return new char[10];
;; }
;; class A {
;; public:
;; virtual char *x() { return foo(); }
;; };
;; class B : public A {
;; public:
;; char *x() final { return foo(); }
;; };
;; char *bar(A *a) {
;; return a->x();
;; }
;; int main(int argc, char **argv) {
;; char *x = foo();
;; char *y = foo();
;; B b;
;; char *z = bar(&b);
;; char *w = bar(&b);
;; A a;
;; char *r = bar(&a);
;; char *s = bar(&a);
;; memset(x, 0, 10);
;; memset(y, 0, 10);
;; memset(z, 0, 10);
;; memset(w, 0, 10);
;; memset(r, 0, 10);
;; memset(s, 0, 10);
;; delete[] x;
;; delete[] w;
;; delete[] r;
;; sleep(10);
;; delete[] y;
;; delete[] z;
;; delete[] s;
;; return 0;
;; }
;;
;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
;; memory freed after sleep(10) results in cold lifetimes.
;;
;; Compiled without optimization to prevent inlining and devirtualization.
;;
;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
; RUN: opt -passes=memprof-context-disambiguation \
; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
declare ptr @_Z3barP1A(ptr)
;; main: two direct calls to foo (!0, !1) and four calls to bar (!2 through
;; !5). Each of these six stack ids is the last entry of exactly one MIB stack
;; under !8:
;;   !0 -> !14 (notcold)   !1 -> !20 (cold)
;;   !2 -> !16 (cold)      !3 -> !18 (notcold)
;;   !4 -> !10 (notcold)   !5 -> !12 (cold)
;; Note that @_Z3barP1A is only declared in this module, and no !callsite
;; here carries the stack id -4820244510750103755 that sits between the x()
;; frames and the main frames in those MIB stacks.
define i32 @main(ptr %b, ptr %a) #0 {
entry:
%call = call noundef ptr @_Z3foov(), !callsite !0
%call1 = call noundef ptr @_Z3foov(), !callsite !1
%call2 = call noundef ptr @_Z3barP1A(ptr noundef %b), !callsite !2
%call3 = call noundef ptr @_Z3barP1A(ptr noundef %b), !callsite !3
%call4 = call noundef ptr @_Z3barP1A(ptr noundef %a), !callsite !4
%call5 = call noundef ptr @_Z3barP1A(ptr noundef %a), !callsite !5
ret i32 0
}
; Function Attrs: noinline
declare void @_ZN1BC2Ev() #1
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2
; Function Attrs: nobuiltin
declare void @_ZdaPv() #3
;; A::x(): calls foo. Its stack id (!6) is the second entry of the MIB stacks
;; !10 (notcold) and !12 (cold).
define internal ptr @_ZN1A1xEv() #4 {
entry:
%call = call noundef ptr @_Z3foov(), !callsite !6
ret ptr null
}
; Function Attrs: mustprogress uwtable
;; B::x(): calls foo. Its stack id (!7) is the second entry of the MIB stacks
;; !16 (cold) and !18 (notcold).
define internal ptr @_ZN1B1xEv() #5 {
entry:
%call = call noundef ptr @_Z3foov(), !callsite !7
ret ptr null
}
; Function Attrs: mustprogress uwtable
;; foo(): the only allocation in this test. Its !memprof (!8) lists six MIBs,
;; three notcold (!9, !13, !17) and three cold (!11, !15, !19); its own stack
;; id (!21) is the leading entry of every one of those MIB stacks.
define internal ptr @_Z3foov() #5 {
entry:
%call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !8, !callsite !21
ret ptr null
}
declare ptr @_Znam(i64) #6
; uselistorder directives
uselistorder ptr @_Z3foov, { 3, 2, 1, 0 }
attributes #0 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
attributes #1 = { noinline }
attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #3 = { nobuiltin }
attributes #4 = { "tune-cpu"="generic" }
attributes #5 = { mustprogress uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #6 = { "disable-tail-calls"="true" }
attributes #7 = { builtin }
!0 = !{i64 8632435727821051414}
!1 = !{i64 -3421689549917153178}
!2 = !{i64 6792096022461663180}
!3 = !{i64 -2709642582978494015}
!4 = !{i64 748269490701775343}
!5 = !{i64 -5747251260480066785}
!6 = !{i64 8256774051149711748}
!7 = !{i64 -4831879094954754638}
!8 = !{!9, !11, !13, !15, !17, !19}
!9 = !{!10, !"notcold"}
!10 = !{i64 2732490490862098848, i64 8256774051149711748, i64 -4820244510750103755, i64 748269490701775343}
!11 = !{!12, !"cold"}
!12 = !{i64 2732490490862098848, i64 8256774051149711748, i64 -4820244510750103755, i64 -5747251260480066785}
!13 = !{!14, !"notcold"}
!14 = !{i64 2732490490862098848, i64 8632435727821051414}
!15 = !{!16, !"cold"}
!16 = !{i64 2732490490862098848, i64 -4831879094954754638, i64 -4820244510750103755, i64 6792096022461663180}
!17 = !{!18, !"notcold"}
!18 = !{i64 2732490490862098848, i64 -4831879094954754638, i64 -4820244510750103755, i64 -2709642582978494015}
!19 = !{!20, !"cold"}
!20 = !{i64 2732490490862098848, i64 -3421689549917153178}
!21 = !{i64 2732490490862098848}
; DUMP: CCG before cloning:
; DUMP: Callsite Context Graph:
; DUMP: Node [[FOO:0x[a-z0-9]+]]
; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 1 2 3 4 5 6
; DUMP: CalleeEdges:
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[FOO]] to Caller: [[AX:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3
; DUMP: Edge from Callee [[FOO]] to Caller: [[BX:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 4 5
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 6
; DUMP: Node [[AX]]
; DUMP: %call = call noundef ptr @_Z3foov() (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 1 2
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[FOO]] to Caller: [[AX]] AllocTypes: NotColdCold ContextIds: 1 2
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[AX]] to Caller: [[BAR:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2
;; Bar contains an indirect call, with multiple targets. Its call should be null.
; DUMP: Node [[BAR]]
; DUMP: null Call
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 1 2 4 5
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[AX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 1 2
; DUMP: Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN3:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN5:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN6:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 5
; DUMP: Node [[MAIN3]]
; DUMP: %call4 = call noundef ptr @_Z3barP1A(ptr noundef %a) (clone 0)
; DUMP: AllocTypes: NotCold
; DUMP: ContextIds: 1
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN3]] AllocTypes: NotCold ContextIds: 1
; DUMP: CallerEdges:
; DUMP: Node [[MAIN4]]
; DUMP: %call5 = call noundef ptr @_Z3barP1A(ptr noundef %a) (clone 0)
; DUMP: AllocTypes: Cold
; DUMP: ContextIds: 2
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN4]] AllocTypes: Cold ContextIds: 2
; DUMP: CallerEdges:
; DUMP: Node [[MAIN1]]
; DUMP: %call = call noundef ptr @_Z3foov() (clone 0)
; DUMP: AllocTypes: NotCold
; DUMP: ContextIds: 3
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
; DUMP: CallerEdges:
; DUMP: Node [[BX]]
; DUMP: %call = call noundef ptr @_Z3foov() (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 4 5
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[FOO]] to Caller: [[BX]] AllocTypes: NotColdCold ContextIds: 4 5
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5
; DUMP: Node [[MAIN5]]
; DUMP: %call2 = call noundef ptr @_Z3barP1A(ptr noundef %b) (clone 0)
; DUMP: AllocTypes: Cold
; DUMP: ContextIds: 4
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN5]] AllocTypes: Cold ContextIds: 4
; DUMP: CallerEdges:
; DUMP: Node [[MAIN6]]
; DUMP: %call3 = call noundef ptr @_Z3barP1A(ptr noundef %b) (clone 0)
; DUMP: AllocTypes: NotCold
; DUMP: ContextIds: 5
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN6]] AllocTypes: NotCold ContextIds: 5
; DUMP: CallerEdges:
; DUMP: Node [[MAIN2]]
; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0)
; DUMP: AllocTypes: Cold
; DUMP: ContextIds: 6
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6
; DUMP: CallerEdges:
; DOT: digraph "postbuild" {
; DOT: label="postbuild";
; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2 3 4 5 6",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3foov -\> _Znam}"];
; DOT: Node[[AX:0x[a-z0-9]+]] [shape=record,tooltip="N[[AX]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 8256774051149711748\n_ZN1A1xEv -\> _Z3foov}"];
; DOT: Node[[AX]] -> Node[[FOO]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13626499562959447861\nnull call (external)}"];
; DOT: Node[[BAR]] -> Node[[AX]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
; DOT: Node[[BAR]] -> Node[[BX:0x[a-z0-9]+]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"];
; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 748269490701775343\nmain -\> _Z3barP1A}"];
; DOT: Node[[MAIN1]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"];
; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12699492813229484831\nmain -\> _Z3barP1A}"];
; DOT: Node[[MAIN2]] -> Node[[BAR]][tooltip="ContextIds: 2",fillcolor="cyan"];
; DOT: Node[[MAIN3:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN3]] ContextIds: 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
; DOT: Node[[MAIN3]] -> Node[[FOO]][tooltip="ContextIds: 3",fillcolor="brown1"];
; DOT: Node[[BX]] [shape=record,tooltip="N[[BX]] ContextIds: 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13614864978754796978\n_ZN1B1xEv -\> _Z3foov}"];
; DOT: Node[[BX]] -> Node[[FOO]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"];
; DOT: Node[[MAIN4:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN4]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 6792096022461663180\nmain -\> _Z3barP1A}"];
; DOT: Node[[MAIN4]] -> Node[[BAR]][tooltip="ContextIds: 4",fillcolor="cyan"];
; DOT: Node[[MAIN5:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN5]] ContextIds: 5",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 15737101490731057601\nmain -\> _Z3barP1A}"];
; DOT: Node[[MAIN5]] -> Node[[BAR]][tooltip="ContextIds: 5",fillcolor="brown1"];
; DOT: Node[[MAIN6:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN6]] ContextIds: 6",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
; DOT: Node[[MAIN6]] -> Node[[FOO]][tooltip="ContextIds: 6",fillcolor="cyan"];
; DOT: }

View File

@@ -0,0 +1,189 @@
;; Test callsite context graph generation for call graph with two memprof
;; contexts and partial inlining, requiring generation of a new fused node to
;; represent the inlined sequence while matching callsite nodes onto the graph.
;;
;; Original code looks like:
;;
;; char *bar() {
;; return new char[10];
;; }
;;
;; char *baz() {
;; return bar();
;; }
;;
;; char *foo() {
;; return baz();
;; }
;;
;; int main(int argc, char **argv) {
;; char *x = foo();
;; char *y = foo();
;; memset(x, 0, 10);
;; memset(y, 0, 10);
;; delete[] x;
;; sleep(10);
;; delete[] y;
;; return 0;
;; }
;;
;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
;; memory freed after sleep(10) results in cold lifetimes.
;;
;; The code below was created by forcing inlining of baz into foo, and
;; bar into baz. Due to the inlining of bar we will initially have two
;; allocation nodes in the graph. This tests that we correctly match
;; foo (with baz inlined) onto the graph nodes first, and generate a new
;; fused node for it. We should then not match baz (with bar inlined) as that
;; is not reached by the MIB contexts (since all calls from main will look
;; like main -> foo(+baz) -> bar after the inlining reflected in this IR).
;;
;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
; RUN: opt -passes=memprof-context-disambiguation \
; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
;; bar(): the original allocation call. Its !callsite (!5) holds a single
;; stack id, which is the leading entry of both MIB stacks (!2 and !4) under
;; !memprof !0.
define internal ptr @_Z3barv() {
entry:
%call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !5
ret ptr null
}
; Function Attrs: nobuiltin
declare ptr @_Znam(i64) #0
; Function Attrs: mustprogress
;; baz() with bar inlined: a second copy of the allocation call carrying the
;; same !memprof (!0). Its !callsite (!6) has two entries: the allocation's
;; stack id followed by the next id in the MIB stacks.
define internal ptr @_Z3bazv() #1 {
entry:
%call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !6
ret ptr null
}
; Function Attrs: noinline
;; foo() with baz inlined: a direct call to bar whose !callsite (!7) has two
;; entries, matching the second and third ids of the MIB stacks (!2 and !4).
define internal ptr @_Z3foov() #2 {
entry:
%call.i = call noundef ptr @_Z3barv(), !callsite !7
ret ptr null
}
;; main: two calls to foo. The stack id of the first (!8) is the last entry of
;; the notcold MIB stack (!2); the stack id of the second (!9) is the last
;; entry of the cold MIB stack (!4).
define i32 @main() #3 {
entry:
%call = call noundef ptr @_Z3foov(), !callsite !8
%call1 = call noundef ptr @_Z3foov(), !callsite !9
ret i32 0
}
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4
; Function Attrs: nounwind
declare void @_ZdaPv() #5
declare i32 @sleep() #6
attributes #0 = { nobuiltin }
attributes #1 = { mustprogress }
attributes #2 = { noinline }
attributes #3 = { "tune-cpu"="generic" }
attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #5 = { nounwind }
attributes #6 = { "disable-tail-calls"="true" }
attributes #7 = { builtin }
!0 = !{!1, !3}
!1 = !{!2, !"notcold"}
!2 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
!3 = !{!4, !"cold"}
!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
!5 = !{i64 9086428284934609951}
!6 = !{i64 9086428284934609951, i64 -5964873800580613432}
!7 = !{i64 -5964873800580613432, i64 2732490490862098848}
!8 = !{i64 8632435727821051414}
!9 = !{i64 -3421689549917153178}
; DUMP: CCG before cloning:
; DUMP: Callsite Context Graph:
; DUMP: Node [[BAR:0x[a-z0-9]+]]
; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 1 2
; DUMP: CalleeEdges:
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2
;; This is leftover from the MIB on the alloc inlined into baz. It is not
;; matched with any call, since there is no such node in the IR. Due to the
;; null call it will not participate in any context transformations.
; DUMP: Node [[FOO2:0x[a-z0-9]+]]
; DUMP: null Call
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 3 4
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[BAZ:0x[a-z0-9]+]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3
; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
; DUMP: Node [[MAIN1]]
; DUMP: %call = call noundef ptr @_Z3foov() (clone 0)
; DUMP: AllocTypes: NotCold
; DUMP: ContextIds: 1 3
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
; DUMP: CallerEdges:
; DUMP: Node [[MAIN2]]
; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0)
; DUMP: AllocTypes: Cold
; DUMP: ContextIds: 2 4
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
; DUMP: CallerEdges:
; DUMP: Node [[BAZ]]
; DUMP: %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 3 4
; DUMP: CalleeEdges:
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4
;; This is the node synthesized for the call to bar in foo that was created
;; by inlining baz into foo.
; DUMP: Node [[FOO]]
; DUMP: %call.i = call noundef ptr @_Z3barv() (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 1 2
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 1 2
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
; DOT: digraph "postbuild" {
; DOT: label="postbuild";
; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\nnull call (external)}"];
; DOT: Node[[FOO]] -> Node[[BAZ:0x[a-z0-9]+]][tooltip="ContextIds: 3 4",fillcolor="mediumorchid1"];
; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
; DOT: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 3",fillcolor="brown1"];
; DOT: Node[[MAIN1]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 1",fillcolor="brown1"];
; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
; DOT: Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 4",fillcolor="cyan"];
; DOT: Node[[MAIN2]] -> Node[[FOO2]][tooltip="ContextIds: 2",fillcolor="cyan"];
; DOT: Node[[BAZ]] [shape=record,tooltip="N[[BAZ]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc2\n_Z3bazv -\> _Znam}"];
; DOT: Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
; DOT: Node[[FOO2]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
; DOT: }

View File

@@ -0,0 +1,135 @@
;; Test callsite context graph generation for call graph with two memprof
;; contexts and multiple levels of inlining, requiring generation of new
;; fused nodes to represent the inlined sequence while matching callsite
;; nodes onto the graph. In particular this tests the case where a function
;; has inlined a callee containing an inlined callee.
;;
;; Original code looks like:
;;
;; char *bar() __attribute__((noinline)) {
;; return new char[10];
;; }
;;
;; char *baz() {
;; return bar();
;; }
;;
;; char *foo() {
;; return baz();
;; }
;;
;; int main(int argc, char **argv) {
;; char *x = foo();
;; char *y = foo();
;; memset(x, 0, 10);
;; memset(y, 0, 10);
;; delete[] x;
;; sleep(10);
;; delete[] y;
;; return 0;
;; }
;;
;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
;; memory freed after sleep(10) results in cold lifetimes.
;;
;; Both foo and baz are inlined into main, at both foo callsites.
;; We should update the graph for new fused nodes for both of those inlined
;; callsites to bar.
;;
;; Note that foo and baz are both dead due to the inlining, but have been left
;; in the input IR to ensure that the MIB call chain is matched to the longer
;; inline sequences from main.
;;
;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
; RUN: opt -passes=memprof-context-disambiguation \
; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
;; bar: holds the only allocation call in this test. The @_Znam call carries
;; !memprof metadata (!7, which lists one "notcold" and one "cold" MIB) and a
;; single-entry !callsite (!12).
define ptr @_Z3barv() #0 {
entry:
%call = call noalias noundef nonnull dereferenceable(10) ptr @_Znam(i64 noundef 10) #7, !memprof !7, !callsite !12, !heapallocsite !13
;; The allocation result is unused and null is returned (presumably an artifact
;; of the llvm-reduce step mentioned in the header -- not confirmed).
ret ptr null
}
;; Callee of the allocation call in bar.
; Function Attrs: nobuiltin
declare ptr @_Znam(i64) #1
;; baz appears only as a declaration; nothing in this file calls it.
; Function Attrs: mustprogress
declare ptr @_Z3bazv() #2
;; main: two direct calls to bar. Each call's !callsite (!14 and !15) lists
;; three stack ids, which equal the last three ids of the "notcold" MIB stack
;; (!9) and of the "cold" MIB stack (!11) respectively. Per the header comment,
;; these correspond to the main->foo->baz chain that was inlined into main.
define i32 @main() #3 {
delete.end5:
%call.i.i = call noundef ptr @_Z3barv(), !callsite !14
%call.i.i8 = call noundef ptr @_Z3barv(), !callsite !15
ret i32 0
}
;; The three declarations below have no callers in this file.
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4
declare void @_ZdaPv() #5
declare i32 @sleep() #6
;; Attribute groups: #0 is on bar, #3 is on main, and #7 is on the @_Znam call
;; site inside bar; #1, #2, #4, #5 and #6 are attached to declarations only.
attributes #0 = { "stack-protector-buffer-size"="8" }
attributes #1 = { nobuiltin }
attributes #2 = { mustprogress }
attributes #3 = { "tune-cpu"="generic" }
attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) }
attributes #5 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
attributes #6 = { "disable-tail-calls"="true" }
attributes #7 = { builtin }
;; Module flags.
!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6}
!0 = !{i32 7, !"Dwarf Version", i32 5}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !{i32 1, !"wchar_size", i32 4}
!3 = !{i32 8, !"PIC Level", i32 2}
!4 = !{i32 7, !"PIE Level", i32 2}
!5 = !{i32 7, !"uwtable", i32 2}
!6 = !{i32 7, !"frame-pointer", i32 2}
;; !memprof attachment of the allocation call in bar: a list of two MIBs.
!7 = !{!8, !10}
;; First MIB: stack ids !9, allocation type "notcold".
!8 = !{!9, !"notcold"}
!9 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
;; Second MIB: stack ids !11, allocation type "cold". Its stack differs from !9
;; only in the last id.
!10 = !{!11, !"cold"}
!11 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
;; !callsite of the allocation call in bar: the first id of both MIB stacks.
!12 = !{i64 9086428284934609951}
;; !heapallocsite type attached to the allocation call.
!13 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
;; !callsite of the two calls to bar in main: the remaining three ids of !9 and
;; of !11, respectively.
!14 = !{i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
!15 = !{i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
; DUMP: CCG before cloning:
; DUMP: Callsite Context Graph:
; DUMP: Node [[BAR:0x[a-z0-9]+]]
; DUMP: %call = call noalias noundef nonnull dereferenceable(10) ptr @_Znam(i64 noundef 10) #7, !heapallocsite !7 (clone 0)
; DUMP: AllocTypes: NotColdCold
; DUMP: ContextIds: 1 2
; DUMP: CalleeEdges:
; DUMP: CallerEdges:
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
;; This is the node synthesized for the first inlined call chain of main->foo->baz
; DUMP: Node [[MAIN1]]
; DUMP: %call.i.i = call noundef ptr @_Z3barv() (clone 0)
; DUMP: AllocTypes: NotCold
; DUMP: ContextIds: 1
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
; DUMP: CallerEdges:
;; This is the node synthesized for the second inlined call chain of main->foo->baz
; DUMP: Node [[MAIN2]]
; DUMP: %call.i.i8 = call noundef ptr @_Z3barv() (clone 0)
; DUMP: AllocTypes: Cold
; DUMP: ContextIds: 2
; DUMP: CalleeEdges:
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
; DUMP: CallerEdges:

View File

@@ -0,0 +1,41 @@
;; Test that MemProfContextDisambiguation is enabled under the expected conditions
;; and in the expected position.
;; Pass is not currently enabled by default at any opt level.
; RUN: opt -debug-pass-manager -passes='lto<O0>' -S %s \
; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
; RUN: opt -debug-pass-manager -passes='lto<O1>' -S %s \
; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
; RUN: opt -debug-pass-manager -passes='lto<O2>' -S %s \
; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
; RUN: opt -debug-pass-manager -passes='lto<O3>' -S %s \
; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
;; Pass should not run even under option at O0/O1.
; RUN: opt -debug-pass-manager -passes='lto<O0>' -S %s \
; RUN: -enable-memprof-context-disambiguation \
; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
; RUN: opt -debug-pass-manager -passes='lto<O1>' -S %s \
; RUN: -enable-memprof-context-disambiguation \
; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
;; Pass should be enabled under option at O2/O3.
; RUN: opt -debug-pass-manager -passes='lto<O2>' -S %s \
; RUN: -enable-memprof-context-disambiguation \
; RUN: 2>&1 | FileCheck %s --check-prefix=ENABLED
; RUN: opt -debug-pass-manager -passes='lto<O3>' -S %s \
; RUN: -enable-memprof-context-disambiguation \
; RUN: 2>&1 | FileCheck %s --check-prefix=ENABLED
;; When enabled, MemProfContextDisambiguation runs just after inlining.
; ENABLED: Running pass: InlinerPass
; ENABLED: Invalidating analysis: InlineAdvisorAnalysis
; ENABLED: Running pass: MemProfContextDisambiguation
;; Minimal input module for the pipeline runs above: a single function that
;; calls @_Znam and returns the result. The call carries no memprof or callsite
;; metadata; the checks in this file only look at which passes are reported as
;; running.
define noundef ptr @_Z3barv() {
entry:
%call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10)
ret ptr %call
}
declare noundef nonnull ptr @_Znam(i64 noundef)