mirror of
https://github.com/intel/llvm.git
synced 2026-01-28 01:04:49 +08:00
Revert "[MemProf] Context disambiguation cloning pass [patch 1a/3]"
This reverts commit d6ad4f01c3.
Fails to build on at least gcc 12.2:
/home/npopov/repos/llvm-project/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp:482:1: error: no declaration matches ‘ContextNode<DerivedCCG, FuncTy, CallTy>* CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(const CallInfo&)’
482 | CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
/home/npopov/repos/llvm-project/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp:393:16: note: candidate is: ‘CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::ContextNode* CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::getNodeForInst(const CallInfo&)’
393 | ContextNode *getNodeForInst(const CallInfo &C);
| ^~~~~~~~~~~~~~
/home/npopov/repos/llvm-project/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp:99:7: note: ‘class CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>’ defined here
99 | class CallsiteContextGraph {
| ^~~~~~~~~~~~~~~~~~~~
This commit is contained in:
@@ -1,38 +0,0 @@
|
||||
//==- MemProfContextDisambiguation.h - Context Disambiguation ----*- C++ -*-==//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Implements support for context disambiguation of allocation calls for profile
|
||||
// guided heap optimization using memprof metadata. See implementation file for
|
||||
// details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H
|
||||
#define LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H
|
||||
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/StringSet.h"
|
||||
#include "llvm/IR/GlobalValue.h"
|
||||
#include "llvm/IR/PassManager.h"
|
||||
|
||||
namespace llvm {
|
||||
class Module;
|
||||
|
||||
class MemProfContextDisambiguation
|
||||
: public PassInfoMixin<MemProfContextDisambiguation> {
|
||||
/// Run the context disambiguator on \p M; returns true if any changes were made.
|
||||
bool processModule(Module &M);
|
||||
|
||||
public:
|
||||
MemProfContextDisambiguation() {}
|
||||
|
||||
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
|
||||
};
|
||||
} // end namespace llvm
|
||||
|
||||
#endif // LLVM_TRANSFORMS_IPO_MEMPROF_CONTEXT_DISAMBIGUATION_H
|
||||
@@ -117,7 +117,6 @@
|
||||
#include "llvm/Transforms/IPO/Internalize.h"
|
||||
#include "llvm/Transforms/IPO/LoopExtractor.h"
|
||||
#include "llvm/Transforms/IPO/LowerTypeTests.h"
|
||||
#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
|
||||
#include "llvm/Transforms/IPO/MergeFunctions.h"
|
||||
#include "llvm/Transforms/IPO/ModuleInliner.h"
|
||||
#include "llvm/Transforms/IPO/OpenMPOpt.h"
|
||||
|
||||
@@ -57,7 +57,6 @@
|
||||
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
|
||||
#include "llvm/Transforms/IPO/Inliner.h"
|
||||
#include "llvm/Transforms/IPO/LowerTypeTests.h"
|
||||
#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
|
||||
#include "llvm/Transforms/IPO/MergeFunctions.h"
|
||||
#include "llvm/Transforms/IPO/ModuleInliner.h"
|
||||
#include "llvm/Transforms/IPO/OpenMPOpt.h"
|
||||
@@ -272,10 +271,6 @@ static cl::opt<AttributorRunOption> AttributorRun(
|
||||
clEnumValN(AttributorRunOption::NONE, "none",
|
||||
"disable attributor runs")));
|
||||
|
||||
cl::opt<bool> EnableMemProfContextDisambiguation(
|
||||
"enable-memprof-context-disambiguation", cl::init(false), cl::Hidden,
|
||||
cl::ZeroOrMore, cl::desc("Enable MemProf context disambiguation"));
|
||||
|
||||
PipelineTuningOptions::PipelineTuningOptions() {
|
||||
LoopInterleaving = true;
|
||||
LoopVectorization = true;
|
||||
@@ -1714,12 +1709,6 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
|
||||
InlineContext{ThinOrFullLTOPhase::FullLTOPostLink,
|
||||
InlinePass::CGSCCInliner}));
|
||||
|
||||
// Perform context disambiguation after inlining, since that would reduce the
|
||||
// amount of additional cloning required to distinguish the allocation
|
||||
// contexts.
|
||||
if (EnableMemProfContextDisambiguation)
|
||||
MPM.addPass(MemProfContextDisambiguation());
|
||||
|
||||
// Optimize globals again after we ran the inliner.
|
||||
MPM.addPass(GlobalOptPass());
|
||||
|
||||
|
||||
@@ -87,7 +87,6 @@ MODULE_PASS("name-anon-globals", NameAnonGlobalPass())
|
||||
MODULE_PASS("no-op-module", NoOpModulePass())
|
||||
MODULE_PASS("objc-arc-apelim", ObjCARCAPElimPass())
|
||||
MODULE_PASS("partial-inliner", PartialInlinerPass())
|
||||
MODULE_PASS("memprof-context-disambiguation", MemProfContextDisambiguation())
|
||||
MODULE_PASS("pgo-icall-prom", PGOIndirectCallPromotion())
|
||||
MODULE_PASS("pgo-instr-gen", PGOInstrumentationGen())
|
||||
MODULE_PASS("pgo-instr-use", PGOInstrumentationUse())
|
||||
|
||||
@@ -27,7 +27,6 @@ add_llvm_component_library(LLVMipo
|
||||
Internalize.cpp
|
||||
LoopExtractor.cpp
|
||||
LowerTypeTests.cpp
|
||||
MemProfContextDisambiguation.cpp
|
||||
MergeFunctions.cpp
|
||||
ModuleInliner.cpp
|
||||
OpenMPOpt.cpp
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
184
llvm/test/ThinLTO/X86/memprof-summary.ll
Normal file
184
llvm/test/ThinLTO/X86/memprof-summary.ll
Normal file
@@ -0,0 +1,184 @@
|
||||
;; Check memprof summaries (per module, combined index, and distributed indexes)
|
||||
|
||||
; RUN: split-file %s %t
|
||||
; RUN: opt -module-summary %t/a.ll -o %ta.bc
|
||||
; RUN: opt -module-summary %t/b.ll -o %tb.bc
|
||||
|
||||
; RUN: llvm-dis -o - %ta.bc | FileCheck %s --check-prefix=PRELINKDISA
|
||||
; PRELINKDISA: gv: (name: "main", {{.*}} callsites: ((callee: ^2, clones: (0), stackIds: (8632435727821051414)), (callee: ^2, clones: (0), stackIds: (15025054523792398438)))))) ; guid = 15822663052811949562
|
||||
|
||||
; RUN: llvm-dis -o - %tb.bc | FileCheck %s --check-prefix=PRELINKDISB
|
||||
; PRELINKDISB: ^[[PLBAR:[0-9]+]] = gv: (name: "_Z3barv", {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438)))))))) ; guid = 4555904644815367798
|
||||
; PRELINKDISB: ^[[PLFOO:[0-9]+]] = gv: (name: "_Z3foov", {{.*}} callsites: ((callee: ^[[PLBAZ:[0-9]+]], clones: (0), stackIds: (2732490490862098848)))))) ; guid = 9191153033785521275
|
||||
; PRELINKDISB: ^[[PLBAZ]] = gv: (name: "_Z3bazv", {{.*}} callsites: ((callee: ^[[PLBAR]], clones: (0), stackIds: (12481870273128938184)))))) ; guid = 15176620447596392000
|
||||
|
||||
; RUN: llvm-bcanalyzer -dump %ta.bc | FileCheck %s --check-prefix=PRELINKBCANA
|
||||
; PRELINKBCANA: <STACK_IDS abbrevid=4 op0=8632435727821051414 op1=-3421689549917153178/>
|
||||
|
||||
; RUN: llvm-bcanalyzer -dump %tb.bc | FileCheck %s --check-prefix=PRELINKBCANB
|
||||
; PRELINKBCANB: <STACK_IDS abbrevid=4 op0=-5964873800580613432 op1=2732490490862098848 op2=8632435727821051414 op3=-3421689549917153178/>
|
||||
|
||||
; RUN: llvm-lto2 run %ta.bc %tb.bc -o %t -save-temps \
|
||||
; RUN: -thinlto-distributed-indexes \
|
||||
; RUN: -r=%ta.bc,main,plx \
|
||||
; RUN: -r=%ta.bc,_Z3foov, \
|
||||
; RUN: -r=%ta.bc,free, \
|
||||
; RUN: -r=%ta.bc,sleep, \
|
||||
; RUN: -r=%tb.bc,_Z3foov,pl \
|
||||
; RUN: -r=%tb.bc,_Znam, \
|
||||
; RUN: -r=%tb.bc,_Z3bazv,pl
|
||||
|
||||
; RUN: llvm-dis -o - %t.index.bc | FileCheck %s --check-prefix=COMBINEDDIS
|
||||
; COMBINEDDIS: ^[[COMBBAR:[0-9]+]] = gv: (guid: 4555904644815367798, {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438))))))))
|
||||
; COMBINEDDIS: ^[[COMBFOO:[0-9]+]] = gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: ^[[COMBBAZ:[0-9]+]], clones: (0), stackIds: (2732490490862098848))))))
|
||||
; COMBINEDDIS: ^[[COMBBAZ]] = gv: (guid: 15176620447596392000, {{.*}} callsites: ((callee: ^[[COMBBAR]], clones: (0), stackIds: (12481870273128938184))))))
|
||||
; COMBINEDDIS: ^[[COMBMAIN:[0-9]+]] = gv: (guid: 15822663052811949562, {{.*}} callsites: ((callee: ^[[COMBFOO]], clones: (0), stackIds: (8632435727821051414)), (callee: ^[[COMBFOO]], clones: (0), stackIds: (15025054523792398438))))))
|
||||
|
||||
; RUN: llvm-bcanalyzer -dump %t.index.bc | FileCheck %s --check-prefix=COMBINEDBCAN
|
||||
; COMBINEDBCAN: <STACK_IDS abbrevid=4 op0=8632435727821051414 op1=-3421689549917153178 op2=-5964873800580613432 op3=2732490490862098848/>
|
||||
|
||||
; RUN: llvm-dis -o - %ta.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDDISA
|
||||
; DISTRIBUTEDDISA: gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: null, clones: (0), stackIds: (2732490490862098848))))))
|
||||
; DISTRIBUTEDDISA: gv: (guid: 15822663052811949562, {{.*}} callsites: ((callee: ^2, clones: (0), stackIds: (8632435727821051414)), (callee: ^2, clones: (0), stackIds: (15025054523792398438))))))
|
||||
|
||||
; RUN: llvm-dis -o - %tb.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDDISB
|
||||
; DISTRIBUTEDDISB: ^[[DISTRBAR:[0-9]+]] = gv: (guid: 4555904644815367798, {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438))))))))
|
||||
; DISTRIBUTEDDISB: ^[[DISTRFOO:[0-9]+]] = gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: ^[[DISTRBAZ:[0-9]+]], clones: (0), stackIds: (2732490490862098848))))))
|
||||
; DISTRIBUTEDDISB: ^[[DISTRBAZ]] = gv: (guid: 15176620447596392000, {{.*}} callsites: ((callee: ^[[DISTRBAR]], clones: (0), stackIds: (12481870273128938184))))))
|
||||
|
||||
; RUN: llvm-bcanalyzer -dump %ta.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDBCANA
|
||||
; DISTRIBUTEDBCANA: <STACK_IDS abbrevid=4 op0=8632435727821051414 op1=-3421689549917153178 op2=2732490490862098848/>
|
||||
|
||||
; RUN: llvm-bcanalyzer -dump %tb.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDBCANB
|
||||
; DISTRIBUTEDBCANB: <STACK_IDS abbrevid=4 op0=8632435727821051414 op1=-3421689549917153178 op2=-5964873800580613432 op3=2732490490862098848/>
|
||||
|
||||
;--- a.ll
|
||||
; ModuleID = 'a.cc'
|
||||
source_filename = "a.cc"
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; Function Attrs: mustprogress norecurse uwtable
|
||||
define dso_local noundef i32 @main(i32 noundef %argc, ptr nocapture noundef readnone %argv) local_unnamed_addr #0 !dbg !39 {
|
||||
entry:
|
||||
%call = call noundef ptr @_Z3foov(), !dbg !42, !callsite !43
|
||||
%call1 = call noundef ptr @_Z3foov(), !dbg !44, !callsite !45
|
||||
call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) %call, i8 0, i64 10, i1 false), !dbg !46
|
||||
call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) %call1, i8 0, i64 10, i1 false), !dbg !47
|
||||
call void @free(ptr noundef %call) #4, !dbg !48
|
||||
%call2 = call i32 @sleep(i32 noundef 10), !dbg !49
|
||||
call void @free(ptr noundef %call1) #4, !dbg !50
|
||||
ret i32 0, !dbg !51
|
||||
}
|
||||
|
||||
declare !dbg !52 noundef ptr @_Z3foov() local_unnamed_addr #1
|
||||
|
||||
; Function Attrs: argmemonly mustprogress nocallback nofree nounwind willreturn writeonly
|
||||
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2
|
||||
|
||||
; Function Attrs: inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("free")
|
||||
declare void @free(ptr allocptr nocapture noundef) local_unnamed_addr #3
|
||||
|
||||
declare !dbg !53 i32 @sleep(i32 noundef) local_unnamed_addr #1
|
||||
|
||||
attributes #0 = { mustprogress norecurse uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
|
||||
attributes #1 = { "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
|
||||
attributes #2 = { argmemonly mustprogress nocallback nofree nounwind willreturn writeonly }
|
||||
attributes #3 = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("free") "alloc-family"="malloc" "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
|
||||
attributes #4 = { nounwind }
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0 (git@github.com:llvm/llvm-project.git ffecb643ee2c49e55e0689339b6d5921b5e6ff8b)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
|
||||
!1 = !DIFile(filename: "a.cc", directory: ".", checksumkind: CSK_MD5, checksum: "ebabd56909271a1d4a7cac81c10624d5")
|
||||
!2 = !{i32 7, !"Dwarf Version", i32 5}
|
||||
!3 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!4 = !{i32 1, !"wchar_size", i32 4}
|
||||
!5 = !{i32 8, !"PIC Level", i32 2}
|
||||
!6 = !{i32 7, !"PIE Level", i32 2}
|
||||
!7 = !{i32 7, !"uwtable", i32 2}
|
||||
!8 = !{i32 7, !"frame-pointer", i32 2}
|
||||
!39 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !40, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41)
|
||||
!40 = !DISubroutineType(types: !41)
|
||||
!41 = !{}
|
||||
!42 = !DILocation(line: 6, column: 13, scope: !39)
|
||||
!43 = !{i64 8632435727821051414}
|
||||
!44 = !DILocation(line: 7, column: 13, scope: !39)
|
||||
!45 = !{i64 -3421689549917153178}
|
||||
!46 = !DILocation(line: 8, column: 3, scope: !39)
|
||||
!47 = !DILocation(line: 9, column: 3, scope: !39)
|
||||
!48 = !DILocation(line: 10, column: 3, scope: !39)
|
||||
!49 = !DILocation(line: 11, column: 3, scope: !39)
|
||||
!50 = !DILocation(line: 12, column: 3, scope: !39)
|
||||
!51 = !DILocation(line: 13, column: 3, scope: !39)
|
||||
!52 = !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 4, type: !40, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !41)
|
||||
!53 = !DISubprogram(name: "sleep", scope: !54, file: !54, line: 453, type: !40, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !41)
|
||||
!54 = !DIFile(filename: "include/unistd.h", directory: "/usr", checksumkind: CSK_MD5, checksum: "ee8f41a17f563f029d0e930ad871815a")
|
||||
|
||||
;--- b.ll
|
||||
; ModuleID = 'b.cc'
|
||||
source_filename = "b.cc"
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; Function Attrs: mustprogress noinline uwtable
|
||||
define internal noalias noundef nonnull ptr @_Z3barv() local_unnamed_addr #0 !dbg !39 {
|
||||
entry:
|
||||
%call = call noalias noundef nonnull dereferenceable(10) ptr @_Znam(i64 noundef 10) #2, !dbg !42, !memprof !43, !callsite !48
|
||||
ret ptr %call, !dbg !49
|
||||
}
|
||||
|
||||
; Function Attrs: nobuiltin allocsize(0)
|
||||
declare noundef nonnull ptr @_Znam(i64 noundef) local_unnamed_addr #1
|
||||
|
||||
; Function Attrs: mustprogress noinline uwtable
|
||||
define dso_local noalias noundef nonnull ptr @_Z3bazv() local_unnamed_addr #0 !dbg !50 {
|
||||
entry:
|
||||
%call = call noundef ptr @_Z3barv(), !dbg !51, !callsite !52
|
||||
ret ptr %call, !dbg !53
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress uwtable
|
||||
define dso_local noalias noundef nonnull ptr @_Z3foov() local_unnamed_addr #3 !dbg !54 {
|
||||
entry:
|
||||
%call = call noundef ptr @_Z3bazv(), !dbg !55, !callsite !56
|
||||
ret ptr %call, !dbg !57
|
||||
}
|
||||
|
||||
attributes #0 = { mustprogress noinline uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
|
||||
attributes #1 = { nobuiltin allocsize(0) "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
|
||||
attributes #2 = { builtin allocsize(0) }
|
||||
attributes #3 = { mustprogress uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0 (git@github.com:llvm/llvm-project.git ffecb643ee2c49e55e0689339b6d5921b5e6ff8b)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
|
||||
!1 = !DIFile(filename: "b.cc", directory: ".", checksumkind: CSK_MD5, checksum: "335f81d275af57725cfc9ffc7be49bc2")
|
||||
!2 = !{i32 7, !"Dwarf Version", i32 5}
|
||||
!3 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!4 = !{i32 1, !"wchar_size", i32 4}
|
||||
!5 = !{i32 8, !"PIC Level", i32 2}
|
||||
!6 = !{i32 7, !"PIE Level", i32 2}
|
||||
!7 = !{i32 7, !"uwtable", i32 2}
|
||||
!8 = !{i32 7, !"frame-pointer", i32 2}
|
||||
!39 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 1, type: !40, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41)
|
||||
!40 = !DISubroutineType(types: !41)
|
||||
!41 = !{}
|
||||
!42 = !DILocation(line: 2, column: 10, scope: !39)
|
||||
!43 = !{!44, !46}
|
||||
!44 = !{!45, !"notcold"}
|
||||
!45 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
|
||||
!46 = !{!47, !"cold"}
|
||||
!47 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
|
||||
!48 = !{i64 9086428284934609951}
|
||||
!49 = !DILocation(line: 2, column: 3, scope: !39)
|
||||
!50 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !1, file: !1, line: 5, type: !40, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41)
|
||||
!51 = !DILocation(line: 6, column: 10, scope: !50)
|
||||
!52 = !{i64 -5964873800580613432}
|
||||
!53 = !DILocation(line: 6, column: 3, scope: !50)
|
||||
!54 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 9, type: !40, scopeLine: 9, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41)
|
||||
!55 = !DILocation(line: 10, column: 10, scope: !54)
|
||||
!56 = !{i64 2732490490862098848}
|
||||
!57 = !DILocation(line: 10, column: 3, scope: !54)
|
||||
@@ -1,158 +0,0 @@
|
||||
;; Test callsite context graph generation for simple call graph with
|
||||
;; two memprof contexts and no inlining.
|
||||
;;
|
||||
;; Original code looks like:
|
||||
;;
|
||||
;; char *bar() {
|
||||
;; return new char[10];
|
||||
;; }
|
||||
;;
|
||||
;; char *baz() {
|
||||
;; return bar();
|
||||
;; }
|
||||
;;
|
||||
;; char *foo() {
|
||||
;; return baz();
|
||||
;; }
|
||||
;;
|
||||
;; int main(int argc, char **argv) {
|
||||
;; char *x = foo();
|
||||
;; char *y = foo();
|
||||
;; memset(x, 0, 10);
|
||||
;; memset(y, 0, 10);
|
||||
;; delete[] x;
|
||||
;; sleep(10);
|
||||
;; delete[] y;
|
||||
;; return 0;
|
||||
;; }
|
||||
;;
|
||||
;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
|
||||
;; memory freed after sleep(10) results in cold lifetimes.
|
||||
;;
|
||||
;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
|
||||
|
||||
; RUN: opt -passes=memprof-context-disambiguation \
|
||||
; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
|
||||
; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
|
||||
; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
|
||||
|
||||
; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
|
||||
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define i32 @main() #0 {
|
||||
entry:
|
||||
%call = call noundef ptr @_Z3foov(), !callsite !0
|
||||
%call1 = call noundef ptr @_Z3foov(), !callsite !1
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
|
||||
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #1
|
||||
|
||||
; Function Attrs: nobuiltin
|
||||
declare void @_ZdaPv() #2
|
||||
|
||||
define internal ptr @_Z3barv() #3 {
|
||||
entry:
|
||||
%call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !2, !callsite !7
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
declare ptr @_Znam(i64)
|
||||
|
||||
define internal ptr @_Z3bazv() #4 {
|
||||
entry:
|
||||
%call = call noundef ptr @_Z3barv(), !callsite !8
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
; Function Attrs: noinline
|
||||
define internal ptr @_Z3foov() #5 {
|
||||
entry:
|
||||
%call = call noundef ptr @_Z3bazv(), !callsite !9
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
; uselistorder directives
|
||||
uselistorder ptr @_Z3foov, { 1, 0 }
|
||||
|
||||
attributes #0 = { "tune-cpu"="generic" }
|
||||
attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: write) }
|
||||
attributes #2 = { nobuiltin }
|
||||
attributes #3 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
|
||||
attributes #4 = { "stack-protector-buffer-size"="8" }
|
||||
attributes #5 = { noinline }
|
||||
attributes #6 = { builtin }
|
||||
|
||||
!0 = !{i64 8632435727821051414}
|
||||
!1 = !{i64 -3421689549917153178}
|
||||
!2 = !{!3, !5}
|
||||
!3 = !{!4, !"notcold"}
|
||||
!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
|
||||
!5 = !{!6, !"cold"}
|
||||
!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
|
||||
!7 = !{i64 9086428284934609951}
|
||||
!8 = !{i64 -5964873800580613432}
|
||||
!9 = !{i64 2732490490862098848}
|
||||
|
||||
|
||||
; DUMP: CCG before cloning:
|
||||
; DUMP: Callsite Context Graph:
|
||||
; DUMP: Node [[BAR:0x[a-z0-9]+]]
|
||||
; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 1 2
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: CallerEdges:
|
||||
; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2
|
||||
|
||||
; DUMP: Node [[BAZ]]
|
||||
; DUMP: %call = call noundef ptr @_Z3barv() (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 1 2
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[BAR]] to Caller: [[BAZ]] AllocTypes: NotColdCold ContextIds: 1 2
|
||||
; DUMP: CallerEdges:
|
||||
; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2
|
||||
|
||||
; DUMP: Node [[FOO]]
|
||||
; DUMP: %call = call noundef ptr @_Z3bazv() (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 1 2
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 1 2
|
||||
; DUMP: CallerEdges:
|
||||
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
|
||||
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
|
||||
|
||||
; DUMP: Node [[MAIN1]]
|
||||
; DUMP: %call = call noundef ptr @_Z3foov() (clone 0)
|
||||
; DUMP: AllocTypes: NotCold
|
||||
; DUMP: ContextIds: 1
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
|
||||
; DUMP: CallerEdges:
|
||||
|
||||
; DUMP: Node [[MAIN2]]
|
||||
; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0)
|
||||
; DUMP: AllocTypes: Cold
|
||||
; DUMP: ContextIds: 2
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
|
||||
; DUMP: CallerEdges:
|
||||
|
||||
|
||||
; DOT: digraph "postbuild" {
|
||||
; DOT: label="postbuild";
|
||||
; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
|
||||
; DOT: Node[[BAZ:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAZ]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 12481870273128938184\n_Z3bazv -\> _Z3barv}"];
|
||||
; DOT: Node[[BAZ]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
|
||||
; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\n_Z3foov -\> _Z3bazv}"];
|
||||
; DOT: Node[[FOO]] -> Node[[BAZ]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
|
||||
; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
|
||||
; DOT: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 1",fillcolor="brown1"];
|
||||
; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
|
||||
; DOT: Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 2",fillcolor="cyan"];
|
||||
; DOT: }
|
||||
@@ -1,232 +0,0 @@
|
||||
;; Test callsite context graph generation for call graph with MIBs
|
||||
;; that have pruned contexts that partially match multiple inlined
|
||||
;; callsite contexts, requiring duplication of context ids and nodes
|
||||
;; while matching callsite nodes onto the graph.
|
||||
;;
|
||||
;; Original code looks like:
|
||||
;;
|
||||
;; char *D() {
|
||||
;; return new char[10];
|
||||
;; }
|
||||
;;
|
||||
;; char *F() {
|
||||
;; return D();
|
||||
;; }
|
||||
;;
|
||||
;; char *C() {
|
||||
;; return D();
|
||||
;; }
|
||||
;;
|
||||
;; char *B() {
|
||||
;; return C();
|
||||
;; }
|
||||
;;
|
||||
;; char *E() {
|
||||
;; return C();
|
||||
;; }
|
||||
;; int main(int argc, char **argv) {
|
||||
;; char *x = B(); // cold
|
||||
;; char *y = E(); // cold
|
||||
;; char *z = F(); // default
|
||||
;; memset(x, 0, 10);
|
||||
;; memset(y, 0, 10);
|
||||
;; memset(z, 0, 10);
|
||||
;; delete[] z;
|
||||
;; sleep(10);
|
||||
;; delete[] x;
|
||||
;; delete[] y;
|
||||
;; return 0;
|
||||
;; }
|
||||
;;
|
||||
;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
|
||||
;; memory freed after sleep(10) results in cold lifetimes.
|
||||
;;
|
||||
;; The code below was created by forcing inlining of C into both B and E.
|
||||
;; Since both allocation contexts via C are cold, the matched memprof
|
||||
;; metadata has the context pruned above C's callsite. This requires
|
||||
;; matching the stack node for C to callsites where it was inlined (i.e.
|
||||
;; the callsites in B and E that have callsite metadata that includes C's).
|
||||
;; It also requires duplication of that node in the graph as well as the
|
||||
;; duplication of the context ids along that path through the graph,
|
||||
;; so that we can represent the duplicated (via inlining) C callsite.
|
||||
;;
|
||||
;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
|
||||
|
||||
; RUN: opt -passes=memprof-context-disambiguation \
|
||||
; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
|
||||
; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
|
||||
; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
|
||||
|
||||
; RUN: cat %t.ccg.prestackupdate.dot | FileCheck %s --check-prefix=DOTPRE
|
||||
; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOTPOST
|
||||
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define internal ptr @_Z1Dv() {
|
||||
entry:
|
||||
%call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !memprof !0, !callsite !5
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
declare ptr @_Znam(i64)
|
||||
|
||||
define internal ptr @_Z1Fv() #0 {
|
||||
entry:
|
||||
%call = call noundef ptr @_Z1Dv(), !callsite !6
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress noinline optnone uwtable
|
||||
define internal ptr @_Z1Cv() #1 {
|
||||
entry:
|
||||
%call = call noundef ptr @_Z1Dv(), !callsite !7
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress noinline optnone uwtable
|
||||
define internal ptr @_Z1Bv() #1 {
|
||||
entry:
|
||||
%call.i = call noundef ptr @_Z1Dv(), !callsite !8
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress noinline optnone uwtable
|
||||
define internal ptr @_Z1Ev() #1 {
|
||||
entry:
|
||||
%call.i = call noundef ptr @_Z1Dv(), !callsite !9
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
; Function Attrs: noinline
|
||||
declare i32 @main() #2
|
||||
|
||||
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
|
||||
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3
|
||||
|
||||
; Function Attrs: nounwind
|
||||
declare void @_ZdaPv() #4
|
||||
|
||||
declare i32 @sleep() #5
|
||||
|
||||
attributes #0 = { "disable-tail-calls"="true" }
|
||||
attributes #1 = { mustprogress noinline optnone uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
|
||||
attributes #2 = { noinline }
|
||||
attributes #3 = { nocallback nofree nounwind willreturn memory(argmem: write) }
|
||||
attributes #4 = { nounwind }
|
||||
attributes #5 = { "no-trapping-math"="true" }
|
||||
attributes #6 = { builtin }
|
||||
|
||||
!0 = !{!1, !3}
|
||||
!1 = !{!2, !"cold"}
|
||||
!2 = !{i64 6541423618768552252, i64 -6270142974039008131}
|
||||
!3 = !{!4, !"notcold"}
|
||||
!4 = !{i64 6541423618768552252, i64 -4903163940066524832}
|
||||
!5 = !{i64 6541423618768552252}
|
||||
!6 = !{i64 -4903163940066524832}
|
||||
!7 = !{i64 -6270142974039008131}
|
||||
!8 = !{i64 -6270142974039008131, i64 -184525619819294889}
|
||||
!9 = !{i64 -6270142974039008131, i64 1905834578520680781}
|
||||
|
||||
|
||||
;; After adding only the alloc node memprof metadata, we only have 2 contexts.
|
||||
|
||||
; DUMP: CCG before updating call stack chains:
|
||||
; DUMP: Callsite Context Graph:
|
||||
; DUMP: Node [[D:0x[a-z0-9]+]]
|
||||
; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 1 2
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: CallerEdges:
|
||||
; DUMP: Edge from Callee [[D]] to Caller: [[C:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
|
||||
; DUMP: Edge from Callee [[D]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 2
|
||||
|
||||
; DUMP: Node [[C]]
|
||||
; DUMP: null Call
|
||||
; DUMP: AllocTypes: Cold
|
||||
; DUMP: ContextIds: 1
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[D]] to Caller: [[C]] AllocTypes: Cold ContextIds: 1
|
||||
; DUMP: CallerEdges:
|
||||
|
||||
; DUMP: Node [[F]]
|
||||
; DUMP: null Call
|
||||
; DUMP: AllocTypes: NotCold
|
||||
; DUMP: ContextIds: 2
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
|
||||
; DUMP: CallerEdges:
|
||||
|
||||
;; After updating for callsite metadata, we should have generated context ids 3 and 4,
|
||||
;; along with 2 new nodes for those callsites. All have the same allocation type
|
||||
;; behavior as the original C node.
|
||||
|
||||
; DUMP: CCG before cloning:
|
||||
; DUMP: Callsite Context Graph:
|
||||
; DUMP: Node [[D]]
|
||||
; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6 (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 1 2 3 4
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: CallerEdges:
|
||||
; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
|
||||
; DUMP: Edge from Callee [[D]] to Caller: [[C2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 3
|
||||
; DUMP: Edge from Callee [[D]] to Caller: [[B:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
|
||||
; DUMP: Edge from Callee [[D]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 1
|
||||
|
||||
; DUMP: Node [[F]]
|
||||
; DUMP: %call = call noundef ptr @_Z1Dv() (clone 0)
|
||||
; DUMP: AllocTypes: NotCold
|
||||
; DUMP: ContextIds: 2
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[D]] to Caller: [[F]] AllocTypes: NotCold ContextIds: 2
|
||||
; DUMP: CallerEdges:
|
||||
|
||||
; DUMP: Node [[C2]]
|
||||
; DUMP: %call = call noundef ptr @_Z1Dv() (clone 0)
|
||||
; DUMP: AllocTypes: Cold
|
||||
; DUMP: ContextIds: 3
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[D]] to Caller: [[C2]] AllocTypes: Cold ContextIds: 3
|
||||
; DUMP: CallerEdges:
|
||||
|
||||
; DUMP: Node [[B]]
|
||||
; DUMP: %call.i = call noundef ptr @_Z1Dv() (clone 0)
|
||||
; DUMP: AllocTypes: Cold
|
||||
; DUMP: ContextIds: 4
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[D]] to Caller: [[B]] AllocTypes: Cold ContextIds: 4
|
||||
; DUMP: CallerEdges:
|
||||
|
||||
; DUMP: Node [[E]]
|
||||
; DUMP: %call.i = call noundef ptr @_Z1Dv() (clone 0)
|
||||
; DUMP: AllocTypes: Cold
|
||||
; DUMP: ContextIds: 1
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[D]] to Caller: [[E]] AllocTypes: Cold ContextIds: 1
|
||||
; DUMP: CallerEdges:
|
||||
|
||||
|
||||
; DOTPRE: digraph "prestackupdate" {
|
||||
; DOTPRE: label="prestackupdate";
|
||||
; DOTPRE: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
|
||||
; DOTPRE: Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12176601099670543485\nnull call (external)}"];
|
||||
; DOTPRE: Node[[C]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
|
||||
; DOTPRE: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\nnull call (external)}"];
|
||||
; DOTPRE: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
|
||||
; DOTPRE: }
|
||||
|
||||
|
||||
; DOTPOST:digraph "postbuild" {
|
||||
; DOTPOST: label="postbuild";
|
||||
; DOTPOST: Node[[D:0x[a-z0-9]+]] [shape=record,tooltip="N[[D]] ContextIds: 1 2 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z1Dv -\> _Znam}"];
|
||||
; DOTPOST: Node[[F:0x[a-z0-9]+]] [shape=record,tooltip="N[[F]] ContextIds: 2",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 13543580133643026784\n_Z1Fv -\> _Z1Dv}"];
|
||||
; DOTPOST: Node[[F]] -> Node[[D]][tooltip="ContextIds: 2",fillcolor="brown1"];
|
||||
; DOTPOST: Node[[C:0x[a-z0-9]+]] [shape=record,tooltip="N[[C]] ContextIds: 3",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Cv -\> _Z1Dv}"];
|
||||
; DOTPOST: Node[[C]] -> Node[[D]][tooltip="ContextIds: 3",fillcolor="cyan"];
|
||||
; DOTPOST: Node[[B:0x[a-z0-9]+]] [shape=record,tooltip="N[[B]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Bv -\> _Z1Dv}"];
|
||||
; DOTPOST: Node[[B]] -> Node[[D]][tooltip="ContextIds: 4",fillcolor="cyan"];
|
||||
; DOTPOST: Node[[E:0x[a-z0-9]+]] [shape=record,tooltip="N[[E]] ContextIds: 1",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 0\n_Z1Ev -\> _Z1Dv}"];
|
||||
; DOTPOST: Node[[E]] -> Node[[D]][tooltip="ContextIds: 1",fillcolor="cyan"];
|
||||
; DOTPOST:}
|
||||
@@ -1,386 +0,0 @@
|
||||
;; Test callsite context graph generation for call graph with MIBs
|
||||
;; that have pruned contexts that partially match multiple inlined
|
||||
;; callsite contexts, requiring duplication of context ids and nodes
|
||||
;; while matching callsite nodes onto the graph. This test requires more
|
||||
;; complex duplication due to multiple contexts for different allocations
|
||||
;; that share some of the same callsite nodes.
|
||||
;;
|
||||
;; Original code looks like:
|
||||
;;
|
||||
;; char *D(bool Call1) {
|
||||
;; if (Call1)
|
||||
;; return new char[10];
|
||||
;; else
|
||||
;; return new char[10];
|
||||
;; }
|
||||
;;
|
||||
;; char *C(bool Call1) {
|
||||
;; return D(Call1);
|
||||
;; }
|
||||
;;
|
||||
;; char *B(bool Call1) {
|
||||
;; if (Call1)
|
||||
;; return C(true);
|
||||
;; else
|
||||
;; return C(false);
|
||||
;; }
|
||||
;;
|
||||
;; char *A(bool Call1) {
|
||||
;; return B(Call1);
|
||||
;; }
|
||||
;;
|
||||
;; char *A1() {
|
||||
;; return A(true);
|
||||
;; }
|
||||
;;
|
||||
;; char *A2() {
|
||||
;; return A(true);
|
||||
;; }
|
||||
;;
|
||||
;; char *A3() {
|
||||
;; return A(false);
|
||||
;; }
|
||||
;;
|
||||
;; char *A4() {
|
||||
;; return A(false);
|
||||
;; }
|
||||
;;
|
||||
;; char *E() {
|
||||
;; return B(true);
|
||||
;; }
|
||||
;;
|
||||
;; char *F() {
|
||||
;; return B(false);
|
||||
;; }
|
||||
;;
|
||||
;; int main(int argc, char **argv) {
|
||||
;; char *a1 = A1(); // cold
|
||||
;; char *a2 = A2(); // cold
|
||||
;; char *e = E(); // default
|
||||
;; char *a3 = A3(); // default
|
||||
;; char *a4 = A4(); // default
|
||||
;; char *f = F(); // cold
|
||||
;; memset(a1, 0, 10);
|
||||
;; memset(a2, 0, 10);
|
||||
;; memset(e, 0, 10);
|
||||
;; memset(a3, 0, 10);
|
||||
;; memset(a4, 0, 10);
|
||||
;; memset(f, 0, 10);
|
||||
;; delete[] a3;
|
||||
;; delete[] a4;
|
||||
;; delete[] e;
|
||||
;; sleep(10);
|
||||
;; delete[] a1;
|
||||
;; delete[] a2;
|
||||
;; delete[] f;
|
||||
;; return 0;
|
||||
;; }
|
||||
;;
|
||||
;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
|
||||
;; memory freed after sleep(10) results in cold lifetimes.
|
||||
;;
|
||||
;; The code below was created by forcing inlining of A into its callers,
|
||||
;; without any other inlining or optimizations. Since both allocation contexts
|
||||
;; via A for each allocation in D have the same allocation type (cold via
|
||||
;; A1 and A2 for the first new in D, and non-cold via A3 and A4 for the second
|
||||
;; new in D), the contexts for those respective allocations are pruned above A.
|
||||
;; The allocations via E and F are to ensure we don't prune above B.
|
||||
;;
|
||||
;; The matching onto the inlined A[1234]->A sequences will require duplication
|
||||
;; of the context id assigned to the context from A for each allocation in D.
|
||||
;; This test ensures that we do this correctly in the presence of callsites
|
||||
;; shared by the different duplicated context ids (i.e. callsite in C).
|
||||
;;
|
||||
;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
|
||||
|
||||
; RUN: opt -passes=memprof-context-disambiguation \
|
||||
; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
|
||||
; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
|
||||
; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
|
||||
|
||||
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
; Function Attrs: mustprogress noinline uwtable
|
||||
;; Holds the two @_Znam allocation calls checked by this test: %call carries
;; !memprof !0 / !callsite !5, and %call1 carries !memprof !6 / !callsite !11.
;; In this reduced IR the if.else block has no predecessors and every path
;; returns null.
define ptr @_Z1Db(i1 %Call1) #0 {
|
||||
entry:
|
||||
%call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !5
|
||||
br label %return
|
||||
|
||||
if.else: ; No predecessors!
|
||||
%call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !6, !callsite !11
|
||||
br label %return
|
||||
|
||||
return: ; preds = %if.else, %entry
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
; Function Attrs: nobuiltin
|
||||
declare ptr @_Znam(i64) #1
|
||||
|
||||
;; Calls @_Z1Db with %tobool, which is the constant i8 0 truncated to i1, and
;; returns null. The call is tagged with the single-id !callsite !12.
define ptr @_Z1Cb(i1 %Call1) {
|
||||
entry:
|
||||
%tobool = trunc i8 0 to i1
|
||||
%call = call noundef ptr @_Z1Db(i1 noundef zeroext %tobool), !callsite !12
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress noinline uwtable
|
||||
;; Contains two calls to @_Z1Cb: one passing true in entry (!callsite !13) and
;; one passing false in if.else (!callsite !14). In this reduced IR the if.else
;; block has no predecessors and every path returns null.
define ptr @_Z1Bb(i1 %Call1) #0 {
|
||||
entry:
|
||||
%call = call noundef ptr @_Z1Cb(i1 noundef zeroext true), !callsite !13
|
||||
br label %return
|
||||
|
||||
if.else: ; No predecessors!
|
||||
%call1 = call noundef ptr @_Z1Cb(i1 noundef zeroext false), !callsite !14
|
||||
br label %return
|
||||
|
||||
return: ; preds = %if.else, %entry
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
;; Forwards %tobool to @_Z1Bb and returns null. The call is tagged with the
;; single-id !callsite !15.
define ptr @_Z1Ab(i1 %tobool) #2 {
|
||||
entry:
|
||||
%call = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool), !callsite !15
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress noinline uwtable
|
||||
;; Calls @_Z1Bb with %tobool.i and returns null. !callsite !16 holds two stack
;; ids; the first is the same id that appears alone in !15.
define ptr @_Z2A1v(i1 %tobool.i) #0 {
|
||||
entry:
|
||||
%call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !16
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress noinline uwtable
|
||||
;; Calls @_Z1Bb with %tobool.i and returns null. !callsite !17 holds two stack
;; ids; the first is the same id that appears alone in !15.
define ptr @_Z2A2v(i1 %tobool.i) #0 {
|
||||
entry:
|
||||
%call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !17
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress noinline uwtable
|
||||
;; Calls @_Z1Bb with %tobool.i and returns null. !callsite !18 holds two stack
;; ids; the first is the same id that appears alone in !15.
define ptr @_Z2A3v(i1 %tobool.i) #0 {
|
||||
entry:
|
||||
%call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !18
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress noinline uwtable
|
||||
;; Calls @_Z1Bb with %tobool.i and returns null. !callsite !19 holds two stack
;; ids; the first is the same id that appears alone in !15.
define ptr @_Z2A4v(i1 %tobool.i) #0 {
|
||||
entry:
|
||||
%call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i), !callsite !19
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress noinline uwtable
|
||||
;; Calls @_Z1Bb(true) and returns null. The single stack id in !callsite !20 is
;; the last id of context !2, which MIB !1 marks "notcold".
define ptr @_Z1Ev() #0 {
|
||||
entry:
|
||||
%call = call noundef ptr @_Z1Bb(i1 noundef zeroext true), !callsite !20
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress noinline uwtable
|
||||
;; Calls @_Z1Bb(false) and returns null. The single stack id in !callsite !21
;; is the last id of context !10, which MIB !9 marks "cold".
define ptr @_Z1Fv() #0 {
|
||||
entry:
|
||||
%call = call noundef ptr @_Z1Bb(i1 noundef zeroext false), !callsite !21
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
; Function Attrs: noinline
|
||||
declare i32 @main() #3
|
||||
|
||||
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
|
||||
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4
|
||||
|
||||
declare void @_ZdaPv() #5
|
||||
|
||||
declare i32 @sleep() #6
|
||||
|
||||
; uselistorder directives
|
||||
uselistorder ptr @_Znam, { 1, 0 }
|
||||
|
||||
attributes #0 = { mustprogress noinline uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
|
||||
attributes #1 = { nobuiltin }
|
||||
attributes #2 = { "tune-cpu"="generic" }
|
||||
attributes #3 = { noinline }
|
||||
attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) }
|
||||
attributes #5 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
|
||||
attributes #6 = { "disable-tail-calls"="true" }
|
||||
attributes #7 = { builtin allocsize(0) }
|
||||
|
||||
!0 = !{!1, !3}
|
||||
!1 = !{!2, !"notcold"}
|
||||
!2 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 1905834578520680781}
|
||||
!3 = !{!4, !"cold"}
|
||||
!4 = !{i64 4854880825882961848, i64 -904694911315397047, i64 6532298921261778285, i64 -6528110295079665978}
|
||||
!5 = !{i64 4854880825882961848}
|
||||
!6 = !{!7, !9}
|
||||
!7 = !{!8, !"notcold"}
|
||||
!8 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -6528110295079665978}
|
||||
!9 = !{!10, !"cold"}
|
||||
!10 = !{i64 -8775068539491628272, i64 -904694911315397047, i64 7859682663773658275, i64 -4903163940066524832}
|
||||
!11 = !{i64 -8775068539491628272}
|
||||
!12 = !{i64 -904694911315397047}
|
||||
!13 = !{i64 6532298921261778285}
|
||||
!14 = !{i64 7859682663773658275}
|
||||
!15 = !{i64 -6528110295079665978}
|
||||
!16 = !{i64 -6528110295079665978, i64 5747919905719679568}
|
||||
!17 = !{i64 -6528110295079665978, i64 -5753238080028016843}
|
||||
!18 = !{i64 -6528110295079665978, i64 1794685869326395337}
|
||||
!19 = !{i64 -6528110295079665978, i64 5462047985461644151}
|
||||
!20 = !{i64 1905834578520680781}
|
||||
!21 = !{i64 -4903163940066524832}
|
||||
|
||||
|
||||
;; After adding only the alloc node memprof metadata, we only have 4 contexts (we only
|
||||
;; match the interesting parts of the pre-update graph here).
|
||||
|
||||
; DUMP: CCG before updating call stack chains:
|
||||
; DUMP: Callsite Context Graph:
|
||||
|
||||
; DUMP: Node [[D1:0x[a-z0-9]+]]
|
||||
; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 1 2
|
||||
|
||||
; DUMP: Node [[C:0x[a-z0-9]+]]
|
||||
; DUMP: null Call
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 1 2 3 4
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2
|
||||
; DUMP: Edge from Callee [[D2:0x[a-z0-9]+]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4
|
||||
|
||||
; DUMP: Node [[D2]]
|
||||
; DUMP: %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 3 4
|
||||
|
||||
|
||||
;; After updating for callsite metadata, we should have duplicated the context
|
||||
;; ids coming from node A (2 and 3) 4 times, for the 4 different callers of A,
|
||||
;; and used those on new nodes for those callers. Note that while in reality
|
||||
;; we only have cold edges coming from A1 and A2 and noncold from A3 and A4,
|
||||
;; due to the pruning we have lost this information and thus end up duplicating
|
||||
;; both of A's contexts to all of the new nodes (which could result in some
|
||||
;; unnecessary cloning).
|
||||
|
||||
; DUMP: CCG before cloning:
|
||||
; DUMP: Callsite Context Graph:
|
||||
; DUMP: Node [[D1]]
|
||||
; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 1 2 5 7 9 11
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: CallerEdges:
|
||||
; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
|
||||
|
||||
; DUMP: Node [[C]]
|
||||
; DUMP: %call = call noundef ptr @_Z1Db(i1 noundef zeroext %tobool) (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 1 2 3 4 5 6 7 8 9 10 11 12
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[D1]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
|
||||
; DUMP: Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
|
||||
; DUMP: CallerEdges:
|
||||
; DUMP: Edge from Callee [[C]] to Caller: [[B1:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
|
||||
; DUMP: Edge from Callee [[C]] to Caller: [[B2:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
|
||||
|
||||
; DUMP: Node [[B1]]
|
||||
; DUMP: %call = call noundef ptr @_Z1Cb(i1 noundef zeroext true) (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 1 2 5 7 9 11
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[C]] to Caller: [[B1]] AllocTypes: NotColdCold ContextIds: 1 2 5 7 9 11
|
||||
; DUMP: CallerEdges:
|
||||
; DUMP: Edge from Callee [[B1]] to Caller: [[E:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
|
||||
; DUMP: Edge from Callee [[B1]] to Caller: [[A2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 5
|
||||
; DUMP: Edge from Callee [[B1]] to Caller: [[A3:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 7
|
||||
; DUMP: Edge from Callee [[B1]] to Caller: [[A1:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 9
|
||||
; DUMP: Edge from Callee [[B1]] to Caller: [[A4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 11
|
||||
; DUMP: Edge from Callee [[B1]] to Caller: [[A:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
|
||||
|
||||
; DUMP: Node [[E]]
|
||||
; DUMP: %call = call noundef ptr @_Z1Bb(i1 noundef zeroext true) (clone 0)
|
||||
; DUMP: AllocTypes: NotCold
|
||||
; DUMP: ContextIds: 1
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[B1]] to Caller: [[E]] AllocTypes: NotCold ContextIds: 1
|
||||
; DUMP: CallerEdges:
|
||||
|
||||
; DUMP: Node [[D2]]
|
||||
; DUMP: %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 3 4 6 8 10 12
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: CallerEdges:
|
||||
; DUMP: Edge from Callee [[D2]] to Caller: [[C]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
|
||||
|
||||
; DUMP: Node [[B2]]
|
||||
; DUMP: %call1 = call noundef ptr @_Z1Cb(i1 noundef zeroext false) (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 3 4 6 8 10 12
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[C]] to Caller: [[B2]] AllocTypes: NotColdCold ContextIds: 3 4 6 8 10 12
|
||||
; DUMP: CallerEdges:
|
||||
; DUMP: Edge from Callee [[B2]] to Caller: [[F:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
|
||||
; DUMP: Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6
|
||||
; DUMP: Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8
|
||||
; DUMP: Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10
|
||||
; DUMP: Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12
|
||||
; DUMP: Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3
|
||||
|
||||
; DUMP: Node [[F]]
|
||||
; DUMP: %call = call noundef ptr @_Z1Bb(i1 noundef zeroext false) (clone 0)
|
||||
; DUMP: AllocTypes: Cold
|
||||
; DUMP: ContextIds: 4
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[B2]] to Caller: [[F]] AllocTypes: Cold ContextIds: 4
|
||||
; DUMP: CallerEdges:
|
||||
|
||||
; DUMP: Node [[A2]]
|
||||
; DUMP: %call = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool) (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 5 6
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[B1]] to Caller: [[A2]] AllocTypes: Cold ContextIds: 5
|
||||
; DUMP: Edge from Callee [[B2]] to Caller: [[A2]] AllocTypes: NotCold ContextIds: 6
|
||||
; DUMP: CallerEdges:
|
||||
|
||||
; DUMP: Node [[A3]]
|
||||
; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 7 8
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[B1]] to Caller: [[A3]] AllocTypes: Cold ContextIds: 7
|
||||
; DUMP: Edge from Callee [[B2]] to Caller: [[A3]] AllocTypes: NotCold ContextIds: 8
|
||||
; DUMP: CallerEdges:
|
||||
|
||||
; DUMP: Node [[A1]]
|
||||
; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 9 10
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[B1]] to Caller: [[A1]] AllocTypes: Cold ContextIds: 9
|
||||
; DUMP: Edge from Callee [[B2]] to Caller: [[A1]] AllocTypes: NotCold ContextIds: 10
|
||||
; DUMP: CallerEdges:
|
||||
|
||||
; DUMP: Node [[A4]]
|
||||
; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 11 12
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[B1]] to Caller: [[A4]] AllocTypes: Cold ContextIds: 11
|
||||
; DUMP: Edge from Callee [[B2]] to Caller: [[A4]] AllocTypes: NotCold ContextIds: 12
|
||||
; DUMP: CallerEdges:
|
||||
|
||||
; DUMP: Node [[A]]
|
||||
; DUMP: %call.i = call noundef ptr @_Z1Bb(i1 noundef zeroext %tobool.i) (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 2 3
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[B1]] to Caller: [[A]] AllocTypes: Cold ContextIds: 2
|
||||
; DUMP: Edge from Callee [[B2]] to Caller: [[A]] AllocTypes: NotCold ContextIds: 3
|
||||
; DUMP: CallerEdges:
|
||||
@@ -1,261 +0,0 @@
|
||||
;; Tests callsite context graph generation for call graph containing indirect
|
||||
;; calls. Currently this should result in conservative behavior, such that the
|
||||
;; indirect call receives a null call in its graph node, to prevent subsequent
|
||||
;; cloning.
|
||||
;;
|
||||
;; Original code looks like:
|
||||
;;
|
||||
;; char *foo() {
|
||||
;; return new char[10];
|
||||
;; }
|
||||
;; class A {
|
||||
;; public:
|
||||
;; virtual char *x() { return foo(); }
|
||||
;; };
|
||||
;; class B : public A {
|
||||
;; public:
|
||||
;; char *x() final { return foo(); }
|
||||
;; };
|
||||
;; char *bar(A *a) {
|
||||
;; return a->x();
|
||||
;; }
|
||||
;; int main(int argc, char **argv) {
|
||||
;; char *x = foo();
|
||||
;; char *y = foo();
|
||||
;; B b;
|
||||
;; char *z = bar(&b);
|
||||
;; char *w = bar(&b);
|
||||
;; A a;
|
||||
;; char *r = bar(&a);
|
||||
;; char *s = bar(&a);
|
||||
;; memset(x, 0, 10);
|
||||
;; memset(y, 0, 10);
|
||||
;; memset(z, 0, 10);
|
||||
;; memset(w, 0, 10);
|
||||
;; memset(r, 0, 10);
|
||||
;; memset(s, 0, 10);
|
||||
;; delete[] x;
|
||||
;; delete[] w;
|
||||
;; delete[] r;
|
||||
;; sleep(10);
|
||||
;; delete[] y;
|
||||
;; delete[] z;
|
||||
;; delete[] s;
|
||||
;; return 0;
|
||||
;; }
|
||||
;;
|
||||
;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
|
||||
;; memory freed after sleep(10) results in cold lifetimes.
|
||||
;;
|
||||
;; Compiled without optimization to prevent inlining and devirtualization.
|
||||
;;
|
||||
;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
|
||||
|
||||
; RUN: opt -passes=memprof-context-disambiguation \
|
||||
; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
|
||||
; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
|
||||
; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
|
||||
|
||||
; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
|
||||
|
||||
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
declare ptr @_Z3barP1A(ptr)
|
||||
|
||||
;; Makes six calls and returns 0: two direct calls to @_Z3foov (!callsite !0
;; and !1) and four calls to @_Z3barP1A (!callsite !2 through !5), two passing
;; %b and two passing %a. @_Z3barP1A is only declared, not defined, in this
;; module.
define i32 @main(ptr %b, ptr %a) #0 {
|
||||
entry:
|
||||
%call = call noundef ptr @_Z3foov(), !callsite !0
|
||||
%call1 = call noundef ptr @_Z3foov(), !callsite !1
|
||||
%call2 = call noundef ptr @_Z3barP1A(ptr noundef %b), !callsite !2
|
||||
%call3 = call noundef ptr @_Z3barP1A(ptr noundef %b), !callsite !3
|
||||
%call4 = call noundef ptr @_Z3barP1A(ptr noundef %a), !callsite !4
|
||||
%call5 = call noundef ptr @_Z3barP1A(ptr noundef %a), !callsite !5
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; Function Attrs: noinline
|
||||
declare void @_ZN1BC2Ev() #1
|
||||
|
||||
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
|
||||
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2
|
||||
|
||||
; Function Attrs: nobuiltin
|
||||
declare void @_ZdaPv() #3
|
||||
|
||||
;; Calls @_Z3foov and returns null. The stack id in !callsite !6 is the second
;; id of MIB contexts !10 and !12.
define internal ptr @_ZN1A1xEv() #4 {
|
||||
entry:
|
||||
%call = call noundef ptr @_Z3foov(), !callsite !6
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress uwtable
|
||||
;; Calls @_Z3foov and returns null. The stack id in !callsite !7 is the second
;; id of MIB contexts !16 and !18.
define internal ptr @_ZN1B1xEv() #5 {
|
||||
entry:
|
||||
%call = call noundef ptr @_Z3foov(), !callsite !7
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
; Function Attrs: mustprogress uwtable
|
||||
;; Holds the only allocation in this test: a @_Znam call carrying !memprof !8
;; (a list of six MIB nodes) and !callsite !21. Returns null in this reduced IR.
define internal ptr @_Z3foov() #5 {
|
||||
entry:
|
||||
%call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !8, !callsite !21
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
declare ptr @_Znam(i64) #6
|
||||
|
||||
; uselistorder directives
|
||||
uselistorder ptr @_Z3foov, { 3, 2, 1, 0 }
|
||||
|
||||
attributes #0 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
|
||||
attributes #1 = { noinline }
|
||||
attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: write) }
|
||||
attributes #3 = { nobuiltin }
|
||||
attributes #4 = { "tune-cpu"="generic" }
|
||||
attributes #5 = { mustprogress uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
|
||||
attributes #6 = { "disable-tail-calls"="true" }
|
||||
attributes #7 = { builtin }
|
||||
|
||||
!0 = !{i64 8632435727821051414}
|
||||
!1 = !{i64 -3421689549917153178}
|
||||
!2 = !{i64 6792096022461663180}
|
||||
!3 = !{i64 -2709642582978494015}
|
||||
!4 = !{i64 748269490701775343}
|
||||
!5 = !{i64 -5747251260480066785}
|
||||
!6 = !{i64 8256774051149711748}
|
||||
!7 = !{i64 -4831879094954754638}
|
||||
!8 = !{!9, !11, !13, !15, !17, !19}
|
||||
!9 = !{!10, !"notcold"}
|
||||
!10 = !{i64 2732490490862098848, i64 8256774051149711748, i64 -4820244510750103755, i64 748269490701775343}
|
||||
!11 = !{!12, !"cold"}
|
||||
!12 = !{i64 2732490490862098848, i64 8256774051149711748, i64 -4820244510750103755, i64 -5747251260480066785}
|
||||
!13 = !{!14, !"notcold"}
|
||||
!14 = !{i64 2732490490862098848, i64 8632435727821051414}
|
||||
!15 = !{!16, !"cold"}
|
||||
!16 = !{i64 2732490490862098848, i64 -4831879094954754638, i64 -4820244510750103755, i64 6792096022461663180}
|
||||
!17 = !{!18, !"notcold"}
|
||||
!18 = !{i64 2732490490862098848, i64 -4831879094954754638, i64 -4820244510750103755, i64 -2709642582978494015}
|
||||
!19 = !{!20, !"cold"}
|
||||
!20 = !{i64 2732490490862098848, i64 -3421689549917153178}
|
||||
!21 = !{i64 2732490490862098848}
|
||||
|
||||
|
||||
; DUMP: CCG before cloning:
|
||||
; DUMP: Callsite Context Graph:
|
||||
; DUMP: Node [[FOO:0x[a-z0-9]+]]
|
||||
; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 1 2 3 4 5 6
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: CallerEdges:
|
||||
; DUMP: Edge from Callee [[FOO]] to Caller: [[AX:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2
|
||||
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3
|
||||
; DUMP: Edge from Callee [[FOO]] to Caller: [[BX:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 4 5
|
||||
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 6
|
||||
|
||||
; DUMP: Node [[AX]]
|
||||
; DUMP: %call = call noundef ptr @_Z3foov() (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 1 2
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[FOO]] to Caller: [[AX]] AllocTypes: NotColdCold ContextIds: 1 2
|
||||
; DUMP: CallerEdges:
|
||||
; DUMP: Edge from Callee [[AX]] to Caller: [[BAR:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2
|
||||
|
||||
;; Bar contains an indirect call, with multiple targets. Its call should be null.
|
||||
; DUMP: Node [[BAR]]
|
||||
; DUMP: null Call
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 1 2 4 5
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[AX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 1 2
|
||||
; DUMP: Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5
|
||||
; DUMP: CallerEdges:
|
||||
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN3:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
|
||||
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN4:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
|
||||
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN5:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
|
||||
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN6:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 5
|
||||
|
||||
; DUMP: Node [[MAIN3]]
|
||||
; DUMP: %call4 = call noundef ptr @_Z3barP1A(ptr noundef %a) (clone 0)
|
||||
; DUMP: AllocTypes: NotCold
|
||||
; DUMP: ContextIds: 1
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN3]] AllocTypes: NotCold ContextIds: 1
|
||||
; DUMP: CallerEdges:
|
||||
|
||||
; DUMP: Node [[MAIN4]]
|
||||
; DUMP: %call5 = call noundef ptr @_Z3barP1A(ptr noundef %a) (clone 0)
|
||||
; DUMP: AllocTypes: Cold
|
||||
; DUMP: ContextIds: 2
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN4]] AllocTypes: Cold ContextIds: 2
|
||||
; DUMP: CallerEdges:
|
||||
|
||||
; DUMP: Node [[MAIN1]]
|
||||
; DUMP: %call = call noundef ptr @_Z3foov() (clone 0)
|
||||
; DUMP: AllocTypes: NotCold
|
||||
; DUMP: ContextIds: 3
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
|
||||
; DUMP: CallerEdges:
|
||||
|
||||
; DUMP: Node [[BX]]
|
||||
; DUMP: %call = call noundef ptr @_Z3foov() (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 4 5
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[FOO]] to Caller: [[BX]] AllocTypes: NotColdCold ContextIds: 4 5
|
||||
; DUMP: CallerEdges:
|
||||
; DUMP: Edge from Callee [[BX]] to Caller: [[BAR]] AllocTypes: NotColdCold ContextIds: 4 5
|
||||
|
||||
; DUMP: Node [[MAIN5]]
|
||||
; DUMP: %call2 = call noundef ptr @_Z3barP1A(ptr noundef %b) (clone 0)
|
||||
; DUMP: AllocTypes: Cold
|
||||
; DUMP: ContextIds: 4
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN5]] AllocTypes: Cold ContextIds: 4
|
||||
; DUMP: CallerEdges:
|
||||
|
||||
; DUMP: Node [[MAIN6]]
|
||||
; DUMP: %call3 = call noundef ptr @_Z3barP1A(ptr noundef %b) (clone 0)
|
||||
; DUMP: AllocTypes: NotCold
|
||||
; DUMP: ContextIds: 5
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN6]] AllocTypes: NotCold ContextIds: 5
|
||||
; DUMP: CallerEdges:
|
||||
|
||||
; DUMP: Node [[MAIN2]]
|
||||
; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0)
|
||||
; DUMP: AllocTypes: Cold
|
||||
; DUMP: ContextIds: 6
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 6
|
||||
; DUMP: CallerEdges:
|
||||
|
||||
|
||||
; DOT: digraph "postbuild" {
|
||||
; DOT: label="postbuild";
|
||||
; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 1 2 3 4 5 6",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3foov -\> _Znam}"];
|
||||
; DOT: Node[[AX:0x[a-z0-9]+]] [shape=record,tooltip="N[[AX]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 8256774051149711748\n_ZN1A1xEv -\> _Z3foov}"];
|
||||
; DOT: Node[[AX]] -> Node[[FOO]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
|
||||
; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13626499562959447861\nnull call (external)}"];
|
||||
; DOT: Node[[BAR]] -> Node[[AX]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
|
||||
; DOT: Node[[BAR]] -> Node[[BX:0x[a-z0-9]+]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"];
|
||||
; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 748269490701775343\nmain -\> _Z3barP1A}"];
|
||||
; DOT: Node[[MAIN1]] -> Node[[BAR]][tooltip="ContextIds: 1",fillcolor="brown1"];
|
||||
; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 12699492813229484831\nmain -\> _Z3barP1A}"];
|
||||
; DOT: Node[[MAIN2]] -> Node[[BAR]][tooltip="ContextIds: 2",fillcolor="cyan"];
|
||||
; DOT: Node[[MAIN3:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN3]] ContextIds: 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
|
||||
; DOT: Node[[MAIN3]] -> Node[[FOO]][tooltip="ContextIds: 3",fillcolor="brown1"];
|
||||
; DOT: Node[[BX]] [shape=record,tooltip="N[[BX]] ContextIds: 4 5",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 13614864978754796978\n_ZN1B1xEv -\> _Z3foov}"];
|
||||
; DOT: Node[[BX]] -> Node[[FOO]][tooltip="ContextIds: 4 5",fillcolor="mediumorchid1"];
|
||||
; DOT: Node[[MAIN4:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN4]] ContextIds: 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 6792096022461663180\nmain -\> _Z3barP1A}"];
|
||||
; DOT: Node[[MAIN4]] -> Node[[BAR]][tooltip="ContextIds: 4",fillcolor="cyan"];
|
||||
; DOT: Node[[MAIN5:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN5]] ContextIds: 5",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 15737101490731057601\nmain -\> _Z3barP1A}"];
|
||||
; DOT: Node[[MAIN5]] -> Node[[BAR]][tooltip="ContextIds: 5",fillcolor="brown1"];
|
||||
; DOT: Node[[MAIN6:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN6]] ContextIds: 6",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
|
||||
; DOT: Node[[MAIN6]] -> Node[[FOO]][tooltip="ContextIds: 6",fillcolor="cyan"];
|
||||
; DOT: }
|
||||
@@ -1,189 +0,0 @@
|
||||
;; Test callsite context graph generation for call graph with two memprof
|
||||
;; contexts and partial inlining, requiring generation of a new fused node to
|
||||
;; represent the inlined sequence while matching callsite nodes onto the graph.
|
||||
;;
|
||||
;; Original code looks like:
|
||||
;;
|
||||
;; char *bar() {
|
||||
;; return new char[10];
|
||||
;; }
|
||||
;;
|
||||
;; char *baz() {
|
||||
;; return bar();
|
||||
;; }
|
||||
;;
|
||||
;; char *foo() {
|
||||
;; return baz();
|
||||
;; }
|
||||
;;
|
||||
;; int main(int argc, char **argv) {
|
||||
;; char *x = foo();
|
||||
;; char *y = foo();
|
||||
;; memset(x, 0, 10);
|
||||
;; memset(y, 0, 10);
|
||||
;; delete[] x;
|
||||
;; sleep(10);
|
||||
;; delete[] y;
|
||||
;; return 0;
|
||||
;; }
|
||||
;;
|
||||
;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
|
||||
;; memory freed after sleep(10) results in cold lifetimes.
|
||||
;;
|
||||
;; The code below was created by forcing inlining of baz into foo, and
|
||||
;; bar into baz. Due to the inlining of bar we will initially have two
|
||||
;; allocation nodes in the graph. This tests that we correctly match
|
||||
;; foo (with baz inlined) onto the graph nodes first, and generate a new
|
||||
;; fused node for it. We should then not match baz (with bar inlined) as that
|
||||
;; is not reached by the MIB contexts (since all calls from main will look
|
||||
;; like main -> foo(+baz) -> bar after the inlining reflected in this IR).
|
||||
;;
|
||||
;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
|
||||
|
||||
; RUN: opt -passes=memprof-context-disambiguation \
|
||||
; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
|
||||
; RUN: -memprof-export-to-dot -memprof-dot-file-path-prefix=%t. \
|
||||
; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
|
||||
|
||||
; RUN: cat %t.ccg.postbuild.dot | FileCheck %s --check-prefix=DOT
|
||||
|
||||
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define internal ptr @_Z3barv() {
|
||||
entry:
|
||||
%call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !5
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
; Function Attrs: nobuiltin
|
||||
declare ptr @_Znam(i64) #0
|
||||
|
||||
; Function Attrs: mustprogress
|
||||
define internal ptr @_Z3bazv() #1 {
|
||||
entry:
|
||||
%call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7, !memprof !0, !callsite !6
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
; Function Attrs: noinline
|
||||
define internal ptr @_Z3foov() #2 {
|
||||
entry:
|
||||
%call.i = call noundef ptr @_Z3barv(), !callsite !7
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
define i32 @main() #3 {
|
||||
entry:
|
||||
%call = call noundef ptr @_Z3foov(), !callsite !8
|
||||
%call1 = call noundef ptr @_Z3foov(), !callsite !9
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
|
||||
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4
|
||||
|
||||
; Function Attrs: nounwind
|
||||
declare void @_ZdaPv() #5
|
||||
|
||||
declare i32 @sleep() #6
|
||||
|
||||
attributes #0 = { nobuiltin }
|
||||
attributes #1 = { mustprogress }
|
||||
attributes #2 = { noinline }
|
||||
attributes #3 = { "tune-cpu"="generic" }
|
||||
attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) }
|
||||
attributes #5 = { nounwind }
|
||||
attributes #6 = { "disable-tail-calls"="true" }
|
||||
attributes #7 = { builtin }
|
||||
|
||||
!0 = !{!1, !3}
|
||||
!1 = !{!2, !"notcold"}
|
||||
!2 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
|
||||
!3 = !{!4, !"cold"}
|
||||
!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
|
||||
!5 = !{i64 9086428284934609951}
|
||||
!6 = !{i64 9086428284934609951, i64 -5964873800580613432}
|
||||
!7 = !{i64 -5964873800580613432, i64 2732490490862098848}
|
||||
!8 = !{i64 8632435727821051414}
|
||||
!9 = !{i64 -3421689549917153178}
|
||||
|
||||
|
||||
; DUMP: CCG before cloning:
|
||||
; DUMP: Callsite Context Graph:
|
||||
; DUMP: Node [[BAR:0x[a-z0-9]+]]
|
||||
; DUMP: %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 1 2
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: CallerEdges:
|
||||
; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO:0x[a-z0-9]+]] AllocTypes: NotColdCold ContextIds: 1 2
|
||||
|
||||
;; This is leftover from the MIB on the alloc inlined into baz. It is not
|
||||
;; matched with any call, since there is no such node in the IR. Due to the
|
||||
;; null call it will not participate in any context transformations.
|
||||
; DUMP: Node [[FOO2:0x[a-z0-9]+]]
|
||||
; DUMP: null Call
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 3 4
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[BAZ:0x[a-z0-9]+]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4
|
||||
; DUMP: CallerEdges:
|
||||
; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 3
|
||||
; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 4
|
||||
|
||||
; DUMP: Node [[MAIN1]]
|
||||
; DUMP: %call = call noundef ptr @_Z3foov() (clone 0)
|
||||
; DUMP: AllocTypes: NotCold
|
||||
; DUMP: ContextIds: 1 3
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 3
|
||||
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
|
||||
; DUMP: CallerEdges:
|
||||
|
||||
; DUMP: Node [[MAIN2]]
|
||||
; DUMP: %call1 = call noundef ptr @_Z3foov() (clone 0)
|
||||
; DUMP: AllocTypes: Cold
|
||||
; DUMP: ContextIds: 2 4
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[FOO2]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 4
|
||||
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
|
||||
; DUMP: CallerEdges:
|
||||
|
||||
; DUMP: Node [[BAZ]]
|
||||
; DUMP: %call.i = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #7 (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 3 4
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: CallerEdges:
|
||||
; DUMP: Edge from Callee [[BAZ]] to Caller: [[FOO2]] AllocTypes: NotColdCold ContextIds: 3 4
|
||||
|
||||
;; This is the node synthesized for the call to bar in foo that was created
|
||||
;; by inlining baz into foo.
|
||||
; DUMP: Node [[FOO]]
|
||||
; DUMP: %call.i = call noundef ptr @_Z3barv() (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 1 2
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[BAR]] to Caller: [[FOO]] AllocTypes: NotColdCold ContextIds: 1 2
|
||||
; DUMP: CallerEdges:
|
||||
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
|
||||
; DUMP: Edge from Callee [[FOO]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
|
||||
|
||||
|
||||
; DOT: digraph "postbuild" {
|
||||
; DOT: label="postbuild";
|
||||
; DOT: Node[[BAR:0x[a-z0-9]+]] [shape=record,tooltip="N[[BAR]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc0\n_Z3barv -\> _Znam}"];
|
||||
; DOT: Node[[FOO:0x[a-z0-9]+]] [shape=record,tooltip="N[[FOO]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 2732490490862098848\nnull call (external)}"];
|
||||
; DOT: Node[[FOO]] -> Node[[BAZ:0x[a-z0-9]+]][tooltip="ContextIds: 3 4",fillcolor="mediumorchid1"];
|
||||
; DOT: Node[[MAIN1:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN1]] ContextIds: 1 3",fillcolor="brown1",style="filled",style="filled",label="{OrigId: 8632435727821051414\nmain -\> _Z3foov}"];
|
||||
; DOT: Node[[MAIN1]] -> Node[[FOO]][tooltip="ContextIds: 3",fillcolor="brown1"];
|
||||
; DOT: Node[[MAIN1]] -> Node[[FOO2:0x[a-z0-9]+]][tooltip="ContextIds: 1",fillcolor="brown1"];
|
||||
; DOT: Node[[MAIN2:0x[a-z0-9]+]] [shape=record,tooltip="N[[MAIN2]] ContextIds: 2 4",fillcolor="cyan",style="filled",style="filled",label="{OrigId: 15025054523792398438\nmain -\> _Z3foov}"];
|
||||
; DOT: Node[[MAIN2]] -> Node[[FOO]][tooltip="ContextIds: 4",fillcolor="cyan"];
|
||||
; DOT: Node[[MAIN2]] -> Node[[FOO2]][tooltip="ContextIds: 2",fillcolor="cyan"];
|
||||
; DOT: Node[[BAZ]] [shape=record,tooltip="N[[BAZ]] ContextIds: 3 4",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: Alloc2\n_Z3bazv -\> _Znam}"];
|
||||
; DOT: Node[[FOO2]] [shape=record,tooltip="N[[FOO2]] ContextIds: 1 2",fillcolor="mediumorchid1",style="filled",style="filled",label="{OrigId: 0\n_Z3foov -\> _Z3barv}"];
|
||||
; DOT: Node[[FOO2]] -> Node[[BAR]][tooltip="ContextIds: 1 2",fillcolor="mediumorchid1"];
|
||||
; DOT: }
|
||||
@@ -1,135 +0,0 @@
|
||||
;; Test callsite context graph generation for call graph with two memprof
|
||||
;; contexts and multiple levels of inlining, requiring generation of new
|
||||
;; fused nodes to represent the inlined sequence while matching callsite
|
||||
;; nodes onto the graph. In particular this tests the case where a function
|
||||
;; has inlined a callee containing an inlined callee.
|
||||
;;
|
||||
;; Original code looks like:
|
||||
;;
|
||||
;; char *bar() __attribute__((noinline)) {
|
||||
;; return new char[10];
|
||||
;; }
|
||||
;;
|
||||
;; char *baz() {
|
||||
;; return bar();
|
||||
;; }
|
||||
;;
|
||||
;; char *foo() {
|
||||
;; return baz();
|
||||
;; }
|
||||
;;
|
||||
;; int main(int argc, char **argv) {
|
||||
;; char *x = foo();
|
||||
;; char *y = foo();
|
||||
;; memset(x, 0, 10);
|
||||
;; memset(y, 0, 10);
|
||||
;; delete[] x;
|
||||
;; sleep(10);
|
||||
;; delete[] y;
|
||||
;; return 0;
|
||||
;; }
|
||||
;;
|
||||
;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
|
||||
;; memory freed after sleep(10) results in cold lifetimes.
|
||||
;;
|
||||
;; Both foo and baz are inlined into main, at both foo callsites.
|
||||
;; We should update the graph for new fused nodes for both of those inlined
|
||||
;; callsites to bar.
|
||||
;;
|
||||
;; Note that baz and bar are both dead due to the inlining, but have been left
|
||||
;; in the input IR to ensure that the MIB call chain is matched to the longer
|
||||
;; inline sequences from main.
|
||||
;;
|
||||
;; The IR was then reduced using llvm-reduce with the expected FileCheck input.
|
||||
|
||||
; RUN: opt -passes=memprof-context-disambiguation \
|
||||
; RUN: -memprof-verify-ccg -memprof-verify-nodes -memprof-dump-ccg \
|
||||
; RUN: %s -S 2>&1 | FileCheck %s --check-prefix=DUMP
|
||||
|
||||
|
||||
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
define ptr @_Z3barv() #0 {
|
||||
entry:
|
||||
%call = call noalias noundef nonnull dereferenceable(10) ptr @_Znam(i64 noundef 10) #7, !memprof !7, !callsite !12, !heapallocsite !13
|
||||
ret ptr null
|
||||
}
|
||||
|
||||
; Function Attrs: nobuiltin
|
||||
declare ptr @_Znam(i64) #1
|
||||
|
||||
; Function Attrs: mustprogress
|
||||
declare ptr @_Z3bazv() #2
|
||||
|
||||
define i32 @main() #3 {
|
||||
delete.end5:
|
||||
%call.i.i = call noundef ptr @_Z3barv(), !callsite !14
|
||||
%call.i.i8 = call noundef ptr @_Z3barv(), !callsite !15
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
|
||||
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #4
|
||||
|
||||
declare void @_ZdaPv() #5
|
||||
|
||||
declare i32 @sleep() #6
|
||||
|
||||
attributes #0 = { "stack-protector-buffer-size"="8" }
|
||||
attributes #1 = { nobuiltin }
|
||||
attributes #2 = { mustprogress }
|
||||
attributes #3 = { "tune-cpu"="generic" }
|
||||
attributes #4 = { nocallback nofree nounwind willreturn memory(argmem: write) }
|
||||
attributes #5 = { "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
|
||||
attributes #6 = { "disable-tail-calls"="true" }
|
||||
attributes #7 = { builtin }
|
||||
|
||||
!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6}
|
||||
|
||||
!0 = !{i32 7, !"Dwarf Version", i32 5}
|
||||
!1 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!2 = !{i32 1, !"wchar_size", i32 4}
|
||||
!3 = !{i32 8, !"PIC Level", i32 2}
|
||||
!4 = !{i32 7, !"PIE Level", i32 2}
|
||||
!5 = !{i32 7, !"uwtable", i32 2}
|
||||
!6 = !{i32 7, !"frame-pointer", i32 2}
|
||||
!7 = !{!8, !10}
|
||||
!8 = !{!9, !"notcold"}
|
||||
!9 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
|
||||
!10 = !{!11, !"cold"}
|
||||
!11 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
|
||||
!12 = !{i64 9086428284934609951}
|
||||
!13 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
|
||||
!14 = !{i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
|
||||
!15 = !{i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
|
||||
|
||||
|
||||
; DUMP: CCG before cloning:
|
||||
; DUMP: Callsite Context Graph:
|
||||
; DUMP: Node [[BAR:0x[a-z0-9]+]]
|
||||
; DUMP: %call = call noalias noundef nonnull dereferenceable(10) ptr @_Znam(i64 noundef 10) #7, !heapallocsite !7 (clone 0)
|
||||
; DUMP: AllocTypes: NotColdCold
|
||||
; DUMP: ContextIds: 1 2
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: CallerEdges:
|
||||
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN1:0x[a-z0-9]+]] AllocTypes: NotCold ContextIds: 1
|
||||
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN2:0x[a-z0-9]+]] AllocTypes: Cold ContextIds: 2
|
||||
|
||||
;; This is the node synthesized for the first inlined call chain of main->foo->baz
|
||||
; DUMP: Node [[MAIN1]]
|
||||
; DUMP: %call.i.i = call noundef ptr @_Z3barv() (clone 0)
|
||||
; DUMP: AllocTypes: NotCold
|
||||
; DUMP: ContextIds: 1
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN1]] AllocTypes: NotCold ContextIds: 1
|
||||
; DUMP: CallerEdges:
|
||||
|
||||
;; This is the node synthesized for the second inlined call chain of main->foo->baz
|
||||
; DUMP: Node [[MAIN2]]
|
||||
; DUMP: %call.i.i8 = call noundef ptr @_Z3barv() (clone 0)
|
||||
; DUMP: AllocTypes: Cold
|
||||
; DUMP: ContextIds: 2
|
||||
; DUMP: CalleeEdges:
|
||||
; DUMP: Edge from Callee [[BAR]] to Caller: [[MAIN2]] AllocTypes: Cold ContextIds: 2
|
||||
; DUMP: CallerEdges:
|
||||
@@ -1,41 +0,0 @@
|
||||
;; Test that MemProfContextDisambiguation is enabled under the expected conditions
|
||||
;; and in the expected position.
|
||||
|
||||
;; Pass is not currently enabled by default at any opt level.
|
||||
; RUN: opt -debug-pass-manager -passes='lto<O0>' -S %s \
|
||||
; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
|
||||
; RUN: opt -debug-pass-manager -passes='lto<O1>' -S %s \
|
||||
; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
|
||||
; RUN: opt -debug-pass-manager -passes='lto<O2>' -S %s \
|
||||
; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
|
||||
; RUN: opt -debug-pass-manager -passes='lto<O3>' -S %s \
|
||||
; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
|
||||
|
||||
;; Pass should not run even under option at O0/O1.
|
||||
; RUN: opt -debug-pass-manager -passes='lto<O0>' -S %s \
|
||||
; RUN: -enable-memprof-context-disambiguation \
|
||||
; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
|
||||
; RUN: opt -debug-pass-manager -passes='lto<O1>' -S %s \
|
||||
; RUN: -enable-memprof-context-disambiguation \
|
||||
; RUN: 2>&1 | FileCheck %s --implicit-check-not="Running pass: MemProfContextDisambiguation"
|
||||
|
||||
;; Pass should be enabled under option at O2/O3.
|
||||
; RUN: opt -debug-pass-manager -passes='lto<O2>' -S %s \
|
||||
; RUN: -enable-memprof-context-disambiguation \
|
||||
; RUN: 2>&1 | FileCheck %s --check-prefix=ENABLED
|
||||
; RUN: opt -debug-pass-manager -passes='lto<O3>' -S %s \
|
||||
; RUN: -enable-memprof-context-disambiguation \
|
||||
; RUN: 2>&1 | FileCheck %s --check-prefix=ENABLED
|
||||
|
||||
;; When enabled, MemProfContextDisambiguation runs just after inlining.
|
||||
; ENABLED: Running pass: InlinerPass
|
||||
; ENABLED: Invalidating analysis: InlineAdvisorAnalysis
|
||||
; ENABLED: Running pass: MemProfContextDisambiguation
|
||||
|
||||
define noundef ptr @_Z3barv() {
|
||||
entry:
|
||||
%call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10)
|
||||
ret ptr %call
|
||||
}
|
||||
|
||||
declare noundef nonnull ptr @_Znam(i64 noundef)
|
||||
Reference in New Issue
Block a user