mirror of
https://github.com/intel/llvm.git
synced 2026-01-13 19:08:21 +08:00
[BOLT] Print heatmap from perf2bolt (#139194)
Add perf2bolt `--heatmap` option to produce heatmaps during profile aggregation. Distinguish exclusive mode (`llvm-bolt-heatmap`) and optional mode (`perf2bolt --heatmap`), which impacts perf.data handling: exclusive mode covers all addresses, whereas optional mode consumes the attached profile, which covers only function addresses. Test Plan: updated perf2bolt tests: - pre-aggregated-perf.test: pre-aggregated data, - bolt-address-translation-yaml.test: pre-aggregated + BOLTed input, - perf_test.test: no-LBR perf data.
This commit is contained in:
@@ -17,7 +17,13 @@
|
||||
|
||||
namespace opts {
|
||||
|
||||
extern bool HeatmapMode;
|
||||
enum HeatmapModeKind {
|
||||
HM_None = 0,
|
||||
HM_Exclusive, // llvm-bolt-heatmap
|
||||
HM_Optional // perf2bolt --heatmap
|
||||
};
|
||||
|
||||
extern HeatmapModeKind HeatmapMode;
|
||||
extern bool BinaryAnalysisMode;
|
||||
|
||||
extern llvm::cl::OptionCategory BoltCategory;
|
||||
@@ -45,6 +51,7 @@ extern llvm::cl::opt<unsigned> HeatmapBlock;
|
||||
extern llvm::cl::opt<unsigned long long> HeatmapMaxAddress;
|
||||
extern llvm::cl::opt<unsigned long long> HeatmapMinAddress;
|
||||
extern llvm::cl::opt<bool> HeatmapPrintMappings;
|
||||
extern llvm::cl::opt<std::string> HeatmapOutput;
|
||||
extern llvm::cl::opt<bool> HotData;
|
||||
extern llvm::cl::opt<bool> HotFunctionsAtEnd;
|
||||
extern llvm::cl::opt<bool> HotText;
|
||||
|
||||
@@ -66,7 +66,7 @@ extern cl::opt<bool> UpdateDebugSections;
|
||||
extern cl::opt<unsigned> Verbosity;
|
||||
|
||||
extern bool BinaryAnalysisMode;
|
||||
extern bool HeatmapMode;
|
||||
extern HeatmapModeKind HeatmapMode;
|
||||
extern bool processAllFunctions();
|
||||
|
||||
static cl::opt<bool> CheckEncoding(
|
||||
|
||||
@@ -164,6 +164,10 @@ void DataAggregator::findPerfExecutable() {
|
||||
void DataAggregator::start() {
|
||||
outs() << "PERF2BOLT: Starting data aggregation job for " << Filename << "\n";
|
||||
|
||||
// Turn on heatmap building if requested by --heatmap flag.
|
||||
if (!opts::HeatmapMode && opts::HeatmapOutput.getNumOccurrences())
|
||||
opts::HeatmapMode = opts::HeatmapModeKind::HM_Optional;
|
||||
|
||||
// Don't launch perf for pre-aggregated files or when perf input is specified
|
||||
// by the user.
|
||||
if (opts::ReadPreAggregated || !opts::ReadPerfEvents.empty())
|
||||
@@ -502,24 +506,25 @@ Error DataAggregator::preprocessProfile(BinaryContext &BC) {
|
||||
errs() << "PERF2BOLT: failed to parse samples\n";
|
||||
|
||||
// Special handling for memory events
|
||||
if (prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
|
||||
return Error::success();
|
||||
|
||||
if (const std::error_code EC = parseMemEvents())
|
||||
errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
|
||||
<< '\n';
|
||||
if (!prepareToParse("mem events", MemEventsPPI, MemEventsErrorCallback))
|
||||
if (const std::error_code EC = parseMemEvents())
|
||||
errs() << "PERF2BOLT: failed to parse memory events: " << EC.message()
|
||||
<< '\n';
|
||||
|
||||
deleteTempFiles();
|
||||
|
||||
heatmap:
|
||||
if (opts::HeatmapMode) {
|
||||
if (std::error_code EC = printLBRHeatMap()) {
|
||||
errs() << "ERROR: failed to print heat map: " << EC.message() << '\n';
|
||||
exit(1);
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
return Error::success();
|
||||
if (!opts::HeatmapMode)
|
||||
return Error::success();
|
||||
|
||||
if (std::error_code EC = printLBRHeatMap())
|
||||
return errorCodeToError(EC);
|
||||
|
||||
if (opts::HeatmapMode == opts::HeatmapModeKind::HM_Optional)
|
||||
return Error::success();
|
||||
|
||||
assert(opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
Error DataAggregator::readProfile(BinaryContext &BC) {
|
||||
@@ -1351,15 +1356,14 @@ std::error_code DataAggregator::printLBRHeatMap() {
|
||||
exit(1);
|
||||
}
|
||||
|
||||
HM.print(opts::OutputFilename);
|
||||
if (opts::OutputFilename == "-")
|
||||
HM.printCDF(opts::OutputFilename);
|
||||
else
|
||||
HM.printCDF(opts::OutputFilename + ".csv");
|
||||
if (opts::OutputFilename == "-")
|
||||
HM.printSectionHotness(opts::OutputFilename);
|
||||
else
|
||||
HM.printSectionHotness(opts::OutputFilename + "-section-hotness.csv");
|
||||
HM.print(opts::HeatmapOutput);
|
||||
if (opts::HeatmapOutput == "-") {
|
||||
HM.printCDF(opts::HeatmapOutput);
|
||||
HM.printSectionHotness(opts::HeatmapOutput);
|
||||
} else {
|
||||
HM.printCDF(opts::HeatmapOutput + ".csv");
|
||||
HM.printSectionHotness(opts::HeatmapOutput + "-section-hotness.csv");
|
||||
}
|
||||
|
||||
return std::error_code();
|
||||
}
|
||||
@@ -1386,7 +1390,7 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
|
||||
const uint64_t TraceTo = NextLBR->From;
|
||||
const BinaryFunction *TraceBF =
|
||||
getBinaryFunctionContainingAddress(TraceFrom);
|
||||
if (opts::HeatmapMode) {
|
||||
if (opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive) {
|
||||
FTInfo &Info = FallthroughLBRs[Trace(TraceFrom, TraceTo)];
|
||||
++Info.InternCount;
|
||||
} else if (TraceBF && TraceBF->containsAddress(TraceTo)) {
|
||||
@@ -1424,7 +1428,7 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
|
||||
NextLBR = &LBR;
|
||||
|
||||
// Record branches outside binary functions for heatmap.
|
||||
if (opts::HeatmapMode) {
|
||||
if (opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive) {
|
||||
TakenBranchInfo &Info = BranchLBRs[Trace(LBR.From, LBR.To)];
|
||||
++Info.TakenCount;
|
||||
continue;
|
||||
@@ -1439,7 +1443,8 @@ void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
|
||||
}
|
||||
// Record LBR addresses not covered by fallthroughs (bottom-of-stack source
|
||||
// and top-of-stack target) as basic samples for heatmap.
|
||||
if (opts::HeatmapMode && !Sample.LBR.empty()) {
|
||||
if (opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive &&
|
||||
!Sample.LBR.empty()) {
|
||||
++BasicSamples[Sample.LBR.front().To];
|
||||
++BasicSamples[Sample.LBR.back().From];
|
||||
}
|
||||
|
||||
@@ -305,10 +305,11 @@ void Heatmap::printSectionHotness(raw_ostream &OS) const {
|
||||
|
||||
uint64_t UnmappedHotness = 0;
|
||||
auto RecordUnmappedBucket = [&](uint64_t Address, uint64_t Frequency) {
|
||||
errs() << "Couldn't map the address bucket [0x" << Twine::utohexstr(Address)
|
||||
<< ", 0x" << Twine::utohexstr(Address + BucketSize)
|
||||
<< "] containing " << Frequency
|
||||
<< " samples to a text section in the binary.";
|
||||
if (opts::Verbosity >= 1)
|
||||
errs() << "Couldn't map the address bucket [0x"
|
||||
<< Twine::utohexstr(Address) << ", 0x"
|
||||
<< Twine::utohexstr(Address + BucketSize) << "] containing "
|
||||
<< Frequency << " samples to a text section in the binary.";
|
||||
UnmappedHotness += Frequency;
|
||||
};
|
||||
|
||||
|
||||
@@ -1453,7 +1453,8 @@ void RewriteInstance::updateRtFiniReloc() {
|
||||
}
|
||||
|
||||
void RewriteInstance::registerFragments() {
|
||||
if (!BC->HasSplitFunctions || opts::HeatmapMode)
|
||||
if (!BC->HasSplitFunctions ||
|
||||
opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive)
|
||||
return;
|
||||
|
||||
// Process fragments with ambiguous parents separately as they are typically a
|
||||
@@ -1998,7 +1999,7 @@ Error RewriteInstance::readSpecialSections() {
|
||||
BC->getUniqueSectionByName(BoltAddressTranslation::SECTION_NAME)) {
|
||||
BC->HasBATSection = true;
|
||||
// Do not read BAT when plotting a heatmap
|
||||
if (!opts::HeatmapMode) {
|
||||
if (opts::HeatmapMode != opts::HeatmapModeKind::HM_Exclusive) {
|
||||
if (std::error_code EC = BAT->parse(BC->outs(), BATSec->getContents())) {
|
||||
BC->errs() << "BOLT-ERROR: failed to parse BOLT address translation "
|
||||
"table.\n";
|
||||
@@ -2037,7 +2038,7 @@ Error RewriteInstance::readSpecialSections() {
|
||||
}
|
||||
|
||||
// Force non-relocation mode for heatmap generation
|
||||
if (opts::HeatmapMode)
|
||||
if (opts::HeatmapMode == opts::HeatmapModeKind::HM_Exclusive)
|
||||
BC->HasRelocations = false;
|
||||
|
||||
if (BC->HasRelocations)
|
||||
|
||||
@@ -28,7 +28,7 @@ const char *BoltRevision =
|
||||
|
||||
namespace opts {
|
||||
|
||||
bool HeatmapMode = false;
|
||||
HeatmapModeKind HeatmapMode = HM_None;
|
||||
bool BinaryAnalysisMode = false;
|
||||
|
||||
cl::OptionCategory BoltCategory("BOLT generic options");
|
||||
@@ -124,6 +124,10 @@ cl::opt<bool> HeatmapPrintMappings(
|
||||
"sections (default false)"),
|
||||
cl::Optional, cl::cat(HeatmapCategory));
|
||||
|
||||
cl::opt<std::string> HeatmapOutput("heatmap",
|
||||
cl::desc("print heatmap to a given file"),
|
||||
cl::Optional, cl::cat(HeatmapCategory));
|
||||
|
||||
cl::opt<bool> HotData("hot-data",
|
||||
cl::desc("hot data symbols support (relocation mode)"),
|
||||
cl::cat(BoltCategory));
|
||||
|
||||
@@ -28,8 +28,9 @@ ORDER-YAML-CHECK-NEXT: calls: [ { off: 0x26, fid: [[#]], cnt: 20 } ]
|
||||
ORDER-YAML-CHECK-NEXT: succ: [ { bid: 5, cnt: 7 }
|
||||
## Large profile test
|
||||
RUN: perf2bolt %t.out --pa -p %p/Inputs/blarge_new_bat.preagg.txt -w %t.yaml -o %t.fdata \
|
||||
RUN: 2>&1 | FileCheck --check-prefix READ-BAT-CHECK %s
|
||||
RUN: --heatmap %t.hm 2>&1 | FileCheck --check-prefix READ-BAT-CHECK %s
|
||||
RUN: FileCheck --input-file %t.yaml --check-prefix YAML-BAT-CHECK %s
|
||||
RUN: FileCheck --input-file %t.hm-section-hotness.csv --check-prefix CHECK-HM %s
|
||||
## Check that YAML converted from fdata matches YAML created directly with BAT.
|
||||
RUN: llvm-bolt %t.exe -data %t.fdata -w %t.yaml-fdata -o /dev/null \
|
||||
RUN: 2>&1 | FileCheck --check-prefix READ-BAT-FDATA-CHECK %s
|
||||
@@ -46,8 +47,10 @@ WRITE-BAT-CHECK: BOLT-INFO: BAT section size (bytes): 404
|
||||
READ-BAT-CHECK-NOT: BOLT-ERROR: unable to save profile in YAML format for input file processed by BOLT
|
||||
READ-BAT-CHECK: BOLT-INFO: Parsed 5 BAT entries
|
||||
READ-BAT-CHECK: PERF2BOLT: read 79 aggregated LBR entries
|
||||
READ-BAT-CHECK: HEATMAP: building heat map
|
||||
READ-BAT-CHECK: BOLT-INFO: 5 out of 21 functions in the binary (23.8%) have non-empty execution profile
|
||||
READ-BAT-FDATA-CHECK: BOLT-INFO: 5 out of 16 functions in the binary (31.2%) have non-empty execution profile
|
||||
CHECK-HM: .text, 0x800000, 0x8002cc, 38.7595, 91.6667, 0.3553
|
||||
|
||||
YAML-BAT-CHECK: functions:
|
||||
# Function not covered by BAT - has insns in basic block
|
||||
|
||||
@@ -11,12 +11,15 @@ REQUIRES: system-linux
|
||||
|
||||
RUN: yaml2obj %p/Inputs/blarge.yaml &> %t.exe
|
||||
RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \
|
||||
RUN: --show-density \
|
||||
RUN: --show-density --heatmap %t.hm \
|
||||
RUN: --profile-density-threshold=9 --profile-density-cutoff-hot=970000 \
|
||||
RUN: --profile-use-dfs | FileCheck %s --check-prefix=CHECK-P2B
|
||||
RUN: FileCheck --input-file %t.hm-section-hotness.csv --check-prefix=CHECK-HM %s
|
||||
|
||||
CHECK-P2B: HEATMAP: building heat map
|
||||
CHECK-P2B: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
|
||||
CHECK-P2B: BOLT-INFO: Functions with density >= 21.7 account for 97.00% total sample counts.
|
||||
CHECK-HM: .text, 0x400680, 0x401232, 100.0000, 4.2553, 0.0426
|
||||
|
||||
RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated.txt -w %t.new \
|
||||
RUN: --show-density \
|
||||
|
||||
@@ -4,12 +4,19 @@ REQUIRES: system-linux, perf
|
||||
|
||||
RUN: %clang %S/Inputs/perf_test.c -fuse-ld=lld -Wl,--script=%S/Inputs/perf_test.lds -o %t
|
||||
RUN: perf record -Fmax -e cycles:u -o %t2 -- %t
|
||||
RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id --show-density 2>&1 | FileCheck %s
|
||||
RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id --show-density \
|
||||
RUN: --heatmap %t.hm 2>&1 | FileCheck %s
|
||||
RUN: FileCheck %s --input-file %t.hm-section-hotness.csv --check-prefix=CHECK-HM
|
||||
|
||||
CHECK-NOT: PERF2BOLT-ERROR
|
||||
CHECK-NOT: !! WARNING !! This high mismatch ratio indicates the input binary is probably not the same binary used during profiling collection.
|
||||
CHECK: HEATMAP: building heat map
|
||||
CHECK: BOLT-INFO: Functions with density >= {{.*}} account for 99.00% total sample counts.
|
||||
|
||||
RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4
|
||||
RUN: perf record -Fmax -e cycles:u -o %t5 -- %t4
|
||||
RUN: perf2bolt %t4 -p=%t5 -o %t6 -nl -ignore-build-id --show-density 2>&1 | FileCheck %s
|
||||
RUN: perf2bolt %t4 -p=%t5 -o %t6 -nl -ignore-build-id --show-density \
|
||||
RUN: --heatmap %t.hm2 2>&1 | FileCheck %s
|
||||
RUN: FileCheck %s --input-file %t.hm2-section-hotness.csv --check-prefix=CHECK-HM
|
||||
|
||||
CHECK-HM: .text
|
||||
|
||||
@@ -66,7 +66,7 @@ int main(int argc, char **argv) {
|
||||
exit(1);
|
||||
}
|
||||
|
||||
opts::HeatmapMode = true;
|
||||
opts::HeatmapMode = opts::HM_Exclusive;
|
||||
opts::AggregateOnly = true;
|
||||
if (!sys::fs::exists(opts::InputFilename))
|
||||
report_error(opts::InputFilename, errc::no_such_file_or_directory);
|
||||
@@ -74,6 +74,7 @@ int main(int argc, char **argv) {
|
||||
// Output to stdout by default
|
||||
if (opts::OutputFilename.empty())
|
||||
opts::OutputFilename = "-";
|
||||
opts::HeatmapOutput.assign(opts::OutputFilename);
|
||||
|
||||
// Initialize targets and assembly printers/parsers.
|
||||
#define BOLT_TARGET(target) \
|
||||
|
||||
Reference in New Issue
Block a user