mirror of
https://github.com/intel/llvm.git
synced 2026-01-13 19:08:21 +08:00
[BOLT] Add --ba flag to deprecate --nl (#164257)
The `--nl` flag, originally for Non-LBR mode, is deprecated and will be replaced by `--basic-events` (alias `--ba`). `--nl` remains as a deprecated alias for backward compatibility.
This commit is contained in:
@@ -164,7 +164,7 @@ $ perf2bolt -p perf.data -o perf.fdata <executable>
|
||||
This command will aggregate branch data from `perf.data` and store it in a
|
||||
format that is both more compact and more resilient to binary modifications.
|
||||
|
||||
If the profile was collected without brstacks, you will need to add `-nl` flag to
|
||||
If the profile was collected without brstacks, you will need to add the `-ba` flag to
|
||||
the command line above.
|
||||
|
||||
### Step 3: Optimize with BOLT
|
||||
|
||||
@@ -21,7 +21,7 @@ $ perf record -e cycles:u -j any,u [-p PID|-a] -- sleep <interval>
|
||||
```
|
||||
|
||||
Running with brstack (`-j any,u` or `-b`) is recommended. Heatmaps can be generated
|
||||
from basic events by using the llvm-bolt-heatmap option `-nl` (no brstack) but
|
||||
from basic events by using the llvm-bolt-heatmap option `-ba` (basic events) but
|
||||
such heatmaps do not have the coverage provided by brstack and may only be useful
|
||||
for finding event hotspots at larger code block granularities.
|
||||
|
||||
|
||||
@@ -205,8 +205,8 @@ This command will aggregate branch data from ``perf.data`` and store it
|
||||
in a format that is both more compact and more resilient to binary
|
||||
modifications.
|
||||
|
||||
If the profile was collected without LBRs, you will need to add ``-nl``
|
||||
flag to the command line above.
|
||||
If the profile was collected without brstacks, you will need to add the ``-ba`` flag to
|
||||
the command line above.
|
||||
|
||||
Step 3: Optimize with BOLT
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
@@ -45,10 +45,23 @@ using namespace bolt;
|
||||
namespace opts {
|
||||
|
||||
static cl::opt<bool>
|
||||
BasicAggregation("nl",
|
||||
cl::desc("aggregate basic samples (without brstack info)"),
|
||||
BasicAggregation("basic-events",
|
||||
cl::desc("aggregate basic events (without brstack info)"),
|
||||
cl::cat(AggregatorCategory));
|
||||
|
||||
static cl::alias BasicAggregationAlias("ba",
|
||||
cl::desc("Alias for --basic-events"),
|
||||
cl::aliasopt(BasicAggregation));
|
||||
|
||||
static cl::opt<bool> DeprecatedBasicAggregationNl(
|
||||
"nl", cl::desc("Alias for --basic-events (deprecated. Use --ba)"),
|
||||
cl::cat(AggregatorCategory), cl::ReallyHidden,
|
||||
cl::callback([](const bool &Enabled) {
|
||||
errs()
|
||||
<< "BOLT-WARNING: '-nl' is deprecated, please use '--ba' instead.\n";
|
||||
BasicAggregation = Enabled;
|
||||
}));
|
||||
|
||||
cl::opt<bool> ArmSPE("spe", cl::desc("Enable Arm SPE mode."),
|
||||
cl::cat(AggregatorCategory));
|
||||
|
||||
@@ -1433,7 +1446,7 @@ std::error_code DataAggregator::printLBRHeatMap() {
|
||||
"Cannot build heatmap.";
|
||||
} else {
|
||||
errs() << "HEATMAP-ERROR: no brstack traces detected in profile. "
|
||||
"Cannot build heatmap. Use -nl for building heatmap from "
|
||||
"Cannot build heatmap. Use -ba for building heatmap from "
|
||||
"basic events.\n";
|
||||
}
|
||||
exit(1);
|
||||
@@ -1629,8 +1642,8 @@ std::error_code DataAggregator::parseBranchEvents() {
|
||||
<< "PERF2BOLT-WARNING: all recorded samples for this binary lack "
|
||||
"brstack. Record profile with perf record -j any or run "
|
||||
"perf2bolt "
|
||||
"in non-brstack mode with -nl (the performance improvement in "
|
||||
"-nl "
|
||||
"in non-brstack mode with -ba (the performance improvement in "
|
||||
"-ba "
|
||||
"mode may be limited)\n";
|
||||
else
|
||||
errs()
|
||||
|
||||
@@ -10,7 +10,7 @@
|
||||
# RUN: FileCheck %s --input-file %t.fdata --check-prefix=CHECK-FDATA
|
||||
# RUN: llvm-strip --strip-unneeded %t.o
|
||||
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
|
||||
# RUN: llvm-bolt %t.exe -o %t.out --data %t.fdata --dyno-stats -nl \
|
||||
# RUN: llvm-bolt %t.exe -o %t.out --data %t.fdata --dyno-stats -ba \
|
||||
# RUN: --print-only=_start 2>&1 | FileCheck %s --check-prefix=CHECK-BOLT
|
||||
|
||||
# CHECK-FDATA: no_lbr
|
||||
|
||||
@@ -61,11 +61,11 @@ RUN: FileCheck %s -check-prefix=NEWFORMAT --input-file %t.bolt.yaml
|
||||
RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba \
|
||||
RUN: 2>&1 | FileCheck %s --check-prefix=BASIC-ERROR
|
||||
RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba.nl \
|
||||
RUN: -nl 2>&1 | FileCheck %s --check-prefix=BASIC-SUCCESS
|
||||
RUN: FileCheck %s --input-file %t.ba.nl --check-prefix CHECK-BASIC-NL
|
||||
RUN: -ba 2>&1 | FileCheck %s --check-prefix=BASIC-SUCCESS
|
||||
RUN: FileCheck %s --input-file %t.ba.nl --check-prefix CHECK-BASIC-BA
|
||||
BASIC-ERROR: BOLT-INFO: 0 out of 7 functions in the binary (0.0%) have non-empty execution profile
|
||||
BASIC-SUCCESS: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
|
||||
CHECK-BASIC-NL: no_lbr cycles
|
||||
CHECK-BASIC-BA: no_lbr cycles
|
||||
|
||||
PERF2BOLT: 1 frame_dummy/1 1e 1 frame_dummy/1 0 0 1
|
||||
PERF2BOLT-NEXT: 1 main 451 1 SolveCubic 0 0 2
|
||||
|
||||
@@ -4,7 +4,7 @@ REQUIRES: system-linux, perf
|
||||
|
||||
RUN: %clang %S/Inputs/perf_test.c -fuse-ld=lld -pie -Wl,--script=%S/Inputs/perf_test.lds -o %t
|
||||
RUN: perf record -Fmax -e cycles:u -o %t2 -- %t
|
||||
RUN: perf2bolt %t -p=%t2 -o %t3 -nl -ignore-build-id --show-density \
|
||||
RUN: perf2bolt %t -p=%t2 -o %t3 -ba -ignore-build-id --show-density \
|
||||
RUN: --heatmap %t.hm 2>&1 | FileCheck %s
|
||||
RUN: FileCheck %s --input-file %t.hm-section-hotness.csv --check-prefix=CHECK-HM
|
||||
|
||||
@@ -15,7 +15,7 @@ CHECK: BOLT-INFO: Functions with density >= {{.*}} account for 99.00% total samp
|
||||
|
||||
RUN: %clang %S/Inputs/perf_test.c -no-pie -fuse-ld=lld -o %t4
|
||||
RUN: perf record -Fmax -e cycles:u -o %t5 -- %t4
|
||||
RUN: perf2bolt %t4 -p=%t5 -o %t6 -nl -ignore-build-id --show-density \
|
||||
RUN: perf2bolt %t4 -p=%t5 -o %t6 -ba -ignore-build-id --show-density \
|
||||
RUN: --heatmap %t.hm2 2>&1 | FileCheck %s
|
||||
RUN: FileCheck %s --input-file %t.hm2-section-hotness.csv --check-prefix=CHECK-HM
|
||||
|
||||
|
||||
@@ -133,7 +133,7 @@ def perf2bolt(args):
|
||||
"--profile-format=yaml",
|
||||
]
|
||||
if not opts.lbr:
|
||||
p2b_args += ["-nl"]
|
||||
p2b_args += ["-ba"]
|
||||
p2b_args += ["-p"]
|
||||
for filename in findFilesWithExtension(opts.path, "perf.data"):
|
||||
subprocess.check_call(p2b_args + [filename, "-o", filename + ".fdata"])
|
||||
@@ -722,7 +722,7 @@ def bolt_optimize(args):
|
||||
"-dyno-stats",
|
||||
"-use-gnu-stack",
|
||||
"-update-debug-sections",
|
||||
"-nl" if opts.method == "PERF" else "",
|
||||
"-ba" if opts.method == "PERF" else "",
|
||||
]
|
||||
print("Running: " + " ".join(args))
|
||||
process = subprocess.run(
|
||||
|
||||
Reference in New Issue
Block a user