[Coroutines] Conditional elide coroutines based on hot/cold information (#162276)

Unconditionally eliding all `[[clang::coro_await_elidable]]` coroutines
is not good.

For example,

```
Task bar();
Task foo(bool b) {
     if (b) [[unlikely]] {
         co_await bar();
     }
}
```

Assume Task is marked with `[[clang::coro_await_elidable]]`, now we will
always elide the call to `bar()` into the frame of `foo()`. But this may
be a regression instead of an optimization if `b` is always false.

This patch tries to mitigate the problem by leveraging hot/cold
information. This can be optimized further in the future but at least
this patch makes things better.

This patch was originally written by ChuanqiXu9 (#145831), but stalled
during PR review because the diagnostics were not integrated with
the existing optimization remarks. I rebased the original patch,
integrated it with the optimization remarks, and did a couple of smaller
cosmetic changes (e.g., made the test case expectations more
targeted, etc.)

Co-Authored-by: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
This commit is contained in:
Adrian Vogelsgesang
2025-10-09 08:31:44 -07:00
committed by GitHub
parent a03aef4fae
commit c4f36758b7
5 changed files with 68 additions and 5 deletions

View File

@@ -20,6 +20,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
@@ -555,6 +556,7 @@ public:
Argument(StringRef Key, bool B) : Key(Key), Val(B ? "true" : "false") {}
LLVM_ABI Argument(StringRef Key, DebugLoc dl);
LLVM_ABI Argument(StringRef Key, InstructionCost C);
LLVM_ABI Argument(StringRef Key, BranchProbability P);
};
/// \p PassName is the name of the pass emitting this diagnostic. \p

View File

@@ -273,6 +273,13 @@ DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key,
C.print(OS);
}
DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key,
BranchProbability P)
: Key(std::string(Key)) {
raw_string_ostream OS(Val);
P.print(OS);
}
DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key, DebugLoc Loc)
: Key(std::string(Key)), Loc(Loc) {
if (Loc) {

View File

@@ -24,6 +24,9 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -33,6 +36,11 @@ using namespace llvm;
#define DEBUG_TYPE "coro-annotation-elide"
static cl::opt<float> CoroElideBranchRatio(
"coro-elide-branch-ratio", cl::init(0.55), cl::Hidden,
cl::desc("Minimum BranchProbability to consider a elide a coroutine."));
extern cl::opt<unsigned> MinBlockCounterExecution;
static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) {
for (Instruction &I : F->getEntryBlock())
if (!isa<AllocaInst>(&I))
@@ -145,6 +153,30 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C,
bool IsCallerPresplitCoroutine = Caller->isPresplitCoroutine();
bool HasAttr = CB->hasFnAttr(llvm::Attribute::CoroElideSafe);
if (IsCallerPresplitCoroutine && HasAttr) {
BranchProbability MinBranchProbability(
static_cast<int>(CoroElideBranchRatio * MinBlockCounterExecution),
MinBlockCounterExecution);
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*Caller);
auto Prob = BranchProbability::getBranchProbability(
BFI.getBlockFreq(CB->getParent()).getFrequency(),
BFI.getEntryFreq().getFrequency());
if (Prob < MinBranchProbability) {
ORE.emit([&]() {
return OptimizationRemarkMissed(
DEBUG_TYPE, "CoroAnnotationElideUnlikely", Caller)
<< "'" << ore::NV("callee", Callee->getName())
<< "' not elided in '"
<< ore::NV("caller", Caller->getName())
<< "' because of low probability: "
<< ore::NV("probability", Prob) << " (threshold: "
<< ore::NV("threshold", MinBranchProbability) << ")";
});
continue;
}
auto *CallerN = CG.lookup(*Caller);
auto *CallerC = CallerN ? CG.lookupSCC(*CallerN) : nullptr;
// If CallerC is nullptr, it means LazyCallGraph hasn't visited Caller
@@ -156,7 +188,7 @@ PreservedAnalyses CoroAnnotationElidePass::run(LazyCallGraph::SCC &C,
return OptimizationRemark(DEBUG_TYPE, "CoroAnnotationElide", Caller)
<< "'" << ore::NV("callee", Callee->getName())
<< "' elided in '" << ore::NV("caller", Caller->getName())
<< "'";
<< "' (probability: " << ore::NV("probability", Prob) << ")";
});
FAM.invalidate(*Caller, PreservedAnalyses::none());

View File

@@ -109,7 +109,7 @@ static cl::opt<float> MinRegionSizeRatio(
"outline candidate and original function"));
// Used to tune the minimum number of execution counts needed in the predecessor
// block to the cold edge. ie. confidence interval.
static cl::opt<unsigned>
cl::opt<unsigned>
MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
cl::desc("Minimum block executions to consider "
"its BranchProbabilityInfo valid"));

View File

@@ -1,4 +1,8 @@
; Testing elide performed its job for calls to coroutines marked safe.
; Coroutine calls marked with `coro_elide_safe` should be elided.
; Inside `caller`, we expect the `callee` coroutine to be elided.
; Inside `caller_conditional`, `callee` is only called on an unlikely
; path, hence we expect the `callee` coroutine NOT to be elided.
;
; RUN: opt < %s -S -passes='cgscc(coro-annotation-elide)' | FileCheck %s
%struct.Task = type { ptr }
@@ -57,7 +61,7 @@ define ptr @callee.noalloc(i8 %arg, ptr dereferenceable(32) align(8) %frame) {
; Function Attrs: presplitcoroutine
define ptr @caller() #0 {
entry:
%task = call ptr @callee(i8 0) #1
%task = call ptr @callee(i8 0) coro_elide_safe
ret ptr %task
; CHECK: %[[TASK:.+]] = alloca %struct.Task, align 8
; CHECK-NEXT: %[[FRAME:.+]] = alloca [32 x i8], align 8
@@ -69,6 +73,25 @@ entry:
; CHECK-NEXT: ret ptr %[[TASK]]
}
; CHECK-LABEL: define ptr @caller_conditional(i1 %cond)
; Function Attrs: presplitcoroutine
define ptr @caller_conditional(i1 %cond) #0 {
entry:
br i1 %cond, label %call, label %ret
call:
; CHECK-NOT: alloca
; CHECK-NOT: @llvm.coro.id({{.*}}, ptr @callee, {{.*}})
; CHECK: %task = call ptr @callee(i8 0)
; CHECK-NEXT: br label %ret
%task = call ptr @callee(i8 0) coro_elide_safe
br label %ret
ret:
%retval = phi ptr [ %task, %call ], [ null, %entry ]
ret ptr %retval
}
declare token @llvm.coro.id(i32, ptr, ptr, ptr)
declare ptr @llvm.coro.begin(token, ptr)
declare ptr @llvm.coro.frame()
@@ -76,4 +99,3 @@ declare ptr @llvm.coro.subfn.addr(ptr, i8)
declare i1 @llvm.coro.alloc(token)
attributes #0 = { presplitcoroutine }
attributes #1 = { coro_elide_safe }