mirror of
https://github.com/intel/llvm.git
synced 2026-01-28 01:04:49 +08:00
[FuncSpec] Enable specialization of literal constants.
To do so we have to tweak the cost model such that specialization does not trigger excessively. Differential Revision: https://reviews.llvm.org/D150649
This commit is contained in:
@@ -188,6 +188,8 @@ public:
|
||||
|
||||
bool run();
|
||||
|
||||
static unsigned getBlockFreqMultiplier();
|
||||
|
||||
InstCostVisitor getInstCostVisitorFor(Function *F) {
|
||||
auto &BFI = (GetBFI)(*F);
|
||||
auto &TTI = (GetTTI)(*F);
|
||||
|
||||
@@ -74,6 +74,22 @@ static cl::opt<bool> ForceSpecialization(
|
||||
"Force function specialization for every call site with a constant "
|
||||
"argument"));
|
||||
|
||||
// Factor applied to a block's frequency before it is divided by the
// function's entry frequency when weighting instructions for the
// specialization bonus. A weight of zero makes the estimator skip the
// block/user entirely, so the multiplier keeps blocks somewhat colder than the
// entry block from being dropped (assuming integer arithmetic, as the
// uint64_t weight suggests).
// Set to 2^3 to model three levels of if-else nest.
|
||||
static cl::opt<unsigned> BlockFreqMultiplier(
|
||||
"funcspec-block-freq-multiplier", cl::init(8), cl::Hidden, cl::desc(
|
||||
"Multiplier to scale block frequency of user instructions during "
|
||||
"specialization bonus estimation"));
|
||||
|
||||
// Coldness threshold consulted by FunctionSpecializer::findSpecializations:
// a function whose entry frequency, as reported by the block frequency
// analysis, is below this value is rejected as a specialization candidate
// unless ForceSpecialization is set.
static cl::opt<unsigned> MinEntryFreq(
|
||||
"funcspec-min-entry-freq", cl::init(450), cl::Hidden, cl::desc(
|
||||
"Do not specialize functions with entry block frequency lower than "
|
||||
"this value"));
|
||||
|
||||
// Profitability threshold for a single specialization. The score is the sum
// of the specialization bonuses of the specialized arguments divided by the
// specialization cost; candidates scoring below this value are discarded
// unless ForceSpecialization is set.
static cl::opt<unsigned> MinScore(
|
||||
"funcspec-min-score", cl::init(2), cl::Hidden, cl::desc(
|
||||
"Do not specialize functions with score lower than this value "
|
||||
"(the ratio of specialization bonus over specialization cost)"));
|
||||
|
||||
static cl::opt<unsigned> MaxClones(
|
||||
"funcspec-max-clones", cl::init(3), cl::Hidden, cl::desc(
|
||||
"The maximum number of clones allowed for a single function "
|
||||
@@ -88,15 +104,15 @@ static cl::opt<bool> SpecializeOnAddress(
|
||||
"funcspec-on-address", cl::init(false), cl::Hidden, cl::desc(
|
||||
"Enable function specialization on the address of global values"));
|
||||
|
||||
// Disabled by default as it can significantly increase compilation times.
|
||||
//
|
||||
// https://llvm-compile-time-tracker.com
|
||||
// https://github.com/nikic/llvm-compile-time-tracker
|
||||
static cl::opt<bool> SpecializeLiteralConstant(
|
||||
"funcspec-for-literal-constant", cl::init(false), cl::Hidden, cl::desc(
|
||||
"funcspec-for-literal-constant", cl::init(true), cl::Hidden, cl::desc(
|
||||
"Enable specialization of functions that take a literal constant as an "
|
||||
"argument"));
|
||||
|
||||
// Accessor for the file-static BlockFreqMultiplier option
// (-funcspec-block-freq-multiplier), so that callers outside this translation
// unit can scale block frequencies with the same factor.
unsigned FunctionSpecializer::getBlockFreqMultiplier() {
|
||||
return BlockFreqMultiplier;
|
||||
}
|
||||
|
||||
// Estimates the instruction cost of all the basic blocks in \p WorkList.
|
||||
// The successors of such blocks are added to the list as long as they are
|
||||
// executable and they have a unique predecessor. \p WorkList represents
|
||||
@@ -114,7 +130,8 @@ static Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList,
|
||||
while (!WorkList.empty()) {
|
||||
BasicBlock *BB = WorkList.pop_back_val();
|
||||
|
||||
uint64_t Weight = BFI.getBlockFreq(BB).getFrequency() /
|
||||
uint64_t Weight = BlockFreqMultiplier *
|
||||
BFI.getBlockFreq(BB).getFrequency() /
|
||||
BFI.getEntryFreq();
|
||||
if (!Weight)
|
||||
continue;
|
||||
@@ -167,7 +184,8 @@ Cost InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
|
||||
|
||||
KnownConstants.insert({User, C});
|
||||
|
||||
uint64_t Weight = BFI.getBlockFreq(User->getParent()).getFrequency() /
|
||||
uint64_t Weight = BlockFreqMultiplier *
|
||||
BFI.getBlockFreq(User->getParent()).getFrequency() /
|
||||
BFI.getEntryFreq();
|
||||
if (!Weight)
|
||||
return 0;
|
||||
@@ -649,6 +667,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
|
||||
if (Args.empty())
|
||||
return false;
|
||||
|
||||
bool HasCheckedEntryFreq = false;
|
||||
for (User *U : F->users()) {
|
||||
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
|
||||
continue;
|
||||
@@ -684,6 +703,21 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
|
||||
if (S.Args.empty())
|
||||
continue;
|
||||
|
||||
// Check the function entry frequency only once. We sink this code here to
|
||||
// postpone running the Block Frequency Analysis until we know for sure
|
||||
// there are Specialization candidates, otherwise we are adding unnecessary
|
||||
// overhead.
|
||||
if (!HasCheckedEntryFreq) {
|
||||
// Reject cold functions (for some definition of 'cold').
|
||||
uint64_t EntryFreq = (GetBFI)(*F).getEntryFreq();
|
||||
if (!ForceSpecialization && EntryFreq < MinEntryFreq)
|
||||
return false;
|
||||
|
||||
HasCheckedEntryFreq = true;
|
||||
LLVM_DEBUG(dbgs() << "FnSpecialization: Entry block frequency for "
|
||||
<< F->getName() << " = " << EntryFreq << "\n");
|
||||
}
|
||||
|
||||
// Check if we have encountered the same specialisation already.
|
||||
if (auto It = UniqueSpecs.find(S); It != UniqueSpecs.end()) {
|
||||
// Existing specialisation. Add the call to the list to rewrite, unless
|
||||
@@ -698,13 +732,14 @@ bool FunctionSpecializer::findSpecializations(Function *F, Cost SpecCost,
|
||||
AllSpecs[Index].CallSites.push_back(&CS);
|
||||
} else {
|
||||
// Calculate the specialisation gain.
|
||||
Cost Score = 0 - SpecCost;
|
||||
Cost Score = 0;
|
||||
InstCostVisitor Visitor = getInstCostVisitorFor(F);
|
||||
for (ArgInfo &A : S.Args)
|
||||
Score += getSpecializationBonus(A.Formal, A.Actual, Visitor);
|
||||
Score /= SpecCost;
|
||||
|
||||
// Discard unprofitable specialisations.
|
||||
if (!ForceSpecialization && Score <= 0)
|
||||
if (!ForceSpecialization && Score < MinScore)
|
||||
continue;
|
||||
|
||||
// Create a new specialisation entry.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; RUN: opt -S --passes="default<O3>" < %s | FileCheck %s
|
||||
; RUN: opt -S --passes="default<O3>" -force-specialization < %s | FileCheck %s
|
||||
|
||||
define dso_local i32 @g0(i32 noundef %x) local_unnamed_addr {
|
||||
entry:
|
||||
|
||||
@@ -1,11 +1,9 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
|
||||
|
||||
; Test that function specialization does not crash on a constant expression.
|
||||
; Note that this test case shows that the function specialization pass would
|
||||
; transform the function even if no specialization happened.
|
||||
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
|
||||
|
||||
%struct = type { i8, i16, i32, i64, i64}
|
||||
@Global = internal constant %struct {i8 0, i16 1, i32 2, i64 3, i64 4}
|
||||
|
||||
@@ -26,19 +24,6 @@ entry:
|
||||
}
|
||||
|
||||
define internal i64 @zoo(i1 %flag) {
|
||||
; CHECK-LABEL: @zoo(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
|
||||
; CHECK: plus:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @func2.2(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3))
|
||||
; CHECK-NEXT: br label [[MERGE:%.*]]
|
||||
; CHECK: minus:
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @func2.1(ptr getelementptr inbounds ([[STRUCT]], ptr @Global, i32 0, i32 4))
|
||||
; CHECK-NEXT: br label [[MERGE]]
|
||||
; CHECK: merge:
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = phi i64 [ ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3) to i64), [[PLUS]] ], [ ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4) to i64), [[MINUS]] ]
|
||||
; CHECK-NEXT: ret i64 [[TMP2]]
|
||||
;
|
||||
entry:
|
||||
br i1 %flag, label %plus, label %minus
|
||||
|
||||
@@ -60,10 +45,9 @@ merge:
|
||||
|
||||
define i64 @main() {
|
||||
; CHECK-LABEL: @main(
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @zoo(i1 false)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @zoo(i1 true)
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP1]], [[TMP2]]
|
||||
; CHECK-NEXT: ret i64 [[TMP3]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @zoo.4(i1 false)
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @zoo.3(i1 true)
|
||||
; CHECK-NEXT: ret i64 add (i64 ptrtoint (ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4) to i64), i64 ptrtoint (ptr getelementptr inbounds ([[STRUCT]], ptr @Global, i32 0, i32 3) to i64))
|
||||
;
|
||||
%1 = call i64 @zoo(i1 0)
|
||||
%2 = call i64 @zoo(i1 1)
|
||||
@@ -71,3 +55,29 @@ define i64 @main() {
|
||||
ret i64 %3
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @func2.1(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: ret i64 undef
|
||||
|
||||
; CHECK-LABEL: @func2.2(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: ret i64 undef
|
||||
|
||||
; CHECK-LABEL: @zoo.3(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br label [[PLUS:%.*]]
|
||||
; CHECK: plus:
|
||||
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @func2.2(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 3))
|
||||
; CHECK-NEXT: br label [[MERGE:%.*]]
|
||||
; CHECK: merge:
|
||||
; CHECK-NEXT: ret i64 undef
|
||||
|
||||
; CHECK-LABEL: @zoo.4(
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br label [[MINUS:%.*]]
|
||||
; CHECK: minus:
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = call i64 @func2.1(ptr getelementptr inbounds ([[STRUCT:%.*]], ptr @Global, i32 0, i32 4))
|
||||
; CHECK-NEXT: br label [[MERGE:%.*]]
|
||||
; CHECK: merge:
|
||||
; CHECK-NEXT: ret i64 undef
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
|
||||
|
||||
; Checks for callsites that have been annotated with MinSize. We only expect
|
||||
; specialisation for the call that does not have the attribute:
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s
|
||||
; RUN: opt -passes="ipsccp<no-func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s --check-prefix=NOFSPEC
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
|
||||
; RUN: opt -passes="ipsccp<no-func-spec>" -force-specialization -S < %s | FileCheck %s --check-prefix=NOFSPEC
|
||||
|
||||
define i64 @main(i64 %x, i1 %flag) {
|
||||
;
|
||||
|
||||
@@ -1,88 +0,0 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -force-specialization -S < %s | FileCheck %s
|
||||
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s
|
||||
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED
|
||||
|
||||
; DISABLED-NOT: @func.1(
|
||||
; DISABLED-NOT: @func.2(
|
||||
|
||||
define internal i32 @func(ptr %0, i32 %1, ptr nocapture %2) {
|
||||
%4 = alloca i32, align 4
|
||||
store i32 %1, ptr %4, align 4
|
||||
%5 = load i32, ptr %4, align 4
|
||||
%6 = icmp slt i32 %5, 1
|
||||
br i1 %6, label %14, label %7
|
||||
|
||||
7: ; preds = %3
|
||||
%8 = load i32, ptr %4, align 4
|
||||
%9 = sext i32 %8 to i64
|
||||
%10 = getelementptr inbounds i32, ptr %0, i64 %9
|
||||
call void %2(ptr %10)
|
||||
%11 = load i32, ptr %4, align 4
|
||||
%12 = add nsw i32 %11, -1
|
||||
%13 = call i32 @func(ptr %0, i32 %12, ptr %2)
|
||||
br label %14
|
||||
|
||||
14: ; preds = %3, %7
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define internal void @increment(ptr nocapture %0) {
|
||||
%2 = load i32, ptr %0, align 4
|
||||
%3 = add nsw i32 %2, 1
|
||||
store i32 %3, ptr %0, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define internal void @decrement(ptr nocapture %0) {
|
||||
%2 = load i32, ptr %0, align 4
|
||||
%3 = add nsw i32 %2, -1
|
||||
store i32 %3, ptr %0, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @main(ptr %0, i32 %1) {
|
||||
; CHECK: call void @func.2(ptr [[TMP0:%.*]], i32 [[TMP1:%.*]])
|
||||
%3 = call i32 @func(ptr %0, i32 %1, ptr nonnull @increment)
|
||||
; CHECK: call void @func.1(ptr [[TMP0]], i32 0)
|
||||
%4 = call i32 @func(ptr %0, i32 %3, ptr nonnull @decrement)
|
||||
; CHECK: ret i32 0
|
||||
ret i32 %4
|
||||
}
|
||||
|
||||
; CHECK: @func.1(
|
||||
; CHECK: [[TMP3:%.*]] = alloca i32, align 4
|
||||
; CHECK: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
|
||||
; CHECK: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; CHECK: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
|
||||
; CHECK: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
|
||||
; CHECK: 6:
|
||||
; CHECK: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; CHECK: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
|
||||
; CHECK: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
|
||||
; CHECK: call void @decrement(ptr [[TMP9]])
|
||||
; CHECK: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; CHECK: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
|
||||
; CHECK: call void @func.1(ptr [[TMP0]], i32 [[TMP11]])
|
||||
; CHECK: br label [[TMP12:%.*]]
|
||||
; CHECK: 12:
|
||||
; CHECK: ret void
|
||||
;
|
||||
;
|
||||
; CHECK: @func.2(
|
||||
; CHECK: [[TMP3:%.*]] = alloca i32, align 4
|
||||
; CHECK: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
|
||||
; CHECK: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; CHECK: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
|
||||
; CHECK: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
|
||||
; CHECK: 6:
|
||||
; CHECK: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; CHECK: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
|
||||
; CHECK: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
|
||||
; CHECK: call void @increment(ptr [[TMP9]])
|
||||
; CHECK: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; CHECK: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
|
||||
; CHECK: call void @func.2(ptr [[TMP0]], i32 [[TMP11]])
|
||||
; CHECK: br label [[TMP12:%.*]]
|
||||
; CHECK: 12:
|
||||
; CHECK: ret void
|
||||
@@ -1,4 +1,4 @@
|
||||
; RUN: opt -S --passes="ipsccp<func-spec>" < %s | FileCheck %s
|
||||
; RUN: opt -S --passes="ipsccp<func-spec>" -force-specialization < %s | FileCheck %s
|
||||
define dso_local i32 @p0(i32 noundef %x) {
|
||||
entry:
|
||||
%add = add nsw i32 %x, 1
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-max-clones=1 < %s | FileCheck %s
|
||||
; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-max-clones=1 -force-specialization < %s | FileCheck %s
|
||||
|
||||
define internal i32 @f(i32 noundef %x, ptr nocapture noundef readonly %p, ptr nocapture noundef readonly %q) noinline {
|
||||
entry:
|
||||
%call = tail call i32 %p(i32 noundef %x)
|
||||
|
||||
@@ -6,10 +6,10 @@ define i64 @main(i64 %x, i64 %y, i1 %flag) {
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
|
||||
; CHECK: plus:
|
||||
; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.2(i64 [[X:%.*]], i64 [[Y:%.*]], ptr @plus, ptr @minus)
|
||||
; CHECK-NEXT: [[CMP0:%.*]] = call i64 @compute.2(i64 [[X:%.*]], i64 42, ptr @plus, ptr @minus)
|
||||
; CHECK-NEXT: br label [[MERGE:%.*]]
|
||||
; CHECK: minus:
|
||||
; CHECK-NEXT: [[CMP1:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y]], ptr @minus, ptr @plus)
|
||||
; CHECK-NEXT: [[CMP1:%.*]] = call i64 @compute.3(i64 [[X]], i64 [[Y:%.*]], ptr @minus, ptr @plus)
|
||||
; CHECK-NEXT: br label [[MERGE]]
|
||||
; CHECK: merge:
|
||||
; CHECK-NEXT: [[PH:%.*]] = phi i64 [ [[CMP0]], [[PLUS]] ], [ [[CMP1]], [[MINUS]] ]
|
||||
@@ -20,7 +20,7 @@ entry:
|
||||
br i1 %flag, label %plus, label %minus
|
||||
|
||||
plus:
|
||||
%cmp0 = call i64 @compute(i64 %x, i64 %y, ptr @plus, ptr @minus)
|
||||
%cmp0 = call i64 @compute(i64 %x, i64 42, ptr @plus, ptr @minus)
|
||||
br label %merge
|
||||
|
||||
minus:
|
||||
@@ -68,9 +68,9 @@ entry:
|
||||
|
||||
; CHECK-LABEL: @compute.2
|
||||
; CHECK-NEXT: entry:
|
||||
; CHECK-NEXT: [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 [[Y:%.*]])
|
||||
; CHECK-NEXT: [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 [[Y]])
|
||||
; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], ptr @plus, ptr @plus)
|
||||
; CHECK-NEXT: [[CMP0:%.*]] = call i64 @plus(i64 [[X:%.*]], i64 42)
|
||||
; CHECK-NEXT: [[CMP1:%.*]] = call i64 @minus(i64 [[X]], i64 42)
|
||||
; CHECK-NEXT: [[CMP2:%.*]] = call i64 @compute.1(i64 [[X]], i64 42, ptr @plus, ptr @plus)
|
||||
|
||||
; CHECK-LABEL: @compute.3
|
||||
; CHECK-NEXT: entry:
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
; RUN: opt -S --passes="ipsccp<func-spec>" \
|
||||
; RUN: -funcspec-for-literal-constant=0 \
|
||||
; RUN: -force-specialization < %s | FileCheck %s -check-prefix CHECK-NOLIT
|
||||
; RUN: opt -S --passes="ipsccp<func-spec>" \
|
||||
; RUN: -funcspec-for-literal-constant \
|
||||
; RUN: -funcspec-for-literal-constant=1 \
|
||||
; RUN: -force-specialization < %s | FileCheck %s -check-prefix CHECK-LIT
|
||||
|
||||
define i32 @f0(i32 noundef %x) {
|
||||
|
||||
110
llvm/test/Transforms/FunctionSpecialization/max-iters.ll
Normal file
110
llvm/test/Transforms/FunctionSpecialization/max-iters.ll
Normal file
@@ -0,0 +1,110 @@
|
||||
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS1
|
||||
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=1 -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS1
|
||||
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=2 -force-specialization -S < %s | FileCheck %s --check-prefixes=COMMON,ITERS2
|
||||
; RUN: opt -passes="ipsccp<func-spec>,deadargelim" -funcspec-max-iters=0 -force-specialization -S < %s | FileCheck %s --check-prefix=DISABLED
|
||||
|
||||
; DISABLED-NOT: @func.1(
|
||||
; DISABLED-NOT: @func.2(
|
||||
; DISABLED-NOT: @func.3(
|
||||
|
||||
; Recursive test subject. The i32 argument is spilled to a stack slot; while
; it is >= 1 the function indirectly calls the pointer argument %2 on the
; element of %0 at that index and then calls itself with the index decremented
; by one, forwarding %2 unchanged. It always returns 0.
define internal i32 @func(ptr %0, i32 %1, ptr nocapture %2) {
|
||||
%4 = alloca i32, align 4
|
||||
store i32 %1, ptr %4, align 4
|
||||
%5 = load i32, ptr %4, align 4
|
||||
%6 = icmp slt i32 %5, 1
|
||||
br i1 %6, label %14, label %7
|
||||
|
||||
7: ; preds = %3
|
||||
%8 = load i32, ptr %4, align 4
|
||||
%9 = sext i32 %8 to i64
|
||||
%10 = getelementptr inbounds i32, ptr %0, i64 %9
|
||||
call void %2(ptr %10)
|
||||
%11 = load i32, ptr %4, align 4
|
||||
%12 = add nsw i32 %11, -1
|
||||
%13 = call i32 @func(ptr %0, i32 %12, ptr %2)
|
||||
br label %14
|
||||
|
||||
|
||||
14: ; preds = %3, %7
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; Adds 1 to the i32 stored at %0. Passed to @func as a constant function
; pointer by @main.
define internal void @increment(ptr nocapture %0) {
|
||||
%2 = load i32, ptr %0, align 4
|
||||
%3 = add nsw i32 %2, 1
|
||||
store i32 %3, ptr %0, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Subtracts 1 from the i32 stored at %0. Passed to @func as a constant
; function pointer by @main.
define internal void @decrement(ptr nocapture %0) {
|
||||
%2 = load i32, ptr %0, align 4
|
||||
%3 = add nsw i32 %2, -1
|
||||
store i32 %3, ptr %0, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Entry point of the test: calls @func twice, each time with a different
; constant function pointer (@increment, then @decrement). The i32 argument of
; the second call is the result of the first call.
define i32 @main(ptr %0, i32 %1) {
|
||||
; COMMON: define i32 @main(
|
||||
; COMMON-NEXT: call void @func.2(ptr [[TMP0:%.*]], i32 [[TMP1:%.*]])
|
||||
; COMMON-NEXT: call void @func.1(ptr [[TMP0]])
|
||||
; COMMON-NEXT: ret i32 0
|
||||
;
|
||||
%3 = call i32 @func(ptr %0, i32 %1, ptr nonnull @increment)
|
||||
%4 = call i32 @func(ptr %0, i32 %3, ptr nonnull @decrement)
|
||||
ret i32 %4
|
||||
}
|
||||
|
||||
; COMMON: define internal void @func.1(
|
||||
; COMMON-NEXT: [[TMP2:%.*]] = alloca i32, align 4
|
||||
; COMMON-NEXT: store i32 0, ptr [[TMP2]], align 4
|
||||
; COMMON-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4
|
||||
; COMMON-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP3]], 1
|
||||
; COMMON-NEXT: br i1 [[TMP4]], label [[TMP11:%.*]], label [[TMP5:%.*]]
|
||||
; COMMON: 5:
|
||||
; COMMON-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4
|
||||
; COMMON-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64
|
||||
; COMMON-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP7]]
|
||||
; COMMON-NEXT: call void @decrement(ptr [[TMP8]])
|
||||
; COMMON-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP2]], align 4
|
||||
; COMMON-NEXT: [[TMP10:%.*]] = add nsw i32 [[TMP9]], -1
|
||||
; ITERS1-NEXT: call void @func(ptr [[TMP0]], i32 [[TMP10]], ptr @decrement)
|
||||
; ITERS2-NEXT: call void @func.3(ptr [[TMP0]], i32 [[TMP10]])
|
||||
; COMMON-NEXT: br label [[TMP11:%.*]]
|
||||
; COMMON: 11:
|
||||
; COMMON-NEXT: ret void
|
||||
;
|
||||
; COMMON: define internal void @func.2(
|
||||
; COMMON-NEXT: [[TMP3:%.*]] = alloca i32, align 4
|
||||
; COMMON-NEXT: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
|
||||
; COMMON-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; COMMON-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
|
||||
; COMMON-NEXT: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
|
||||
; COMMON: 6:
|
||||
; COMMON-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; COMMON-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
|
||||
; COMMON-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
|
||||
; COMMON-NEXT: call void @increment(ptr [[TMP9]])
|
||||
; COMMON-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; COMMON-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
|
||||
; COMMON-NEXT: call void @func.2(ptr [[TMP0]], i32 [[TMP11]])
|
||||
; COMMON-NEXT: br label [[TMP12:%.*]]
|
||||
; COMMON: 12:
|
||||
; COMMON-NEXT: ret void
|
||||
;
|
||||
; ITERS2: define internal void @func.3(
|
||||
; ITERS2-NEXT: [[TMP3:%.*]] = alloca i32, align 4
|
||||
; ITERS2-NEXT: store i32 [[TMP1:%.*]], ptr [[TMP3]], align 4
|
||||
; ITERS2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; ITERS2-NEXT: [[TMP5:%.*]] = icmp slt i32 [[TMP4]], 1
|
||||
; ITERS2-NEXT: br i1 [[TMP5]], label [[TMP13:%.*]], label [[TMP6:%.*]]
|
||||
; ITERS2: 6:
|
||||
; ITERS2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; ITERS2-NEXT: [[TMP8:%.*]] = sext i32 [[TMP7]] to i64
|
||||
; ITERS2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 [[TMP8]]
|
||||
; ITERS2-NEXT: call void @decrement(ptr [[TMP9]])
|
||||
; ITERS2-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP3]], align 4
|
||||
; ITERS2-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], -1
|
||||
; ITERS2-NEXT: call void @func.3(ptr [[TMP0]], i32 [[TMP11]])
|
||||
; ITERS2-NEXT: br label [[TMP12:%.*]]
|
||||
; ITERS2: 12:
|
||||
; ITERS2-NEXT: ret void
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; RUN: opt -S --passes="ipsccp<func-spec>" < %s | FileCheck %s
|
||||
; RUN: opt -S --passes="ipsccp<func-spec>" -funcspec-min-entry-freq=1 < %s | FileCheck %s
|
||||
define dso_local i32 @p0(i32 noundef %x) {
|
||||
entry:
|
||||
%add = add nsw i32 %x, 1
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-min-function-size=3 -S < %s | FileCheck %s
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -force-specialization -S < %s | FileCheck %s
|
||||
|
||||
define i64 @main(i64 %x, i1 %flag) {
|
||||
entry:
|
||||
|
||||
@@ -1,20 +1,12 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=0 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=NONE
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=1 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=ONE
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=2 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=TWO
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=3 -funcspec-min-function-size=14 -S < %s | FileCheck %s --check-prefix=THREE
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=0 -force-specialization -S < %s | FileCheck %s --check-prefix=NONE
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=1 -force-specialization -S < %s | FileCheck %s --check-prefix=ONE
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=2 -force-specialization -S < %s | FileCheck %s --check-prefix=TWO
|
||||
; RUN: opt -passes="ipsccp<func-spec>" -funcspec-max-clones=3 -force-specialization -S < %s | FileCheck %s --check-prefix=THREE
|
||||
|
||||
; Make sure that we iterate correctly after sorting the specializations:
|
||||
; FnSpecialization: Specializations for function compute
|
||||
; FnSpecialization: Gain = 608
|
||||
; FnSpecialization: FormalArg = binop1, ActualArg = power
|
||||
; FnSpecialization: FormalArg = binop2, ActualArg = mul
|
||||
; FnSpecialization: Gain = 982
|
||||
; FnSpecialization: FormalArg = binop1, ActualArg = plus
|
||||
; FnSpecialization: FormalArg = binop2, ActualArg = minus
|
||||
; FnSpecialization: Gain = 795
|
||||
; FnSpecialization: FormalArg = binop1, ActualArg = minus
|
||||
; FnSpecialization: FormalArg = binop2, ActualArg = power
|
||||
;
|
||||
; Score(@plus, @minus) > Score(42, @minus, @power) > Score(@power, @mul)
|
||||
|
||||
define i64 @main(i64 %x, i64 %y, i1 %flag) {
|
||||
; NONE-LABEL: @main(
|
||||
@@ -116,11 +108,11 @@ merge:
|
||||
;
|
||||
; THREE-LABEL: define internal i64 @compute.3(i64 %x, i64 %y, ptr %binop1, ptr %binop2) {
|
||||
; THREE-NEXT: entry:
|
||||
; THREE-NEXT: [[TMP0:%.+]] = call i64 @minus(i64 %x, i64 %y)
|
||||
; THREE-NEXT: [[TMP1:%.+]] = call i64 @power(i64 %x, i64 %y)
|
||||
; THREE-NEXT: [[TMP0:%.+]] = call i64 @minus(i64 %x, i64 42)
|
||||
; THREE-NEXT: [[TMP1:%.+]] = call i64 @power(i64 %x, i64 42)
|
||||
; THREE-NEXT: [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]]
|
||||
; THREE-NEXT: [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x
|
||||
; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], %y
|
||||
; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], 42
|
||||
; THREE-NEXT: [[TMP5:%.+]] = mul i64 [[TMP4]], 2
|
||||
; THREE-NEXT: ret i64 [[TMP5]]
|
||||
; THREE-NEXT: }
|
||||
|
||||
@@ -85,7 +85,10 @@ protected:
|
||||
auto &TTI = FAM.getResult<TargetIRAnalysis>(*I.getFunction());
|
||||
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(*I.getFunction());
|
||||
|
||||
return BFI.getBlockFreq(I.getParent()).getFrequency() / BFI.getEntryFreq() *
|
||||
uint64_t Weight = FunctionSpecializer::getBlockFreqMultiplier() *
|
||||
BFI.getBlockFreq(I.getParent()).getFrequency() /
|
||||
BFI.getEntryFreq();
|
||||
return Weight *
|
||||
TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
|
||||
}
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user