mirror of
https://github.com/intel/llvm.git
synced 2026-01-22 23:49:22 +08:00
[MacroFusion] Limit the maximum number of fused instructions to 2 to reduce dependencies
This is the example:
int foo(int a, int b, int c, int d) {
return a + b + c + d;
}
And this is the Dependency Graph:
+------+ +------+ +------+ +------+
| A | | B | | C | | D |
+--+--++ +---+--+ +--+---+ +--+---+
^ ^ ^ ^ ^ ^
| | | | | |
| | | |New1 +--------------+
| | | | |
| | | | +--+---+
| |New2 | +-------+ ADD1 |
| | | +--+---+
| | | Fuse ^
| | +-------------+
| +------------+
| |
| Fuse +--+---+
+----------->+ ADD2 |
| +------+
+--+---+
| ADD3 |
+------+
We would also need to create an artificial edge from ADD1 to A if
https://reviews.llvm.org/D69998 lands. That would force Node A to be scheduled
before ADD1 and ADD2. But in fact, it is fine to schedule Node A
in between ADD3 and ADD2, because ADD3 and ADD2 are NOT a fusion pair: ADD2
has already been matched to ADD1. We are therefore creating unnecessary
dependency edges that override the heuristics.
Differential Revision: https://reviews.llvm.org/D70066
This commit is contained in:
@@ -36,6 +36,21 @@ static bool isHazard(const SDep &Dep) {
|
||||
return Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output;
|
||||
}
|
||||
|
||||
/// Return the predecessor of \p SU that is reached through a cluster
/// dependency edge, or nullptr when none of the edges in SU.Preds is a
/// cluster edge. When several predecessor edges are cluster edges, the first
/// one in SU.Preds order is returned.
static SUnit *getPredClusterSU(const SUnit &SU) {
  SUnit *ClusterPred = nullptr;
  for (const SDep &PredDep : SU.Preds) {
    // Skip every edge that is not a cluster edge.
    if (!PredDep.isCluster())
      continue;
    ClusterPred = PredDep.getSUnit();
    break;
  }
  return ClusterPred;
}
|
||||
|
||||
static bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) {
|
||||
unsigned Num = 1;
|
||||
const SUnit *CurrentSU = &SU;
|
||||
while ((CurrentSU = getPredClusterSU(*CurrentSU)) && Num < FuseLimit) Num ++;
|
||||
return Num < FuseLimit;
|
||||
}
|
||||
|
||||
static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
|
||||
SUnit &SecondSU) {
|
||||
// Check that neither instr is already paired with another along the edge
|
||||
@@ -161,8 +176,10 @@ bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU)
|
||||
if (DepSU.isBoundaryNode())
|
||||
continue;
|
||||
|
||||
// Only chain two instructions together at most.
|
||||
const MachineInstr *DepMI = DepSU.getInstr();
|
||||
if (!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI))
|
||||
if (!hasLessThanNumFused(DepSU, 2) ||
|
||||
!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI))
|
||||
continue;
|
||||
|
||||
if (fuseInstructionPair(DAG, DepSU, AnchorSU))
|
||||
|
||||
@@ -1,21 +1,18 @@
|
||||
; REQUIRES: asserts
|
||||
; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+fuse-arith-logic -verify-misched -debug-only=machine-scheduler 2>&1 > /dev/null | FileCheck %s
|
||||
|
||||
; Verify that, the macro-fusion creates the necessary dependencies between SUs.
|
||||
; Verify that, the macro-fusion creates the necessary dependencies between SUs and
|
||||
; only 2 SU's are fused at most.
|
||||
define signext i32 @test(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d) {
|
||||
entry:
|
||||
; CHECK: ********** MI Scheduling **********
|
||||
; CHECK-LABEL: %bb.0 entry
|
||||
; CHECK: Macro fuse: SU([[SU4:[0-9]+]]) - SU([[SU5:[0-9]+]])
|
||||
; CHECK: Bind SU([[SU1:[0-9]+]]) - SU([[SU4]])
|
||||
; CHECK: Macro fuse: SU([[SU5]]) - SU([[SU6:[0-9]+]])
|
||||
; CHECK: Bind SU([[SU0:[0-9]+]]) - SU([[SU5]])
|
||||
; CHECK: SU([[SU0]]): %{{[0-9]+}}:gpr32 = COPY $w3
|
||||
; CHECK-NOT: Macro fuse:
|
||||
; CHECK: SU([[SU1]]): %{{[0-9]+}}:gpr32 = COPY $w2
|
||||
; CHECK: SU([[SU4]]): %{{[0-9]+}}:gpr32 = nsw ADDWrr
|
||||
; CHECK: SU([[SU5]]): %{{[0-9]+}}:gpr32 = nsw ADDWrr
|
||||
; CHECK: SU([[SU6]]): %{{[0-9]+}}:gpr32 = nsw SUBWrr
|
||||
|
||||
%add = add nsw i32 %b, %a
|
||||
%add1 = add nsw i32 %add, %c
|
||||
%sub = sub nsw i32 %add1, %d
|
||||
|
||||
Reference in New Issue
Block a user