Additional LLVM patching

Add necessary LLVM patches.
Author: Mielczarek, Aleksander
Date: 2025-10-13 06:46:54 +00:00
Committed by: igcbot
parent 7d69a9f62e
commit 9d5bec3237
12 changed files with 993 additions and 0 deletions

View File

@@ -0,0 +1,32 @@
From 84d340ed615c3601a2f46178acce2040d9d114f9 Mon Sep 17 00:00:00 2001
From: Victor Mustya <victor.mustya@intel.com>
Date: Mon, 31 Oct 2022 13:27:02 -0700
Subject: [Backport] When creating a stack space for inlined byval args,
 use the same addrspace as the original argument.
From: Chang-Sun Lin Jr <chang-sun.lin.jr@intel.com>
---
llvm/lib/Transforms/Utils/InlineFunction.cpp | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 399c9a43793f..bfb027568227 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1599,6 +1599,12 @@ static Value *HandleByValArgument(Type *ByValType, Value *Arg,
Arg->getName(), &*Caller->begin()->begin());
IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));
+ // If the byval was in a different address space, add a cast.
+ if (DL.getAllocaAddrSpace() != Arg->getType()->getPointerAddressSpace()) {
+ NewAlloca = new AddrSpaceCastInst(
+ NewAlloca, Arg->getType(), "",
+ cast<Instruction>(NewAlloca)->getNextNonDebugInstruction());
+ }
// Uses of the argument in the function should use our new alloca
// instead.
return NewAlloca;
--
2.43.0
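
For illustration, here is a minimal IR sketch of the situation this patch handles; the address-space numbers and function names are assumptions for the example, not taken from the patch. The callee receives its byval argument through an addrspace(4) pointer, while the inliner's copy is an alloca in the datalayout's alloca address space, so a cast back to the original pointer type is needed.

```llvm
; Callee takes its byval argument through a non-default address space,
; while allocas created by the inliner live in the alloca address space
; (assumed to be 0 here).
define internal void @callee(ptr addrspace(4) byval(i32) %p) {
  %v = load i32, ptr addrspace(4) %p
  ret void
}

define void @caller(ptr addrspace(4) %arg) {
  ; After inlining, the byval copy becomes an addrspace(0) alloca; with
  ; this patch an addrspacecast to ptr addrspace(4) is inserted so the
  ; inlined load keeps its original pointer type.
  call void @callee(ptr addrspace(4) byval(i32) %arg)
  ret void
}
```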

View File

@@ -0,0 +1,28 @@
From 881bf715f06201a57a4f1a60155b556fedd556db Mon Sep 17 00:00:00 2001
From: Victor Mustya <victor.mustya@intel.com>
Date: Tue, 22 Aug 2023 11:10:30 -0700
Subject: [PATCH] Don't emit bitreverse or bswap intrinsics of illegal bit
width during instcombine
---
llvm/lib/Transforms/Utils/Local.cpp | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index b2ed95b05..476a5c4c1 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3312,6 +3312,10 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
if (DemandedBW > ITy->getScalarSizeInBits())
return false;
+ auto &DL = I->getModule()->getDataLayout();
+ if (DL.isIllegalInteger(DemandedBW))
+ return false;
+
// Now, is the bit permutation correct for a bswap or a bitreverse? We can
// only byteswap values with an even number of bytes.
APInt DemandedMask = APInt::getAllOnes(DemandedBW);
--
2.43.0
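
As a rough sketch of the effect (the datalayout string is an assumption): a hand-rolled byte swap whose width is not a native integer type for the target is now left as shifts and ors instead of being collapsed into an @llvm.bswap intrinsic that the backend may not be able to lower.

```llvm
; i16 is not among the native integer widths declared below, so with this
; patch InstCombine no longer turns the or-of-shifts idiom into
; @llvm.bswap.i16.
target datalayout = "e-n32:64"

define i16 @swap16(i16 %x) {
  %hi = shl i16 %x, 8
  %lo = lshr i16 %x, 8
  %r = or i16 %hi, %lo
  ret i16 %r
}
```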

View File

@@ -0,0 +1,71 @@
/*========================== begin_copyright_notice ============================
Copyright (C) 2024 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
From 58b5b7d4ed6204f61feeda68c7c1abe24bc143b1 Mon Sep 17 00:00:00 2001
From: Victor Mustya <victor.mustya@intel.com>
Date: Tue, 16 Jan 2024 14:13:05 -0800
Subject: [InstCombine] Only fold bitcast(fptrunc) if destination type matches
fptrunc result type. (#77046)
It's not enough to just make sure the destination type is floating point,
because the following chain may be incorrectly optimized:
```LLVM
%trunc = fptrunc float %src to bfloat
%cast = bitcast bfloat %trunc to half
```
Before the fix, the instruction sequence mentioned above used to be
translated into a single fptrunc instruction as follows:
```LLVM
%trunc = fptrunc float %src to half
```
Such transformation was semantically incorrect.
---
llvm/lib/IR/Instructions.cpp | 4 ++--
llvm/test/Transforms/InstCombine/fptrunc.ll | 13 +++++++++++++
2 files changed, 15 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 7c343a0ff..932fc66a8 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -3218,8 +3218,8 @@ unsigned CastInst::isEliminableCastPair(
return 0;
case 4:
// No-op cast in second op implies firstOp as long as the DestTy
- // is floating point.
- if (DstTy->isFloatingPointTy())
+ // matches MidTy.
+ if (DstTy == MidTy)
return firstOp;
return 0;
case 5:
diff --git a/llvm/test/Transforms/InstCombine/fptrunc.ll b/llvm/test/Transforms/InstCombine/fptrunc.ll
index d3e153f12..c78df0b83 100644
--- a/llvm/test/Transforms/InstCombine/fptrunc.ll
+++ b/llvm/test/Transforms/InstCombine/fptrunc.ll
@@ -190,3 +190,16 @@ define half @ItoFtoF_u25_f32_f16(i25 %i) {
%r = fptrunc float %x to half
ret half %r
}
+
+; Negative test - bitcast bfloat to half is not optimized
+
+define half @fptrunc_to_bfloat_bitcast_to_half(float %src) {
+; CHECK-LABEL: @fptrunc_to_bfloat_bitcast_to_half(
+; CHECK-NEXT: [[TRUNC:%.*]] = fptrunc float [[SRC:%.*]] to bfloat
+; CHECK-NEXT: [[CAST:%.*]] = bitcast bfloat [[TRUNC]] to half
+; CHECK-NEXT: ret half [[CAST]]
+;
+ %trunc = fptrunc float %src to bfloat
+ %cast = bitcast bfloat %trunc to half
+ ret half %cast
+}
--
2.34.1

View File

@@ -0,0 +1,99 @@
/*========================== begin_copyright_notice ============================
Copyright (C) 2025 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
/*========================== begin_copyright_notice ============================
Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
See https://llvm.org/LICENSE.txt for license information.
SPDX-License-Identifier: Apache-2.0 with LLVM-exception
============================= end_copyright_notice ===========================*/
From c5c679933c462f28fac7358841a23ee32c292a47 Mon Sep 17 00:00:00 2001
From: peterbell10 <peterbell10@openai.com>
Date: Wed, 20 Nov 2024 21:06:57 +0000
Subject: [PATCH] [InstCombine] Only fold extract element to trunc if vector
`hasOneUse` (#115627)
This fixes a missed optimization caused by the `foldBitcastExtElt`
pattern interfering with other combine patterns. In the case I was
hitting, we have IR that combines two vectors into a new larger vector
by extracting elements and inserting them into the new vector.
```llvm
define <4 x half> @bitcast_extract_insert_to_shuffle(i32 %a, i32 %b) {
%avec = bitcast i32 %a to <2 x half>
%a0 = extractelement <2 x half> %avec, i32 0
%a1 = extractelement <2 x half> %avec, i32 1
%bvec = bitcast i32 %b to <2 x half>
%b0 = extractelement <2 x half> %bvec, i32 0
%b1 = extractelement <2 x half> %bvec, i32 1
%ins0 = insertelement <4 x half> undef, half %a0, i32 0
%ins1 = insertelement <4 x half> %ins0, half %a1, i32 1
%ins2 = insertelement <4 x half> %ins1, half %b0, i32 2
%ins3 = insertelement <4 x half> %ins2, half %b1, i32 3
ret <4 x half> %ins3
}
```
With the current behavior, `InstCombine` converts each vector extract
sequence to
```llvm
%tmp = trunc i32 %a to i16
%a0 = bitcast i16 %tmp to half
%a1 = extractelement <2 x half> %avec, i32 1
```
where the extraction of `%a0` is now done by truncating the original
integer. While on its own this is fairly reasonable, in this case it
also blocks the pattern which converts `extractelement` -
`insertelement` into shuffles which gives the overall simpler result:
```llvm
define <4 x half> @bitcast_extract_insert_to_shuffle(i32 %a, i32 %b) {
%avec = bitcast i32 %a to <2 x half>
%bvec = bitcast i32 %b to <2 x half>
%ins3 = shufflevector <2 x half> %avec, <2 x half> %bvec, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x half> %ins3
}
```
In this PR I fix the conflict by obeying the `hasOneUse` check even if
there is no shift instruction required. In these cases we can't remove
the vector completely, so the pattern has less benefit anyway.
Also fwiw, I think dropping the `hasOneUse` check for the 0th element
might have been a mistake in the first place. Looking at
https://github.com/llvm/llvm-project/commit/535c5d56a7bc9966036a11362d8984983a4bf090
the commit message only mentions loosening the `isDesirableIntType`
requirement and doesn't mention changing the `hasOneUse` check at all.
---
llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 61e62adbe327..d3b30848ab8b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -202,9 +202,9 @@ Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
if (IsBigEndian)
ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
unsigned ShiftAmountC = ExtIndexC * DestWidth;
- if (!ShiftAmountC ||
- (isDesirableIntType(X->getType()->getPrimitiveSizeInBits()) &&
- Ext.getVectorOperand()->hasOneUse())) {
+ if ((!ShiftAmountC ||
+ isDesirableIntType(X->getType()->getPrimitiveSizeInBits())) &&
+ Ext.getVectorOperand()->hasOneUse()) {
if (ShiftAmountC)
X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
if (DestTy->isFloatingPointTy()) {
--
2.43.0

View File

@@ -0,0 +1,46 @@
/*========================== begin_copyright_notice ============================
Copyright (C) 2024 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
/*========================== begin_copyright_notice ============================
Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
See https://llvm.org/LICENSE.txt for license information.
SPDX-License-Identifier: Apache-2.0 with LLVM-exception
============================= end_copyright_notice ===========================*/
The reason for removing unreachable blocks is this change in the LLVM repo:
https://github.com/llvm/llvm-project/commit/1065f3439bad59323f16e7c8ee568c7d94dcd952
LowerSwitchPass can leave phi instructions with incoming values from unreachable
basic blocks, which is a disallowed state for DomTree.
diff --git a/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index 227de425ff85..8d089c2a754c 100644
--- a/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -38,6 +38,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -556,6 +557,10 @@ bool LowerSwitch(Function &F, LazyValueInfo *LVI, AssumptionCache *AC) {
DeleteDeadBlock(BB);
}
+ if (!DeleteList.empty()) {
+ removeUnreachableBlocks(F);
+ }
+
return Changed;
}
--
2.34.1

View File

@@ -0,0 +1,46 @@
/*========================== begin_copyright_notice ============================
Copyright (C) 2025 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
The condition below is removed because it took an overly strict approach to
the convergent attribute, causing missed optimization opportunities in cases
where hoisting or sinking was actually safe.
The decision is based on the discussion in the LLVM RFC
https://reviews.llvm.org/D90361?id=303195
This patch should be considered obsolete if LICM introduces a more
advanced approach to the convergent attribute in a future version of
LLVM.
---
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 2865dece8..f879176b3 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -1202,8 +1202,18 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
// inter-thread communication which results are implicitly affected by the
// enclosing control flows. It is not safe to hoist or sink such operations
// across control flow.
- if (CI->isConvergent())
- return false;
+
+ // The reason for removal of below condition was that it took a very strict
+ // approach to the Convergent attribute, which caused missed optimization
+ // opportunities in cases where it was safe to do so.
+ // The decision is based on the discussion in LLVM RFC
+ // https://reviews.llvm.org/D90361?id=303195
+ // This patch should be considered obsolete if LICM introduces a more
+ // advanced approach to the Convergent attribute in the future version of
+ // LLVM.
+
+ //if (CI->isConvergent())
+ // return false;
using namespace PatternMatch;
if (match(CI, m_Intrinsic<Intrinsic::assume>()))
--
2.43.0
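
A minimal sketch of the kind of case this change enables (the callee @subgroup_op is hypothetical, standing in for a cross-lane operation): a loop-invariant convergent call that upstream LICM refuses to hoist solely because of the isConvergent() bail-out.

```llvm
; The call is convergent, so upstream canSinkOrHoistInst() rejects it even
; though its operands are loop-invariant and it does not access memory.
declare i32 @subgroup_op(i32) convergent readnone nounwind

define i32 @sum(i32 %x, i32 %n) {
entry:
  br label %body

body:
  %i = phi i32 [ 0, %entry ], [ %i.next, %body ]
  %acc = phi i32 [ 0, %entry ], [ %acc.next, %body ]
  ; Loop-invariant: with the isConvergent() check disabled, LICM may hoist
  ; this call into the preheader.
  %v = call i32 @subgroup_op(i32 %x)
  %acc.next = add i32 %acc, %v
  %i.next = add i32 %i, 1
  %cmp = icmp slt i32 %i.next, %n
  br i1 %cmp, label %body, label %exit

exit:
  ret i32 %acc.next
}
```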

View File

@@ -0,0 +1,44 @@
/*========================== begin_copyright_notice ============================
Copyright (C) 2025 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
/*========================== begin_copyright_notice ============================
Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
See https://llvm.org/LICENSE.txt for license information.
SPDX-License-Identifier: Apache-2.0 with LLVM-exception
============================= end_copyright_notice ===========================*/
This is copied verbatim from the corresponding LLVM 14 patches.
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 9beb2281c..a3cc73ca5 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -968,7 +968,7 @@ bool llvm::computeUnrollCount(
// cost of exact full unrolling. As such, if we have an exact count and
// found it unprofitable, we'll never chose to bounded unroll.
if (!TripCount && MaxTripCount && (UP.UpperBound || MaxOrZero) &&
- MaxTripCount <= UnrollMaxUpperBound) {
+ MaxTripCount < std::max(16U, UnrollMaxUpperBound.getValue())) {
UP.Count = MaxTripCount;
if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
MaxTripCount, UCE, UP)) {
@@ -1042,7 +1042,8 @@ bool llvm::computeUnrollCount(
}
// Don't unroll a small upper bound loop unless user or TTI asked to do so.
- if (MaxTripCount && !UP.Force && MaxTripCount < UnrollMaxUpperBound) {
+ if (MaxTripCount && !UP.Force &&
+ MaxTripCount < std::max(16U, UnrollMaxUpperBound.getValue())) {
UP.Count = 0;
return false;
}
--
2.43.0

View File

@@ -0,0 +1,49 @@
/*========================== begin_copyright_notice ============================
Copyright (C) 2023 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
/*========================== begin_copyright_notice ============================
Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
See https://llvm.org/LICENSE.txt for license information.
SPDX-License-Identifier: Apache-2.0 with LLVM-exception
============================= end_copyright_notice ===========================*/
# TODO: Once upstreamed, update with LLORG revision & adjust per community review
From 492a1c879f338c3f12ef4d2f619ca2c8f2467da8 Mon Sep 17 00:00:00 2001
From: Artem Gindinson <artem.gindinson@intel.com>
Date: Wed, 23 Aug 2023 15:41:51 +0200
Subject: [PATCH] [InstCombine] Check for NaN before folding `select` for FP
operators
---
llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index e7d8208f9..341d8fc49 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -484,8 +484,12 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
// instructions have different flags and add tests to ensure the
// behaviour is correct.
FastMathFlags FMF;
- if (isa<FPMathOperator>(&SI))
+ if (isa<FPMathOperator>(&SI)) {
FMF = SI.getFastMathFlags();
+ // Avoid folding on NaN inputs
+ if (!FMF.noNaNs())
+ return nullptr;
+ }
Constant *C = ConstantExpr::getBinOpIdentity(
TVI->getOpcode(), TVI->getType(), true, FMF.noSignedZeros());
Value *OOp = TVI->getOperand(2 - OpToFold);
--
2.43.0
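
The pattern foldSelectIntoOp rewrites looks roughly like the sketch below (the function and value names are illustrative): a select between a floating-point binary operator and one of its operands. Before this change the fold was applied regardless of fast-math flags; with it, the fold is only attempted when the select carries nnan.

```llvm
; Without nnan on the select, the patched foldSelectIntoOp() now bails out
; instead of rewriting this into roughly:
;   %sel.y = select i1 %c, float %y, float -0.0
;   %r     = fadd float %x, %sel.y
define float @select_fadd(i1 %c, float %x, float %y) {
  %add = fadd float %x, %y
  %sel = select i1 %c, float %add, float %x
  ret float %sel
}
```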

View File

@@ -0,0 +1,29 @@
/*========================== begin_copyright_notice ============================
Copyright (C) 2025 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
# Description : Fix ambiguous evaluation order
diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp
--- a/llvm/lib/IR/DebugInfo.cpp
+++ b/llvm/lib/IR/DebugInfo.cpp
@@ -683,7 +683,11 @@ private:
return getReplacementMDNode(N);
};
- Replacements[N] = doRemap(N);
+ // Separate recursive doRemap and operator [] into 2 lines to avoid
+ // out-of-order evaluations since both of them can access the same memory
+ // location in map Replacements.
+ auto Value = doRemap(N);
+ Replacements[N] = Value;
}
/// Do the remapping traversal.
--
2.43.0

View File

@@ -0,0 +1,367 @@
/*========================== begin_copyright_notice ============================
Copyright (C) 2025 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
# Description : Refactor getPreviousDefRecursive to getPreviousDefIterative
diff --git a/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/llvm/include/llvm/Analysis/MemorySSAUpdater.h
--- a/llvm/include/llvm/Analysis/MemorySSAUpdater.h
+++ b/llvm/include/llvm/Analysis/MemorySSAUpdater.h
@@ -251,10 +251,7 @@ private:
MemoryAccess *getPreviousDef(MemoryAccess *);
MemoryAccess *getPreviousDefInBlock(MemoryAccess *);
MemoryAccess *
- getPreviousDefFromEnd(BasicBlock *,
- DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &);
- MemoryAccess *
- getPreviousDefRecursive(BasicBlock *,
+ getPreviousDefIterative(BasicBlock *,
DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &);
MemoryAccess *recursePhi(MemoryAccess *Phi);
MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi);
diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp
--- a/llvm/lib/Analysis/MemorySSAUpdater.cpp
+++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp
@@ -20,6 +20,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
+#include <stack>
#define DEBUG_TYPE "memoryssa"
using namespace llvm;
@@ -33,66 +34,42 @@ using namespace llvm;
// that there are two or more definitions needing to be merged.
// This still will leave non-minimal form in the case of irreducible control
// flow, where phi nodes may be in cycles with themselves, but unnecessary.
-MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(
- BasicBlock *BB,
+MemoryAccess *MemorySSAUpdater::getPreviousDefIterative(
+ BasicBlock *BBB,
DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &CachedPreviousDef) {
- // First, do a cache lookup. Without this cache, certain CFG structures
- // (like a series of if statements) take exponential time to visit.
- auto Cached = CachedPreviousDef.find(BB);
- if (Cached != CachedPreviousDef.end())
- return Cached->second;
-
- // If this method is called from an unreachable block, return LoE.
- if (!MSSA->DT->isReachableFromEntry(BB))
- return MSSA->getLiveOnEntryDef();
- if (BasicBlock *Pred = BB->getUniquePredecessor()) {
- VisitedBlocks.insert(BB);
- // Single predecessor case, just recurse, we can only have one definition.
- MemoryAccess *Result = getPreviousDefFromEnd(Pred, CachedPreviousDef);
- CachedPreviousDef.insert({BB, Result});
- return Result;
- }
+ // There are 5 cases; case 3 (easy) and case 5 (hard) recurse.
+ // We need special states to handle their recursive returns
+ enum State {COMMON, CASE3, CASE5};
- if (VisitedBlocks.count(BB)) {
- // We hit our node again, meaning we had a cycle, we must insert a phi
- // node to break it so we have an operand. The only case this will
- // insert useless phis is if we have irreducible control flow.
- MemoryAccess *Result = MSSA->createMemoryPhi(BB);
- CachedPreviousDef.insert({BB, Result});
- return Result;
- }
+ // This is the common frame required for everything
+ struct Frame {
+ BasicBlock *bb;
+ MemoryAccess *rtn;
+ State st;
+ };
- if (VisitedBlocks.insert(BB).second) {
- // Mark us visited so we can detect a cycle
+ // This is the additional info only required by Case 5
+ struct FrameCase5 {
SmallVector<TrackingVH<MemoryAccess>, 8> PhiOps;
+ bool UniqueIncomingAccess;
+ MemoryAccess *SingleAccess;
+ pred_iterator PredIt;
+ };
- // Recurse to get the values in our predecessors for placement of a
- // potential phi node. This will insert phi nodes if we cycle in order to
- // break the cycle and have an operand.
- bool UniqueIncomingAccess = true;
- MemoryAccess *SingleAccess = nullptr;
- for (auto *Pred : predecessors(BB)) {
- if (MSSA->DT->isReachableFromEntry(Pred)) {
- auto *IncomingAccess = getPreviousDefFromEnd(Pred, CachedPreviousDef);
- if (!SingleAccess)
- SingleAccess = IncomingAccess;
- else if (IncomingAccess != SingleAccess)
- UniqueIncomingAccess = false;
- PhiOps.push_back(IncomingAccess);
- } else
- PhiOps.push_back(MSSA->getLiveOnEntryDef());
- }
-
+ auto Case5AfterLoop = [&](SmallVector<TrackingVH<MemoryAccess>, 8> & PhiOps,
+ bool & UniqueIncomingAccess, MemoryAccess *& SingleAccess,
+ BasicBlock * BB) -> MemoryAccess * {
// Now try to simplify the ops to avoid placing a phi.
// This may return null if we never created a phi yet, that's okay
MemoryPhi *Phi = dyn_cast_or_null<MemoryPhi>(MSSA->getMemoryAccess(BB));
// See if we can avoid the phi by simplifying it.
- auto *Result = tryRemoveTrivialPhi(Phi, PhiOps);
+ MemoryAccess *Result = tryRemoveTrivialPhi(Phi, PhiOps);
// If we couldn't simplify, we may have to create a phi
if (Result == Phi && UniqueIncomingAccess && SingleAccess) {
- // A concrete Phi only exists if we created an empty one to break a cycle.
+ // A concrete Phi only exists if we created an empty one to break a
+ // cycle.
if (Phi) {
assert(Phi->operands().empty() && "Expected empty Phi");
Phi->replaceAllUsesWith(SingleAccess);
@@ -104,12 +81,13 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(
Phi = MSSA->createMemoryPhi(BB);
// See if the existing phi operands match what we need.
- // Unlike normal SSA, we only allow one phi node per block, so we can't just
- // create a new one.
+ // Unlike normal SSA, we only allow one phi node per block, so we
+ // can't just create a new one.
if (Phi->getNumOperands() != 0) {
// FIXME: Figure out whether this is dead code and if so remove it.
if (!std::equal(Phi->op_begin(), Phi->op_end(), PhiOps.begin())) {
- // These will have been filled in by the recursive read we did above.
+ // These will have been filled in by the recursive read we did
+ // above.
llvm::copy(PhiOps, Phi->op_begin());
std::copy(pred_begin(BB), pred_end(BB), Phi->block_begin());
}
@@ -126,8 +104,170 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(
VisitedBlocks.erase(BB);
CachedPreviousDef.insert({BB, Result});
return Result;
+ };
+
+ // We may want to switch to a vector to boost performance
+ std::stack<Frame> SF;
+ std::stack<FrameCase5> SF5;
+ // The return frame
+ SF.push({nullptr, nullptr, COMMON});
+ // The entry frame
+ SF.push({BBB, nullptr, COMMON});
+
+ while (SF.size() > 1) {
+
+ if (COMMON == SF.top().st) {
+ auto BB = SF.top().bb;
+ auto Cached = CachedPreviousDef.find(BB);
+ if (Cached != CachedPreviousDef.end()) {
+ SF.pop();
+ SF.top().rtn = Cached->second;
+ continue;
+ } else if (!MSSA->DT->isReachableFromEntry(BB)) {
+ SF.pop();
+ SF.top().rtn = MSSA->getLiveOnEntryDef();
+ continue;
+ } else if (BasicBlock *Pred = BB->getUniquePredecessor()) {
+ VisitedBlocks.insert(BB);
+ // Single predecessor case, just recurse, we can only have one
+ // definition.
+ MemoryAccess *prevDefFromEnd = nullptr;
+ auto *Defs = MSSA->getWritableBlockDefs(Pred);
+ if (Defs) {
+ CachedPreviousDef.insert({Pred, &*Defs->rbegin()});
+ prevDefFromEnd = &*Defs->rbegin();
+ } else {
+ SF.top().st = CASE3;
+ SF.push({Pred, nullptr, COMMON});
+ continue;
+ }
+ MemoryAccess *Result = prevDefFromEnd;
+ CachedPreviousDef.insert({BB, Result});
+ SF.pop();
+ SF.top().rtn = Result;
+ continue;
+ } else if (VisitedBlocks.count(BB)) {
+ // We hit our node again, meaning we had a cycle, we must insert a phi
+ // node to break it so we have an operand. The only case this will
+ // insert useless phis is if we have irreducible control flow.
+ MemoryAccess *Result = MSSA->createMemoryPhi(BB);
+ CachedPreviousDef.insert({BB, Result});
+ SF.pop();
+ SF.top().rtn = Result;
+ continue;
+ } else if (VisitedBlocks.insert(BB).second) {
+ // Mark us visited so we can detect a cycle
+ SmallVector<TrackingVH<MemoryAccess>, 8> PhiOps;
+
+ // Recurse to get the values in our predecessors for placement of a
+ // potential phi node. This will insert phi nodes if we cycle in order
+ // to break the cycle and have an operand.
+ bool UniqueIncomingAccess = true;
+ MemoryAccess *SingleAccess = nullptr;
+ bool halt = false;
+ for (auto PredIt = predecessors(BB).begin();
+ PredIt != predecessors(BB).end(); PredIt++) {
+ auto Pred = *PredIt;
+ if (MSSA->DT->isReachableFromEntry(Pred)) {
+ MemoryAccess *prevDefFromEnd = nullptr;
+ auto *Defs = MSSA->getWritableBlockDefs(Pred);
+ if (Defs) {
+ CachedPreviousDef.insert({Pred, &*Defs->rbegin()});
+ prevDefFromEnd = &*Defs->rbegin();
+ } else {
+ SF.top().st = CASE5;
+ SF.push({Pred, nullptr, COMMON});
+ SF5.push({
+ std::move(PhiOps), UniqueIncomingAccess, SingleAccess,
+ std::move(PredIt)
+ });
+ halt = true;
+ break;
+ }
+ auto *IncomingAccess = prevDefFromEnd;
+ if (!SingleAccess)
+ SingleAccess = IncomingAccess;
+ else if (IncomingAccess != SingleAccess)
+ UniqueIncomingAccess = false;
+ PhiOps.push_back(IncomingAccess);
+ } else
+ PhiOps.push_back(MSSA->getLiveOnEntryDef());
+ }
+ if (halt)
+ continue;
+
+ auto Result =
+ Case5AfterLoop(PhiOps, UniqueIncomingAccess, SingleAccess, BB);
+
+ // Set ourselves up for the next variable by resetting visited state.
+ VisitedBlocks.erase(BB);
+ CachedPreviousDef.insert({BB, Result});
+ SF.pop();
+ SF.top().rtn = Result;
+ continue;
+ }
+ llvm_unreachable("Should have hit one of the five cases above");
+ } else if (CASE3 == SF.top().st) {
+ auto Result = SF.top().rtn;
+ CachedPreviousDef.insert({SF.top().bb, Result});
+ SF.pop();
+ SF.top().rtn = Result;
+ continue;
+ } else { // CASE5
+ // recover header
+ auto &PhiOps = SF5.top().PhiOps;
+ auto &UniqueIncomingAccess = SF5.top().UniqueIncomingAccess;
+ auto &SingleAccess = SF5.top().SingleAccess;
+ auto &PredIt = SF5.top().PredIt;
+ auto IncomingAccess = SF.top().rtn;
+ auto BB = SF.top().bb;
+
+ // in-loop remaining code
+ if (!SingleAccess)
+ SingleAccess = IncomingAccess;
+ else if (IncomingAccess != SingleAccess)
+ UniqueIncomingAccess = false;
+ PhiOps.push_back(IncomingAccess);
+
+ // remaining loop
+ bool halt = false;
+ for (PredIt++; PredIt != predecessors(BB).end(); PredIt++) {
+ auto Pred = *PredIt;
+ if (MSSA->DT->isReachableFromEntry(Pred)) {
+ MemoryAccess *prevDefFromEnd = nullptr;
+ auto *Defs = MSSA->getWritableBlockDefs(Pred);
+ if (Defs) {
+ CachedPreviousDef.insert({Pred, &*Defs->rbegin()});
+ prevDefFromEnd = &*Defs->rbegin();
+ } else {
+ SF.push({Pred, nullptr, COMMON});
+ halt = true;
+ break;
+ }
+ auto *IncomingAccess = prevDefFromEnd;
+ if (!SingleAccess)
+ SingleAccess = IncomingAccess;
+ else if (IncomingAccess != SingleAccess)
+ UniqueIncomingAccess = false;
+ PhiOps.push_back(IncomingAccess);
+ } else
+ PhiOps.push_back(MSSA->getLiveOnEntryDef());
+ }
+ if (halt)
+ continue;
+ // after loop
+ auto Result =
+ Case5AfterLoop(PhiOps, UniqueIncomingAccess, SingleAccess, BB);
+ SF.pop();
+ SF.top().rtn = Result;
+ SF5.pop();
+ continue;
+ }
+
+ llvm_unreachable("Should have hit one of the three cases above");
}
- llvm_unreachable("Should have hit one of the three cases above");
+ assert(0 == SF5.size());
+ return SF.top().rtn;
}
// This starts at the memory access, and goes backwards in the block to find the
@@ -138,7 +278,7 @@ MemoryAccess *MemorySSAUpdater::getPreviousDef(MemoryAccess *MA) {
if (auto *LocalResult = getPreviousDefInBlock(MA))
return LocalResult;
DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> CachedPreviousDef;
- return getPreviousDefRecursive(MA->getBlock(), CachedPreviousDef);
+ return getPreviousDefIterative(MA->getBlock(), CachedPreviousDef);
}
// This starts at the memory access, and goes backwards in the block to the find
@@ -168,19 +308,6 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefInBlock(MemoryAccess *MA) {
return nullptr;
}
-// This starts at the end of block
-MemoryAccess *MemorySSAUpdater::getPreviousDefFromEnd(
- BasicBlock *BB,
- DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &CachedPreviousDef) {
- auto *Defs = MSSA->getWritableBlockDefs(BB);
-
- if (Defs) {
- CachedPreviousDef.insert({BB, &*Defs->rbegin()});
- return &*Defs->rbegin();
- }
-
- return getPreviousDefRecursive(BB, CachedPreviousDef);
-}
// Recurse over a set of phi uses to eliminate the trivial ones
MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) {
if (!Phi)
@@ -396,7 +523,17 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
auto *BBIDF = MPhi->getBlock();
for (auto *Pred : predecessors(BBIDF)) {
DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> CachedPreviousDef;
- MPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef), Pred);
+ // inline getPreviousDefFromEnd start
+ MemoryAccess *prevDefFromEnd = nullptr;
+ auto *Defs = MSSA->getWritableBlockDefs(Pred);
+ if (Defs) {
+ CachedPreviousDef.insert({Pred, &*Defs->rbegin()});
+ prevDefFromEnd = & * Defs->rbegin();
+ } else {
+ prevDefFromEnd = getPreviousDefIterative(Pred, CachedPreviousDef);
+ }
+ // inline getPreviousDefFromEnd end
+ MPhi->addIncoming(prevDefFromEnd, Pred);
}
}
--
2.43.0

View File

@@ -0,0 +1,39 @@
/*========================== begin_copyright_notice ============================
Copyright (C) 2025 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
/*========================== begin_copyright_notice ============================
Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
See https://llvm.org/LICENSE.txt for license information.
SPDX-License-Identifier: Apache-2.0 with LLVM-exception
============================= end_copyright_notice ===========================*/
This is a cherry-pick of commit https://github.com/llvm/llvm-project/commit/a87738f86b17f4a8dcde538c60826506e2a27ed1:
"[AutoUpgrade] Don't upgrade intrinsics returning overloaded struct type"
---
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 7b9c55ff3..d0d5c9f4e 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1131,7 +1131,8 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
auto *ST = dyn_cast<StructType>(F->getReturnType());
- if (ST && (!ST->isLiteral() || ST->isPacked())) {
+ if (ST && (!ST->isLiteral() || ST->isPacked()) &&
+ F->getIntrinsicID() != Intrinsic::not_intrinsic) {
// Replace return type with literal non-packed struct. Only do this for
// intrinsics declared to return a struct, not for intrinsics with
// overloaded return type, in which case the exact struct type will be
--
2.43.0

View File

@@ -0,0 +1,143 @@
/*========================== begin_copyright_notice ============================
Copyright (C) 2025 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
/*========================== begin_copyright_notice ============================
Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
See https://llvm.org/LICENSE.txt for license information.
SPDX-License-Identifier: Apache-2.0 with LLVM-exception
============================= end_copyright_notice ===========================*/
This is a backport of this fix: https://github.com/llvm/llvm-project/pull/99257
"[IR] Unify max alignment for arguments with generic max align."
From 47f034550a5fb9ef6adee6347cd3c00e70ca663d Mon Sep 17 00:00:00 2001
From: Eli Friedman <efriedma@quicinc.com>
Date: Tue, 16 Jul 2024 16:03:38 -0700
Subject: [PATCH 1/3] Unify max alignment for arguments with generic max align.
The 2^14 limit was completely arbitrary; the generic limit is still
arbitrary, but at least it's the same arbitrary limit as everything
else.
While I'm here, also add a verifier check for the ByValOrByRefSize.
---
llvm/include/llvm/CodeGen/TargetCallingConv.h | 8 ++---
llvm/lib/IR/Verifier.cpp | 34 +++++++++++--------
2 files changed, 23 insertions(+), 19 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h
index 89ea9bcb2a40..70a2d8e5faaf 100644
--- a/llvm/include/llvm/CodeGen/TargetCallingConv.h
+++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h
@@ -45,9 +45,9 @@ namespace ISD {
unsigned IsHva : 1; ///< HVA field for
unsigned IsHvaStart : 1; ///< HVA structure start
unsigned IsSecArgPass : 1; ///< Second argument
- unsigned MemAlign : 4; ///< Log 2 of alignment when arg is passed in memory
- ///< (including byval/byref). The max alignment is
- ///< verified in IR verification.
+ unsigned MemAlign : 6; ///< Log 2 of alignment when arg is passed in memory
+ ///< (including byval/byref). The max alignment is
+ ///< verified in IR verification.
unsigned OrigAlign : 5; ///< Log 2 of original alignment
unsigned IsInConsecutiveRegsLast : 1;
unsigned IsInConsecutiveRegs : 1;
@@ -67,7 +67,7 @@ namespace ISD {
IsSecArgPass(0), MemAlign(0), OrigAlign(0),
IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0),
IsCopyElisionCandidate(0), IsPointer(0) {
- static_assert(sizeof(*this) == 3 * sizeof(unsigned), "flags are too big");
+ static_assert(sizeof(*this) == 4 * sizeof(unsigned), "flags are too big");
}
bool isZExt() const { return IsZExt; }
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 1408ce293ca6..2370bfd0a2be 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -291,13 +291,6 @@ namespace {
class Verifier : public InstVisitor<Verifier>, VerifierSupport {
friend class InstVisitor<Verifier>;
-
- // ISD::ArgFlagsTy::MemAlign only have 4 bits for alignment, so
- // the alignment size should not exceed 2^15. Since encode(Align)
- // would plus the shift value by 1, the alignment size should
- // not exceed 2^14, otherwise it can NOT be properly lowered
- // in backend.
- static constexpr unsigned ParamMaxAlignment = 1 << 14;
DominatorTree DT;
/// When verifying a basic block, keep track of all of the
@@ -1939,31 +1932,43 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
}
if (isa<PointerType>(Ty)) {
+ if (Attrs.hasAttribute(Attribute::Alignment)) {
+ Align AttrAlign = Attrs.getAlignment().valueOrOne();
+ Check(AttrAlign.value() <= Value::MaximumAlignment,
+ "huge alignment values are unsupported", V);
+ }
if (Attrs.hasAttribute(Attribute::ByVal)) {
- if (Attrs.hasAttribute(Attribute::Alignment)) {
- Align AttrAlign = Attrs.getAlignment().valueOrOne();
- Align MaxAlign(ParamMaxAlignment);
- Check(AttrAlign <= MaxAlign,
- "Attribute 'align' exceed the max size 2^14", V);
- }
SmallPtrSet<Type *, 4> Visited;
Check(Attrs.getByValType()->isSized(&Visited),
"Attribute 'byval' does not support unsized types!", V);
+ Check(DL.getTypeAllocSize(Attrs.getByValType()).getKnownMinValue() <
+ (1ULL << 32),
+ "huge 'byval' arguments are unsupported", V);
}
if (Attrs.hasAttribute(Attribute::ByRef)) {
SmallPtrSet<Type *, 4> Visited;
Check(Attrs.getByRefType()->isSized(&Visited),
"Attribute 'byref' does not support unsized types!", V);
+ Check(DL.getTypeAllocSize(Attrs.getByRefType()).getKnownMinValue() <
+ (1ULL << 32),
+ "huge 'byref' arguments are unsupported", V);
}
if (Attrs.hasAttribute(Attribute::InAlloca)) {
SmallPtrSet<Type *, 4> Visited;
Check(Attrs.getInAllocaType()->isSized(&Visited),
"Attribute 'inalloca' does not support unsized types!", V);
+ Check(DL.getTypeAllocSize(Attrs.getInAllocaType()).getKnownMinValue() <
+ (1ULL << 32),
+ "huge 'inalloca' arguments are unsupported", V);
}
if (Attrs.hasAttribute(Attribute::Preallocated)) {
SmallPtrSet<Type *, 4> Visited;
Check(Attrs.getPreallocatedType()->isSized(&Visited),
"Attribute 'preallocated' does not support unsized types!", V);
+ Check(
+ DL.getTypeAllocSize(Attrs.getPreallocatedType()).getKnownMinValue() <
+ (1ULL << 32),
+ "huge 'preallocated' arguments are unsupported", V);
}
}
@@ -3424,8 +3429,7 @@ void Verifier::visitCallBase(CallBase &Call) {
if (!Ty->isSized())
return;
Align ABIAlign = DL.getABITypeAlign(Ty);
- Align MaxAlign(ParamMaxAlignment);
- Check(ABIAlign <= MaxAlign,
+ Check(ABIAlign.value() <= Value::MaximumAlignment,
"Incorrect alignment of " + Message + " to called function!", Call);
};
--
2.43.0
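
As a rough illustration of what the relaxed limit means in IR terms (the function is hypothetical): a parameter alignment above 2^14 used to trip the verifier, while after the backport anything up to Value::MaximumAlignment verifies, and the new checks instead reject byval/byref/inalloca/preallocated types whose allocation size reaches 2^32 bytes.

```llvm
; align 32768 (2^15) exceeded the old ParamMaxAlignment of 2^14 and was
; rejected with "Attribute 'align' exceed the max size 2^14"; with the
; backport it verifies, since 32768 <= Value::MaximumAlignment.
define void @takes_aligned_byval(ptr byval(i32) align 32768 %p) {
  ret void
}
```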