From 9d5bec323744edd0327911b0f693d9613009880a Mon Sep 17 00:00:00 2001 From: "Mielczarek, Aleksander" Date: Mon, 13 Oct 2025 06:46:54 +0000 Subject: [PATCH] Additional LLVM patching Add necessary LLVM patches. --- ...eating-a-stack-space-for-inlined-byv.patch | 32 ++ ...verse-or-bswap-intrinsics-of-illegal.patch | 28 ++ ...-fold-bitcast-fptrunc-if-destination.patch | 71 ++++ ...-fold-extract-element-to-trunc-if-ve.patch | 99 +++++ .../LowerSwitch-RemoveUnreachableBBs.patch | 46 +++ ...too-strict-restrictions-in-LICM-pass.patch | 46 +++ ...upperbound-command-line-option-value.patch | 44 +++ ...for-NaN-before-folding-select-for-FP.patch | 49 +++ ...TypeInfoRemoval-remap_eval-ambiguity.patch | 29 ++ ...ke-getPreviousDefRecursive-iterative.patch | 367 ++++++++++++++++++ ...oupgrader-igc-struct-typed-intrinsic.patch | 39 ++ ...unify-max-alignment-with-generic-max.patch | 143 +++++++ 12 files changed, 993 insertions(+) create mode 100644 external/llvm/releases/17.0.0/patches_external/Backport-When-creating-a-stack-space-for-inlined-byv.patch create mode 100644 external/llvm/releases/17.0.0/patches_external/Don-t-emit-bitreverse-or-bswap-intrinsics-of-illegal.patch create mode 100644 external/llvm/releases/17.0.0/patches_external/InstCombine-Only-fold-bitcast-fptrunc-if-destination.patch create mode 100644 external/llvm/releases/17.0.0/patches_external/InstCombine-Only-fold-extract-element-to-trunc-if-ve.patch create mode 100644 external/llvm/releases/17.0.0/patches_external/LowerSwitch-RemoveUnreachableBBs.patch create mode 100644 external/llvm/releases/17.0.0/patches_external/Remove-too-strict-restrictions-in-LICM-pass.patch create mode 100644 external/llvm/releases/17.0.0/patches_external/alter-unroll-max-upperbound-command-line-option-value.patch create mode 100644 external/llvm/releases/17.0.0/patches_external/check-for-NaN-before-folding-select-for-FP.patch create mode 100644 external/llvm/releases/17.0.0/patches_external/fix_DebugTypeInfoRemoval-remap_eval-ambiguity.patch create mode 100644 external/llvm/releases/17.0.0/patches_external/make-getPreviousDefRecursive-iterative.patch create mode 100644 external/llvm/releases/17.0.0/patches_external/no-autoupgrader-igc-struct-typed-intrinsic.patch create mode 100644 external/llvm/releases/17.0.0/patches_external/unify-max-alignment-with-generic-max.patch diff --git a/external/llvm/releases/17.0.0/patches_external/Backport-When-creating-a-stack-space-for-inlined-byv.patch b/external/llvm/releases/17.0.0/patches_external/Backport-When-creating-a-stack-space-for-inlined-byv.patch new file mode 100644 index 000000000..9dcc9d1e6 --- /dev/null +++ b/external/llvm/releases/17.0.0/patches_external/Backport-When-creating-a-stack-space-for-inlined-byv.patch @@ -0,0 +1,32 @@ +From 84d340ed615c3601a2f46178acce2040d9d114f9 Mon Sep 17 00:00:00 2001 +From: Victor Mustya +Date: Mon, 31 Oct 2022 13:27:02 -0700 +Subject: =?UTF-8?q?[Backport]=20When=20creating=20a=20stack=20space=20for?= + =?UTF-8?q?=20inlined=20byval=20args,=0A=20use=20the=20same=20addrspace=20?= + =?UTF-8?q?as=20the=20original=20argument.?= + +From: Chang-Sun Lin Jr +--- + llvm/lib/Transforms/Utils/InlineFunction.cpp | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp +index 399c9a43793f..bfb027568227 100644 +--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp ++++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp +@@ -1599,6 +1599,12 @@ static Value *HandleByValArgument(Type *ByValType, Value 
*Arg, + Arg->getName(), &*Caller->begin()->begin()); + IFI.StaticAllocas.push_back(cast(NewAlloca)); + ++ // If the byval was in a different address space, add a cast. ++ if (DL.getAllocaAddrSpace() != Arg->getType()->getPointerAddressSpace()) { ++ NewAlloca = new AddrSpaceCastInst( ++ NewAlloca, Arg->getType(), "", ++ cast(NewAlloca)->getNextNonDebugInstruction()); ++ } + // Uses of the argument in the function should use our new alloca + // instead. + return NewAlloca; +-- +2.43.0 + diff --git a/external/llvm/releases/17.0.0/patches_external/Don-t-emit-bitreverse-or-bswap-intrinsics-of-illegal.patch b/external/llvm/releases/17.0.0/patches_external/Don-t-emit-bitreverse-or-bswap-intrinsics-of-illegal.patch new file mode 100644 index 000000000..a3b865ca3 --- /dev/null +++ b/external/llvm/releases/17.0.0/patches_external/Don-t-emit-bitreverse-or-bswap-intrinsics-of-illegal.patch @@ -0,0 +1,28 @@ +From 881bf715f06201a57a4f1a60155b556fedd556db Mon Sep 17 00:00:00 2001 +From: Victor Mustya +Date: Tue, 22 Aug 2023 11:10:30 -0700 +Subject: [PATCH] Don't emit bitreverse or bswap intrinsics of illegal bit + width during instcombine + +--- + llvm/lib/Transforms/Utils/Local.cpp | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp +index b2ed95b05..476a5c4c1 100644 +--- a/llvm/lib/Transforms/Utils/Local.cpp ++++ b/llvm/lib/Transforms/Utils/Local.cpp +@@ -3312,6 +3312,10 @@ bool llvm::recognizeBSwapOrBitReverseIdiom( + if (DemandedBW > ITy->getScalarSizeInBits()) + return false; + ++ auto &DL = I->getModule()->getDataLayout(); ++ if (DL.isIllegalInteger(DemandedBW)) ++ return false; ++ + // Now, is the bit permutation correct for a bswap or a bitreverse? We can + // only byteswap values with an even number of bytes. + APInt DemandedMask = APInt::getAllOnes(DemandedBW); +-- +2.43.0 + diff --git a/external/llvm/releases/17.0.0/patches_external/InstCombine-Only-fold-bitcast-fptrunc-if-destination.patch b/external/llvm/releases/17.0.0/patches_external/InstCombine-Only-fold-bitcast-fptrunc-if-destination.patch new file mode 100644 index 000000000..8959f1a1e --- /dev/null +++ b/external/llvm/releases/17.0.0/patches_external/InstCombine-Only-fold-bitcast-fptrunc-if-destination.patch @@ -0,0 +1,71 @@ +/*========================== begin_copyright_notice ============================ + +Copyright (C) 2024 Intel Corporation + +SPDX-License-Identifier: MIT + +============================= end_copyright_notice ===========================*/ + +From 58b5b7d4ed6204f61feeda68c7c1abe24bc143b1 Mon Sep 17 00:00:00 2001 +From: Victor Mustya +Date: Tue, 16 Jan 2024 14:13:05 -0800 +Subject: [InstCombine] Only fold bitcast(fptrunc) if destination type matches + fptrunc result type. (#77046) + +It's not enough to just make sure destination type is floating point, +because the following chain may be incorrectly optimized: +```LLVM + %trunc = fptrunc float %src to bfloat + %cast = bitcast bfloat %trunc to half +``` +Before the fix, the instruction sequence mentioned above used to be +translated into single fptrunc instruction as follows: +```LLVM + %trunc = fptrunc float %src to half +``` + +Such transformation was semantically incorrect. 
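+
+For a concrete illustration (a hand-worked example added here, not part of
+the upstream commit message): with %src = 1.0 the two sequences disagree,
+because bfloat and half place their exponent/mantissa bits differently.
+```llvm
+  ; assume %src = 1.0
+  %trunc = fptrunc float %src to bfloat  ; bfloat 1.0, bit pattern 0x3F80
+  %cast  = bitcast bfloat %trunc to half ; 0x3F80 read as half = 1.875
+  ; the removed fold would instead have produced:
+  %bad   = fptrunc float %src to half    ; half 1.0
+```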
+--- + llvm/lib/IR/Instructions.cpp | 4 ++-- + llvm/test/Transforms/InstCombine/fptrunc.ll | 13 +++++++++++++ + 2 files changed, 15 insertions(+), 2 deletions(-) + +diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp +index 7c343a0ff..932fc66a8 100644 +--- a/llvm/lib/IR/Instructions.cpp ++++ b/llvm/lib/IR/Instructions.cpp +@@ -3218,8 +3218,8 @@ unsigned CastInst::isEliminableCastPair( + return 0; + case 4: + // No-op cast in second op implies firstOp as long as the DestTy +- // is floating point. +- if (DstTy->isFloatingPointTy()) ++ // matches MidTy. ++ if (DstTy == MidTy) + return firstOp; + return 0; + case 5: +diff --git a/llvm/test/Transforms/InstCombine/fptrunc.ll b/llvm/test/Transforms/InstCombine/fptrunc.ll +index d3e153f12..c78df0b83 100644 +--- a/llvm/test/Transforms/InstCombine/fptrunc.ll ++++ b/llvm/test/Transforms/InstCombine/fptrunc.ll +@@ -190,3 +190,16 @@ define half @ItoFtoF_u25_f32_f16(i25 %i) { + %r = fptrunc float %x to half + ret half %r + } ++ ++; Negative test - bitcast bfloat to half is not optimized ++ ++define half @fptrunc_to_bfloat_bitcast_to_half(float %src) { ++; CHECK-LABEL: @fptrunc_to_bfloat_bitcast_to_half( ++; CHECK-NEXT: [[TRUNC:%.*]] = fptrunc float [[SRC:%.*]] to bfloat ++; CHECK-NEXT: [[CAST:%.*]] = bitcast bfloat [[TRUNC]] to half ++; CHECK-NEXT: ret half [[CAST]] ++; ++ %trunc = fptrunc float %src to bfloat ++ %cast = bitcast bfloat %trunc to half ++ ret half %cast ++} +-- +2.34.1 + diff --git a/external/llvm/releases/17.0.0/patches_external/InstCombine-Only-fold-extract-element-to-trunc-if-ve.patch b/external/llvm/releases/17.0.0/patches_external/InstCombine-Only-fold-extract-element-to-trunc-if-ve.patch new file mode 100644 index 000000000..ff3ee90bc --- /dev/null +++ b/external/llvm/releases/17.0.0/patches_external/InstCombine-Only-fold-extract-element-to-trunc-if-ve.patch @@ -0,0 +1,99 @@ +/*========================== begin_copyright_notice ============================ + +Copyright (C) 2025 Intel Corporation + +SPDX-License-Identifier: MIT + +============================= end_copyright_notice ===========================*/ + +/*========================== begin_copyright_notice ============================ + +Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +See https://llvm.org/LICENSE.txt for license information. +SPDX-License-Identifier: Apache-2.0 with LLVM-exception + +============================= end_copyright_notice ===========================*/ + +From c5c679933c462f28fac7358841a23ee32c292a47 Mon Sep 17 00:00:00 2001 +From: peterbell10 +Date: Wed, 20 Nov 2024 21:06:57 +0000 +Subject: [PATCH] [InstCombine] Only fold extract element to trunc if vector + `hasOneUse` (#115627) + +This fixes a missed optimization caused by the `foldBitcastExtElt` +pattern interfering with other combine patterns. In the case I was +hitting, we have IR that combines two vectors into a new larger vector +by extracting elements and inserting them into the new vector. 
+ +```llvm +define <4 x half> @bitcast_extract_insert_to_shuffle(i32 %a, i32 %b) { + %avec = bitcast i32 %a to <2 x half> + %a0 = extractelement <2 x half> %avec, i32 0 + %a1 = extractelement <2 x half> %avec, i32 1 + %bvec = bitcast i32 %b to <2 x half> + %b0 = extractelement <2 x half> %bvec, i32 0 + %b1 = extractelement <2 x half> %bvec, i32 1 + %ins0 = insertelement <4 x half> undef, half %a0, i32 0 + %ins1 = insertelement <4 x half> %ins0, half %a1, i32 1 + %ins2 = insertelement <4 x half> %ins1, half %b0, i32 2 + %ins3 = insertelement <4 x half> %ins2, half %b1, i32 3 + ret <4 x half> %ins3 +} +``` + +With the current behavior, `InstCombine` converts each vector extract +sequence to + +```llvm + %tmp = trunc i32 %a to i16 + %a0 = bitcast i16 %tmp to half + %a1 = extractelement <2 x half> %avec, i32 1 +``` + +where the extraction of `%a0` is now done by truncating the original +integer. While on it's own this is fairly reasonable, in this case it +also blocks the pattern which converts `extractelement` - +`insertelement` into shuffles which gives the overall simpler result: + +```llvm +define <4 x half> @bitcast_extract_insert_to_shuffle(i32 %a, i32 %b) { + %avec = bitcast i32 %a to <2 x half> + %bvec = bitcast i32 %b to <2 x half> + %ins3 = shufflevector <2 x half> %avec, <2 x half> %bvec, <4 x i32> + ret <4 x half> %ins3 +} +``` + +In this PR I fix the conflict by obeying the `hasOneUse` check even if +there is no shift instruction required. In these cases we can't remove +the vector completely, so the pattern has less benefit anyway. + +Also fwiw, I think dropping the `hasOneUse` check for the 0th element +might have been a mistake in the first place. Looking at +https://github.com/llvm/llvm-project/commit/535c5d56a7bc9966036a11362d8984983a4bf090 +the commit message only mentions loosening the `isDesirableIntType` +requirement and doesn't mention changing the `hasOneUse` check at all. 
+--- + llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +index 61e62adbe327..d3b30848ab8b 100644 +--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp ++++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +@@ -202,9 +202,9 @@ Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) { + if (IsBigEndian) + ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC; + unsigned ShiftAmountC = ExtIndexC * DestWidth; +- if (!ShiftAmountC || +- (isDesirableIntType(X->getType()->getPrimitiveSizeInBits()) && +- Ext.getVectorOperand()->hasOneUse())) { ++ if ((!ShiftAmountC || ++ isDesirableIntType(X->getType()->getPrimitiveSizeInBits())) && ++ Ext.getVectorOperand()->hasOneUse()) { + if (ShiftAmountC) + X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset"); + if (DestTy->isFloatingPointTy()) { +-- +2.43.0 + diff --git a/external/llvm/releases/17.0.0/patches_external/LowerSwitch-RemoveUnreachableBBs.patch b/external/llvm/releases/17.0.0/patches_external/LowerSwitch-RemoveUnreachableBBs.patch new file mode 100644 index 000000000..030123857 --- /dev/null +++ b/external/llvm/releases/17.0.0/patches_external/LowerSwitch-RemoveUnreachableBBs.patch @@ -0,0 +1,46 @@ +/*========================== begin_copyright_notice ============================ + +Copyright (C) 2024 Intel Corporation + +SPDX-License-Identifier: MIT + +============================= end_copyright_notice ===========================*/ +/*========================== begin_copyright_notice ============================ + +Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +See https://llvm.org/LICENSE.txt for license information. +SPDX-License-Identifier: Apache-2.0 with LLVM-exception + +============================= end_copyright_notice ===========================*/ +The reason for removing unreachable blocks is this change in the LLVM repo: +https://github.com/llvm/llvm-project/commit/1065f3439bad59323f16e7c8ee568c7d94dcd952 +LowerSwitchPass can leave phi instructions with nodes from unreachable basic blocks +which is a disallowed state for DomTree. 
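+
+A minimal sketch (hypothetical IR, not taken from the linked commit) of the
+problematic shape: %dead is unreachable from the entry block but is still a
+CFG predecessor of %merge, so the phi keeps an incoming value from it.
+Calling removeUnreachableBlocks(F) deletes %dead and drops that phi operand.
+```llvm
+define i32 @sketch() {
+entry:
+  br label %merge
+
+dead:                 ; unreachable, yet still a predecessor of %merge
+  br label %merge
+
+merge:
+  %r = phi i32 [ 0, %entry ], [ 1, %dead ]
+  ret i32 %r
+}
+```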
+ +diff --git a/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/llvm/lib/Transforms/Utils/LowerSwitch.cpp +index 227de425ff85..8d089c2a754c 100644 +--- a/llvm/lib/Transforms/Utils/LowerSwitch.cpp ++++ b/llvm/lib/Transforms/Utils/LowerSwitch.cpp +@@ -38,6 +38,7 @@ + #include "llvm/Support/raw_ostream.h" + #include "llvm/Transforms/Utils.h" + #include "llvm/Transforms/Utils/BasicBlockUtils.h" ++#include "llvm/Transforms/Utils/Local.h" + #include + #include + #include +@@ -556,6 +557,10 @@ bool LowerSwitch(Function &F, LazyValueInfo *LVI, AssumptionCache *AC) { + DeleteDeadBlock(BB); + } + ++ if (!DeleteList.empty()) { ++ removeUnreachableBlocks(F); ++ } ++ + return Changed; + } + + +-- +2.34.1 + diff --git a/external/llvm/releases/17.0.0/patches_external/Remove-too-strict-restrictions-in-LICM-pass.patch b/external/llvm/releases/17.0.0/patches_external/Remove-too-strict-restrictions-in-LICM-pass.patch new file mode 100644 index 000000000..2c60c1fd1 --- /dev/null +++ b/external/llvm/releases/17.0.0/patches_external/Remove-too-strict-restrictions-in-LICM-pass.patch @@ -0,0 +1,46 @@ +/*========================== begin_copyright_notice ============================ + +Copyright (C) 2025 Intel Corporation + +SPDX-License-Identifier: MIT + +============================= end_copyright_notice ===========================*/ + +The reason for removal of below condition was that it took a very strict +approach to the Convergent attribute, which caused missed optimization +opportunities in cases where it was safe to do so. +The decision is based on the discussion in LLVM RFC +https://reviews.llvm.org/D90361?id=303195 +This patch should be considered obsolete if LICM introduces a more +advanced approach to the Convergent attribute in the future version of +LLVM. +--- +diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp +index 2865dece8..f879176b3 100644 +--- a/llvm/lib/Transforms/Scalar/LICM.cpp ++++ b/llvm/lib/Transforms/Scalar/LICM.cpp +@@ -1202,8 +1202,18 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT, + // inter-thread communication which results are implicitly affected by the + // enclosing control flows. It is not safe to hoist or sink such operations + // across control flow. +- if (CI->isConvergent()) +- return false; ++ ++ // The reason for removal of below condition was that it took a very strict ++ // approach to the Convergent attribute, which caused missed optimization ++ // opportunities in cases where it was safe to do so. ++ // The decision is based on the discussion in LLVM RFC ++ // https://reviews.llvm.org/D90361?id=303195 ++ // This patch should be considered obsolete if LICM introduces a more ++ // advanced approach to the Convergent attribute in the future version of ++ // LLVM. 
++ ++ //if (CI->isConvergent()) ++ // return false; + + using namespace PatternMatch; + if (match(CI, m_Intrinsic())) +-- +2.43.0 + + diff --git a/external/llvm/releases/17.0.0/patches_external/alter-unroll-max-upperbound-command-line-option-value.patch b/external/llvm/releases/17.0.0/patches_external/alter-unroll-max-upperbound-command-line-option-value.patch new file mode 100644 index 000000000..cf5345099 --- /dev/null +++ b/external/llvm/releases/17.0.0/patches_external/alter-unroll-max-upperbound-command-line-option-value.patch @@ -0,0 +1,44 @@ +/*========================== begin_copyright_notice ============================ + +Copyright (C) 2025 Intel Corporation + +SPDX-License-Identifier: MIT + +============================= end_copyright_notice ===========================*/ + +/*========================== begin_copyright_notice ============================ + +Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +See https://llvm.org/LICENSE.txt for license information. +SPDX-License-Identifier: Apache-2.0 with LLVM-exception + +============================= end_copyright_notice ===========================*/ + +This comes from patches of LLVM 14 (copy-pasted from there) + +diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +index 9beb2281c..a3cc73ca5 100644 +--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp ++++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +@@ -968,7 +968,7 @@ bool llvm::computeUnrollCount( + // cost of exact full unrolling. As such, if we have an exact count and + // found it unprofitable, we'll never chose to bounded unroll. + if (!TripCount && MaxTripCount && (UP.UpperBound || MaxOrZero) && +- MaxTripCount <= UnrollMaxUpperBound) { ++ MaxTripCount < std::max(16U, UnrollMaxUpperBound.getValue())) { + UP.Count = MaxTripCount; + if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues, + MaxTripCount, UCE, UP)) { +@@ -1042,7 +1042,8 @@ bool llvm::computeUnrollCount( + } + + // Don't unroll a small upper bound loop unless user or TTI asked to do so. +- if (MaxTripCount && !UP.Force && MaxTripCount < UnrollMaxUpperBound) { ++ if (MaxTripCount && !UP.Force && ++ MaxTripCount < std::max(16U, UnrollMaxUpperBound.getValue())) { + UP.Count = 0; + return false; + } +-- +2.43.0 + diff --git a/external/llvm/releases/17.0.0/patches_external/check-for-NaN-before-folding-select-for-FP.patch b/external/llvm/releases/17.0.0/patches_external/check-for-NaN-before-folding-select-for-FP.patch new file mode 100644 index 000000000..d7ade5928 --- /dev/null +++ b/external/llvm/releases/17.0.0/patches_external/check-for-NaN-before-folding-select-for-FP.patch @@ -0,0 +1,49 @@ +/*========================== begin_copyright_notice ============================ + +Copyright (C) 2023 Intel Corporation + +SPDX-License-Identifier: MIT + +============================= end_copyright_notice ===========================*/ + +/*========================== begin_copyright_notice ============================ + +Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +See https://llvm.org/LICENSE.txt for license information. 
+SPDX-License-Identifier: Apache-2.0 with LLVM-exception + +============================= end_copyright_notice ===========================*/ + +# TODO: Once upstreamed, update with LLORG revision & adjust per community review + +From 492a1c879f338c3f12ef4d2f619ca2c8f2467da8 Mon Sep 17 00:00:00 2001 +From: Artem Gindinson +Date: Wed, 23 Aug 2023 15:41:51 +0200 +Subject: [PATCH] [InstCombine] Check for NaN before folding `select` for FP + operators + +--- + llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +index e7d8208f9..341d8fc49 100644 +--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp ++++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +@@ -484,8 +484,12 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal, + // instructions have different flags and add tests to ensure the + // behaviour is correct. + FastMathFlags FMF; +- if (isa(&SI)) ++ if (isa(&SI)) { + FMF = SI.getFastMathFlags(); ++ // Avoid folding on NaN inputs ++ if (!FMF.noNaNs()) ++ return nullptr; ++ } + Constant *C = ConstantExpr::getBinOpIdentity( + TVI->getOpcode(), TVI->getType(), true, FMF.noSignedZeros()); + Value *OOp = TVI->getOperand(2 - OpToFold); +-- +2.43.0 + diff --git a/external/llvm/releases/17.0.0/patches_external/fix_DebugTypeInfoRemoval-remap_eval-ambiguity.patch b/external/llvm/releases/17.0.0/patches_external/fix_DebugTypeInfoRemoval-remap_eval-ambiguity.patch new file mode 100644 index 000000000..352b6c66a --- /dev/null +++ b/external/llvm/releases/17.0.0/patches_external/fix_DebugTypeInfoRemoval-remap_eval-ambiguity.patch @@ -0,0 +1,29 @@ +/*========================== begin_copyright_notice ============================ + +Copyright (C) 2025 Intel Corporation + +SPDX-License-Identifier: MIT + +============================= end_copyright_notice ===========================*/ + +# Description : Fix ambiguous evaluation order + +diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp +--- a/llvm/lib/IR/DebugInfo.cpp ++++ b/llvm/lib/IR/DebugInfo.cpp +@@ -683,7 +683,11 @@ private: + + return getReplacementMDNode(N); + }; +- Replacements[N] = doRemap(N); ++ // Seperate recursive doRemap and operator [] into 2 lines to avoid ++ // out-of-order evaluations since both of them can access the same memory ++ // location in map Replacements. ++ auto Value = doRemap(N); ++ Replacements[N] = Value; + } + + /// Do the remapping traversal. 
+-- +2.43.0 + diff --git a/external/llvm/releases/17.0.0/patches_external/make-getPreviousDefRecursive-iterative.patch b/external/llvm/releases/17.0.0/patches_external/make-getPreviousDefRecursive-iterative.patch new file mode 100644 index 000000000..504f9ae61 --- /dev/null +++ b/external/llvm/releases/17.0.0/patches_external/make-getPreviousDefRecursive-iterative.patch @@ -0,0 +1,367 @@ +/*========================== begin_copyright_notice ============================ + +Copyright (C) 2025 Intel Corporation + +SPDX-License-Identifier: MIT + +============================= end_copyright_notice ===========================*/ + +# Description : Refactor getPreviousDefRecursive to getPreviousDefIterative + +diff --git a/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/llvm/include/llvm/Analysis/MemorySSAUpdater.h +--- a/llvm/include/llvm/Analysis/MemorySSAUpdater.h ++++ b/llvm/include/llvm/Analysis/MemorySSAUpdater.h +@@ -251,10 +251,7 @@ private: + MemoryAccess *getPreviousDef(MemoryAccess *); + MemoryAccess *getPreviousDefInBlock(MemoryAccess *); + MemoryAccess * +- getPreviousDefFromEnd(BasicBlock *, +- DenseMap> &); +- MemoryAccess * +- getPreviousDefRecursive(BasicBlock *, ++ getPreviousDefIterative(BasicBlock *, + DenseMap> &); + MemoryAccess *recursePhi(MemoryAccess *Phi); + MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi); +diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp +--- a/llvm/lib/Analysis/MemorySSAUpdater.cpp ++++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp +@@ -20,6 +20,7 @@ + #include "llvm/IR/Dominators.h" + #include "llvm/Support/Debug.h" + #include ++#include + + #define DEBUG_TYPE "memoryssa" + using namespace llvm; +@@ -33,66 +34,42 @@ using namespace llvm; + // that there are two or more definitions needing to be merged. + // This still will leave non-minimal form in the case of irreducible control + // flow, where phi nodes may be in cycles with themselves, but unnecessary. +-MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive( +- BasicBlock *BB, ++MemoryAccess *MemorySSAUpdater::getPreviousDefIterative( ++ BasicBlock *BBB, + DenseMap> &CachedPreviousDef) { +- // First, do a cache lookup. Without this cache, certain CFG structures +- // (like a series of if statements) take exponential time to visit. +- auto Cached = CachedPreviousDef.find(BB); +- if (Cached != CachedPreviousDef.end()) +- return Cached->second; +- +- // If this method is called from an unreachable block, return LoE. +- if (!MSSA->DT->isReachableFromEntry(BB)) +- return MSSA->getLiveOnEntryDef(); + +- if (BasicBlock *Pred = BB->getUniquePredecessor()) { +- VisitedBlocks.insert(BB); +- // Single predecessor case, just recurse, we can only have one definition. +- MemoryAccess *Result = getPreviousDefFromEnd(Pred, CachedPreviousDef); +- CachedPreviousDef.insert({BB, Result}); +- return Result; +- } ++ // There're 5 cases, case 3 (easy) and case 5 (hard) has recursives. ++ // We need special states to handle their recursive returns ++ enum State {COMMON, CASE3, CASE5}; + +- if (VisitedBlocks.count(BB)) { +- // We hit our node again, meaning we had a cycle, we must insert a phi +- // node to break it so we have an operand. The only case this will +- // insert useless phis is if we have irreducible control flow. 
+- MemoryAccess *Result = MSSA->createMemoryPhi(BB); +- CachedPreviousDef.insert({BB, Result}); +- return Result; +- } ++ // This is the common frame required for everything ++ struct Frame { ++ BasicBlock *bb; ++ MemoryAccess *rtn; ++ State st; ++ }; + +- if (VisitedBlocks.insert(BB).second) { +- // Mark us visited so we can detect a cycle ++ // This is the additional info only required by Case 5 ++ struct FrameCase5 { + SmallVector, 8> PhiOps; ++ bool UniqueIncomingAccess; ++ MemoryAccess *SingleAccess; ++ pred_iterator PredIt; ++ }; + +- // Recurse to get the values in our predecessors for placement of a +- // potential phi node. This will insert phi nodes if we cycle in order to +- // break the cycle and have an operand. +- bool UniqueIncomingAccess = true; +- MemoryAccess *SingleAccess = nullptr; +- for (auto *Pred : predecessors(BB)) { +- if (MSSA->DT->isReachableFromEntry(Pred)) { +- auto *IncomingAccess = getPreviousDefFromEnd(Pred, CachedPreviousDef); +- if (!SingleAccess) +- SingleAccess = IncomingAccess; +- else if (IncomingAccess != SingleAccess) +- UniqueIncomingAccess = false; +- PhiOps.push_back(IncomingAccess); +- } else +- PhiOps.push_back(MSSA->getLiveOnEntryDef()); +- } +- ++ auto Case5AfterLoop = [&](SmallVector, 8> & PhiOps, ++ bool & UniqueIncomingAccess, MemoryAccess *& SingleAccess, ++ BasicBlock * BB) -> MemoryAccess * { + // Now try to simplify the ops to avoid placing a phi. + // This may return null if we never created a phi yet, that's okay + MemoryPhi *Phi = dyn_cast_or_null(MSSA->getMemoryAccess(BB)); + + // See if we can avoid the phi by simplifying it. +- auto *Result = tryRemoveTrivialPhi(Phi, PhiOps); ++ MemoryAccess *Result = tryRemoveTrivialPhi(Phi, PhiOps); + // If we couldn't simplify, we may have to create a phi + if (Result == Phi && UniqueIncomingAccess && SingleAccess) { +- // A concrete Phi only exists if we created an empty one to break a cycle. ++ // A concrete Phi only exists if we created an empty one to break a ++ // cycle. + if (Phi) { + assert(Phi->operands().empty() && "Expected empty Phi"); + Phi->replaceAllUsesWith(SingleAccess); +@@ -104,12 +81,13 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive( + Phi = MSSA->createMemoryPhi(BB); + + // See if the existing phi operands match what we need. +- // Unlike normal SSA, we only allow one phi node per block, so we can't just +- // create a new one. ++ // Unlike normal SSA, we only allow one phi node per block, so we ++ // can't just create a new one. + if (Phi->getNumOperands() != 0) { + // FIXME: Figure out whether this is dead code and if so remove it. + if (!std::equal(Phi->op_begin(), Phi->op_end(), PhiOps.begin())) { +- // These will have been filled in by the recursive read we did above. ++ // These will have been filled in by the recursive read we did ++ // above. 
+ llvm::copy(PhiOps, Phi->op_begin()); + std::copy(pred_begin(BB), pred_end(BB), Phi->block_begin()); + } +@@ -126,8 +104,170 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive( + VisitedBlocks.erase(BB); + CachedPreviousDef.insert({BB, Result}); + return Result; ++ }; ++ ++ // We may want to switch to vector to boot performance ++ std::stack SF; ++ std::stack SF5; ++ // The return frame ++ SF.push({nullptr, nullptr, COMMON}); ++ // The entry frame ++ SF.push({BBB, nullptr, COMMON}); ++ ++ while (SF.size() > 1) { ++ ++ if (COMMON == SF.top().st) { ++ auto BB = SF.top().bb; ++ auto Cached = CachedPreviousDef.find(BB); ++ if (Cached != CachedPreviousDef.end()) { ++ SF.pop(); ++ SF.top().rtn = Cached->second; ++ continue; ++ } else if (!MSSA->DT->isReachableFromEntry(BB)) { ++ SF.pop(); ++ SF.top().rtn = MSSA->getLiveOnEntryDef(); ++ continue; ++ } else if (BasicBlock *Pred = BB->getUniquePredecessor()) { ++ VisitedBlocks.insert(BB); ++ // Single predecessor case, just recurse, we can only have one ++ // definition. ++ MemoryAccess *prevDefFromEnd = nullptr; ++ auto *Defs = MSSA->getWritableBlockDefs(Pred); ++ if (Defs) { ++ CachedPreviousDef.insert({Pred, &*Defs->rbegin()}); ++ prevDefFromEnd = &*Defs->rbegin(); ++ } else { ++ SF.top().st = CASE3; ++ SF.push({Pred, nullptr, COMMON}); ++ continue; ++ } ++ MemoryAccess *Result = prevDefFromEnd; ++ CachedPreviousDef.insert({BB, Result}); ++ SF.pop(); ++ SF.top().rtn = Result; ++ continue; ++ } else if (VisitedBlocks.count(BB)) { ++ // We hit our node again, meaning we had a cycle, we must insert a phi ++ // node to break it so we have an operand. The only case this will ++ // insert useless phis is if we have irreducible control flow. ++ MemoryAccess *Result = MSSA->createMemoryPhi(BB); ++ CachedPreviousDef.insert({BB, Result}); ++ SF.pop(); ++ SF.top().rtn = Result; ++ continue; ++ } else if (VisitedBlocks.insert(BB).second) { ++ // Mark us visited so we can detect a cycle ++ SmallVector, 8> PhiOps; ++ ++ // Recurse to get the values in our predecessors for placement of a ++ // potential phi node. This will insert phi nodes if we cycle in order ++ // to break the cycle and have an operand. ++ bool UniqueIncomingAccess = true; ++ MemoryAccess *SingleAccess = nullptr; ++ bool halt = false; ++ for (auto PredIt = predecessors(BB).begin(); ++ PredIt != predecessors(BB).end(); PredIt++) { ++ auto Pred = *PredIt; ++ if (MSSA->DT->isReachableFromEntry(Pred)) { ++ MemoryAccess *prevDefFromEnd = nullptr; ++ auto *Defs = MSSA->getWritableBlockDefs(Pred); ++ if (Defs) { ++ CachedPreviousDef.insert({Pred, &*Defs->rbegin()}); ++ prevDefFromEnd = &*Defs->rbegin(); ++ } else { ++ SF.top().st = CASE5; ++ SF.push({Pred, nullptr, COMMON}); ++ SF5.push({ ++ std::move(PhiOps), UniqueIncomingAccess, SingleAccess, ++ std::move(PredIt) ++ }); ++ halt = true; ++ break; ++ } ++ auto *IncomingAccess = prevDefFromEnd; ++ if (!SingleAccess) ++ SingleAccess = IncomingAccess; ++ else if (IncomingAccess != SingleAccess) ++ UniqueIncomingAccess = false; ++ PhiOps.push_back(IncomingAccess); ++ } else ++ PhiOps.push_back(MSSA->getLiveOnEntryDef()); ++ } ++ if (halt) ++ continue; ++ ++ auto Result = ++ Case5AfterLoop(PhiOps, UniqueIncomingAccess, SingleAccess, BB); ++ ++ // Set ourselves up for the next variable by resetting visited state. 
++ VisitedBlocks.erase(BB); ++ CachedPreviousDef.insert({BB, Result}); ++ SF.pop(); ++ SF.top().rtn = Result; ++ continue; ++ } ++ llvm_unreachable("Should have hit one of the five cases above"); ++ } else if (CASE3 == SF.top().st) { ++ auto Result = SF.top().rtn; ++ CachedPreviousDef.insert({SF.top().bb, Result}); ++ SF.pop(); ++ SF.top().rtn = Result; ++ continue; ++ } else { // CASE5 ++ // recover header ++ auto &PhiOps = SF5.top().PhiOps; ++ auto &UniqueIncomingAccess = SF5.top().UniqueIncomingAccess; ++ auto &SingleAccess = SF5.top().SingleAccess; ++ auto &PredIt = SF5.top().PredIt; ++ auto IncomingAccess = SF.top().rtn; ++ auto BB = SF.top().bb; ++ ++ // in-loop remaining code ++ if (!SingleAccess) ++ SingleAccess = IncomingAccess; ++ else if (IncomingAccess != SingleAccess) ++ UniqueIncomingAccess = false; ++ PhiOps.push_back(IncomingAccess); ++ ++ // remaining loop ++ bool halt = false; ++ for (PredIt++; PredIt != predecessors(BB).end(); PredIt++) { ++ auto Pred = *PredIt; ++ if (MSSA->DT->isReachableFromEntry(Pred)) { ++ MemoryAccess *prevDefFromEnd = nullptr; ++ auto *Defs = MSSA->getWritableBlockDefs(Pred); ++ if (Defs) { ++ CachedPreviousDef.insert({Pred, &*Defs->rbegin()}); ++ prevDefFromEnd = &*Defs->rbegin(); ++ } else { ++ SF.push({Pred, nullptr, COMMON}); ++ halt = true; ++ break; ++ } ++ auto *IncomingAccess = prevDefFromEnd; ++ if (!SingleAccess) ++ SingleAccess = IncomingAccess; ++ else if (IncomingAccess != SingleAccess) ++ UniqueIncomingAccess = false; ++ PhiOps.push_back(IncomingAccess); ++ } else ++ PhiOps.push_back(MSSA->getLiveOnEntryDef()); ++ } ++ if (halt) ++ continue; ++ // after loop ++ auto Result = ++ Case5AfterLoop(PhiOps, UniqueIncomingAccess, SingleAccess, BB); ++ SF.pop(); ++ SF.top().rtn = Result; ++ SF5.pop(); ++ continue; ++ } ++ ++ llvm_unreachable("Should have hit one of the three cases above"); + } +- llvm_unreachable("Should have hit one of the three cases above"); ++ assert(0 == SF5.size()); ++ return SF.top().rtn; + } + + // This starts at the memory access, and goes backwards in the block to find the +@@ -138,7 +278,7 @@ MemoryAccess *MemorySSAUpdater::getPreviousDef(MemoryAccess *MA) { + if (auto *LocalResult = getPreviousDefInBlock(MA)) + return LocalResult; + DenseMap> CachedPreviousDef; +- return getPreviousDefRecursive(MA->getBlock(), CachedPreviousDef); ++ return getPreviousDefIterative(MA->getBlock(), CachedPreviousDef); + } + + // This starts at the memory access, and goes backwards in the block to the find +@@ -168,19 +308,6 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefInBlock(MemoryAccess *MA) { + return nullptr; + } + +-// This starts at the end of block +-MemoryAccess *MemorySSAUpdater::getPreviousDefFromEnd( +- BasicBlock *BB, +- DenseMap> &CachedPreviousDef) { +- auto *Defs = MSSA->getWritableBlockDefs(BB); +- +- if (Defs) { +- CachedPreviousDef.insert({BB, &*Defs->rbegin()}); +- return &*Defs->rbegin(); +- } +- +- return getPreviousDefRecursive(BB, CachedPreviousDef); +-} + // Recurse over a set of phi uses to eliminate the trivial ones + MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) { + if (!Phi) +@@ -396,7 +523,17 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) { + auto *BBIDF = MPhi->getBlock(); + for (auto *Pred : predecessors(BBIDF)) { + DenseMap> CachedPreviousDef; +- MPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef), Pred); ++ // inline getPreviousDefFromEnd start ++ MemoryAccess *prevDefFromEnd = nullptr; ++ auto *Defs = MSSA->getWritableBlockDefs(Pred); ++ if (Defs) 
{ ++ CachedPreviousDef.insert({Pred, &*Defs->rbegin()}); ++ prevDefFromEnd = & * Defs->rbegin(); ++ } else { ++ prevDefFromEnd = getPreviousDefIterative(Pred, CachedPreviousDef); ++ } ++ // inline getPreviousDefFromEnd end ++ MPhi->addIncoming(prevDefFromEnd, Pred); + } + } + + +-- +2.43.0 + diff --git a/external/llvm/releases/17.0.0/patches_external/no-autoupgrader-igc-struct-typed-intrinsic.patch b/external/llvm/releases/17.0.0/patches_external/no-autoupgrader-igc-struct-typed-intrinsic.patch new file mode 100644 index 000000000..7f2d95464 --- /dev/null +++ b/external/llvm/releases/17.0.0/patches_external/no-autoupgrader-igc-struct-typed-intrinsic.patch @@ -0,0 +1,39 @@ +/*========================== begin_copyright_notice ============================ + +Copyright (C) 2025 Intel Corporation + +SPDX-License-Identifier: MIT + +============================= end_copyright_notice ===========================*/ + +/*========================== begin_copyright_notice ============================ + +Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +See https://llvm.org/LICENSE.txt for license information. +SPDX-License-Identifier: Apache-2.0 with LLVM-exception + +============================= end_copyright_notice ===========================*/ + +This is a cherry-pick of commit https://github.com/llvm/llvm-project/commit/a87738f86b17f4a8dcde538c60826506e2a27ed1: + +"[AutoUpgrade] Don't upgrade intrinsics returning overloaded struct type" + +--- + +diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp +index 7b9c55ff3..d0d5c9f4e 100644 +--- a/llvm/lib/IR/AutoUpgrade.cpp ++++ b/llvm/lib/IR/AutoUpgrade.cpp +@@ -1131,7 +1131,8 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { + } + + auto *ST = dyn_cast(F->getReturnType()); +- if (ST && (!ST->isLiteral() || ST->isPacked())) { ++ if (ST && (!ST->isLiteral() || ST->isPacked()) && ++ F->getIntrinsicID() != Intrinsic::not_intrinsic) { + // Replace return type with literal non-packed struct. Only do this for + // intrinsics declared to return a struct, not for intrinsics with + // overloaded return type, in which case the exact struct type will be +-- +2.43.0 + diff --git a/external/llvm/releases/17.0.0/patches_external/unify-max-alignment-with-generic-max.patch b/external/llvm/releases/17.0.0/patches_external/unify-max-alignment-with-generic-max.patch new file mode 100644 index 000000000..37d126231 --- /dev/null +++ b/external/llvm/releases/17.0.0/patches_external/unify-max-alignment-with-generic-max.patch @@ -0,0 +1,143 @@ +/*========================== begin_copyright_notice ============================ + +Copyright (C) 2025 Intel Corporation + +SPDX-License-Identifier: MIT + +============================= end_copyright_notice ===========================*/ + +/*========================== begin_copyright_notice ============================ + +Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +See https://llvm.org/LICENSE.txt for license information. +SPDX-License-Identifier: Apache-2.0 with LLVM-exception + +============================= end_copyright_notice ===========================*/ + +This is backport of this fix: https://github.com/llvm/llvm-project/pull/99257 + +"[IR] Unify max alignment for arguments with generic max align." + +From 47f034550a5fb9ef6adee6347cd3c00e70ca663d Mon Sep 17 00:00:00 2001 +From: Eli Friedman +Date: Tue, 16 Jul 2024 16:03:38 -0700 +Subject: [PATCH 1/3] Unify max alignment for arguments with generic max align. 
+ +The 2^14 limit was completely arbitrary; the generic limit is still +arbitrary, but at least it's the same arbitrary limit as everything +else. + +While I'm here, also add a verifier check for the ByValOrByRefSize. +--- + llvm/include/llvm/CodeGen/TargetCallingConv.h | 8 ++--- + llvm/lib/IR/Verifier.cpp | 34 +++++++++++-------- + 2 files changed, 23 insertions(+), 19 deletions(-) + +diff --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h +index 89ea9bcb2a40..70a2d8e5faaf 100644 +--- a/llvm/include/llvm/CodeGen/TargetCallingConv.h ++++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h +@@ -45,9 +45,9 @@ namespace ISD { + unsigned IsHva : 1; ///< HVA field for + unsigned IsHvaStart : 1; ///< HVA structure start + unsigned IsSecArgPass : 1; ///< Second argument +- unsigned MemAlign : 4; ///< Log 2 of alignment when arg is passed in memory +- ///< (including byval/byref). The max alignment is +- ///< verified in IR verification. ++ unsigned MemAlign : 6; ///< Log 2 of alignment when arg is passed in memory ++ ///< (including byval/byref). The max alignment is ++ ///< verified in IR verification. + unsigned OrigAlign : 5; ///< Log 2 of original alignment + unsigned IsInConsecutiveRegsLast : 1; + unsigned IsInConsecutiveRegs : 1; +@@ -67,7 +67,7 @@ namespace ISD { + IsSecArgPass(0), MemAlign(0), OrigAlign(0), + IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0), + IsCopyElisionCandidate(0), IsPointer(0) { +- static_assert(sizeof(*this) == 3 * sizeof(unsigned), "flags are too big"); ++ static_assert(sizeof(*this) == 4 * sizeof(unsigned), "flags are too big"); + } + + bool isZExt() const { return IsZExt; } +diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp +index 1408ce293ca6..2370bfd0a2be 100644 +--- a/llvm/lib/IR/Verifier.cpp ++++ b/llvm/lib/IR/Verifier.cpp +@@ -291,13 +291,6 @@ namespace { + + class Verifier : public InstVisitor, VerifierSupport { + friend class InstVisitor; +- +- // ISD::ArgFlagsTy::MemAlign only have 4 bits for alignment, so +- // the alignment size should not exceed 2^15. Since encode(Align) +- // would plus the shift value by 1, the alignment size should +- // not exceed 2^14, otherwise it can NOT be properly lowered +- // in backend. 
+- static constexpr unsigned ParamMaxAlignment = 1 << 14; + DominatorTree DT; + + /// When verifying a basic block, keep track of all of the +@@ -1939,31 +1932,43 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty, + } + + if (isa(Ty)) { ++ if (Attrs.hasAttribute(Attribute::Alignment)) { ++ Align AttrAlign = Attrs.getAlignment().valueOrOne(); ++ Check(AttrAlign.value() <= Value::MaximumAlignment, ++ "huge alignment values are unsupported", V); ++ } + if (Attrs.hasAttribute(Attribute::ByVal)) { +- if (Attrs.hasAttribute(Attribute::Alignment)) { +- Align AttrAlign = Attrs.getAlignment().valueOrOne(); +- Align MaxAlign(ParamMaxAlignment); +- Check(AttrAlign <= MaxAlign, +- "Attribute 'align' exceed the max size 2^14", V); +- } + SmallPtrSet Visited; + Check(Attrs.getByValType()->isSized(&Visited), + "Attribute 'byval' does not support unsized types!", V); ++ Check(DL.getTypeAllocSize(Attrs.getByValType()).getKnownMinValue() < ++ (1ULL << 32), ++ "huge 'byval' arguments are unsupported", V); + } + if (Attrs.hasAttribute(Attribute::ByRef)) { + SmallPtrSet Visited; + Check(Attrs.getByRefType()->isSized(&Visited), + "Attribute 'byref' does not support unsized types!", V); ++ Check(DL.getTypeAllocSize(Attrs.getByRefType()).getKnownMinValue() < ++ (1ULL << 32), ++ "huge 'byref' arguments are unsupported", V); + } + if (Attrs.hasAttribute(Attribute::InAlloca)) { + SmallPtrSet Visited; + Check(Attrs.getInAllocaType()->isSized(&Visited), + "Attribute 'inalloca' does not support unsized types!", V); ++ Check(DL.getTypeAllocSize(Attrs.getInAllocaType()).getKnownMinValue() < ++ (1ULL << 32), ++ "huge 'inalloca' arguments are unsupported", V); + } + if (Attrs.hasAttribute(Attribute::Preallocated)) { + SmallPtrSet Visited; + Check(Attrs.getPreallocatedType()->isSized(&Visited), + "Attribute 'preallocated' does not support unsized types!", V); ++ Check( ++ DL.getTypeAllocSize(Attrs.getPreallocatedType()).getKnownMinValue() < ++ (1ULL << 32), ++ "huge 'preallocated' arguments are unsupported", V); + } + } + +@@ -3424,8 +3429,7 @@ void Verifier::visitCallBase(CallBase &Call) { + if (!Ty->isSized()) + return; + Align ABIAlign = DL.getABITypeAlign(Ty); +- Align MaxAlign(ParamMaxAlignment); +- Check(ABIAlign <= MaxAlign, ++ Check(ABIAlign.value() <= Value::MaximumAlignment, + "Incorrect alignment of " + Message + " to called function!", Call); + }; + + +-- +2.43.0 +