	Additional LLVM patching
Add necessary LLVM patches.
Author: Mielczarek, Aleksander
Committed by: igcbot
parent 7d69a9f62e
commit 9d5bec3237
@@ -0,0 +1,32 @@
From 84d340ed615c3601a2f46178acce2040d9d114f9 Mon Sep 17 00:00:00 2001
From: Victor Mustya <victor.mustya@intel.com>
Date: Mon, 31 Oct 2022 13:27:02 -0700
Subject: =?UTF-8?q?[Backport]=20When=20creating=20a=20stack=20space=20for?=
 =?UTF-8?q?=20inlined=20byval=20args,=0A=20use=20the=20same=20addrspace=20?=
 =?UTF-8?q?as=20the=20original=20argument.?=

From: Chang-Sun Lin Jr <chang-sun.lin.jr@intel.com>
---
 llvm/lib/Transforms/Utils/InlineFunction.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 399c9a43793f..bfb027568227 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1599,6 +1599,12 @@ static Value *HandleByValArgument(Type *ByValType, Value *Arg,
                      Arg->getName(), &*Caller->begin()->begin());
   IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));
 
+  // If the byval was in a different address space, add a cast.
+  if (DL.getAllocaAddrSpace() != Arg->getType()->getPointerAddressSpace()) {
+    NewAlloca = new AddrSpaceCastInst(
+        NewAlloca, Arg->getType(), "",
+        cast<Instruction>(NewAlloca)->getNextNonDebugInstruction());
+  }
   // Uses of the argument in the function should use our new alloca
   // instead.
   return NewAlloca;
-- 
2.43.0

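The failure mode this patch guards against can be sketched directly in IR. Below is a minimal, hypothetical reduction (the function names and the choice of addrspace(4) are illustrative, not taken from the patch): the callee's byval argument lives in a non-default address space, so the alloca that inlining creates for the private copy needs an addrspacecast back to the argument's original address space.

```llvm
; Hypothetical reduction. When @callee is inlined, HandleByValArgument
; creates an alloca in the datalayout's alloca address space (0 here) to
; hold the byval copy; the patched code then inserts an addrspacecast so
; the inlined body still sees a ptr addrspace(4) value.
define void @callee(ptr addrspace(4) byval(i32) %p) {
  %v = load i32, ptr addrspace(4) %p
  ret void
}

define void @caller(ptr addrspace(4) %p) {
  call void @callee(ptr addrspace(4) byval(i32) %p)
  ret void
}
```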
@@ -0,0 +1,28 @@
From 881bf715f06201a57a4f1a60155b556fedd556db Mon Sep 17 00:00:00 2001
From: Victor Mustya <victor.mustya@intel.com>
Date: Tue, 22 Aug 2023 11:10:30 -0700
Subject: [PATCH] Don't emit bitreverse or bswap intrinsics of illegal bit
 width during instcombine

---
 llvm/lib/Transforms/Utils/Local.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index b2ed95b05..476a5c4c1 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3312,6 +3312,10 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
   if (DemandedBW > ITy->getScalarSizeInBits())
     return false;
 
+  auto &DL = I->getModule()->getDataLayout();
+  if (DL.isIllegalInteger(DemandedBW))
+    return false;
+
   // Now, is the bit permutation correct for a bswap or a bitreverse? We can
   // only byteswap values with an even number of bytes.
   APInt DemandedMask = APInt::getAllOnes(DemandedBW);
-- 
2.43.0

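To see what the guard prevents, consider a hypothetical reduction (not from the patch): on a target whose datalayout only declares 8/16/32/64-bit native integers, the hand-written byte swap below matches the idiom that recognizeBSwapOrBitReverseIdiom detects, but materializing llvm.bswap.i24 would hand the backend an intrinsic of illegal width. With the patch, DL.isIllegalInteger(24) makes the fold bail out.

```llvm
target datalayout = "n8:16:32:64"

; Hypothetical reduction: a 3-byte swap (byte 0 <-> byte 2, middle byte kept).
; InstCombine's idiom recognizer would previously collapse this shift/mask/or
; tree into llvm.bswap.i24; the patch refuses because i24 is illegal here.
define i24 @bswap24(i24 %x) {
  %b0.hi = shl i24 %x, 16           ; move byte 0 into the top byte
  %hi = and i24 %b0.hi, 16711680    ; mask 0xFF0000
  %mid = and i24 %x, 65280          ; mask 0x00FF00, middle byte stays put
  %lo = lshr i24 %x, 16             ; move byte 2 into the bottom byte
  %t = or i24 %hi, %mid
  %r = or i24 %t, %lo
  ret i24 %r
}
```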
@@ -0,0 +1,71 @@
/*========================== begin_copyright_notice ============================

Copyright (C) 2024 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

From 58b5b7d4ed6204f61feeda68c7c1abe24bc143b1 Mon Sep 17 00:00:00 2001
From: Victor Mustya <victor.mustya@intel.com>
Date: Tue, 16 Jan 2024 14:13:05 -0800
Subject: [InstCombine] Only fold bitcast(fptrunc) if destination type matches
 fptrunc result type. (#77046)

It's not enough to just make sure destination type is floating point,
because the following chain may be incorrectly optimized:
```LLVM
  %trunc = fptrunc float %src to bfloat
  %cast = bitcast bfloat %trunc to half
```
Before the fix, the instruction sequence mentioned above used to be
translated into single fptrunc instruction as follows:
```LLVM
  %trunc = fptrunc float %src to half
```

Such transformation was semantically incorrect.
---
 llvm/lib/IR/Instructions.cpp                |  4 ++--
 llvm/test/Transforms/InstCombine/fptrunc.ll | 13 +++++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/IR/Instructions.cpp b/llvm/lib/IR/Instructions.cpp
index 7c343a0ff..932fc66a8 100644
--- a/llvm/lib/IR/Instructions.cpp
+++ b/llvm/lib/IR/Instructions.cpp
@@ -3218,8 +3218,8 @@ unsigned CastInst::isEliminableCastPair(
       return 0;
     case 4:
       // No-op cast in second op implies firstOp as long as the DestTy
-      // is floating point.
-      if (DstTy->isFloatingPointTy())
+      // matches MidTy.
+      if (DstTy == MidTy)
         return firstOp;
       return 0;
     case 5:
diff --git a/llvm/test/Transforms/InstCombine/fptrunc.ll b/llvm/test/Transforms/InstCombine/fptrunc.ll
index d3e153f12..c78df0b83 100644
--- a/llvm/test/Transforms/InstCombine/fptrunc.ll
+++ b/llvm/test/Transforms/InstCombine/fptrunc.ll
@@ -190,3 +190,16 @@ define half @ItoFtoF_u25_f32_f16(i25 %i) {
   %r = fptrunc float %x to half
   ret half %r
 }
+
+; Negative test - bitcast bfloat to half is not optimized
+
+define half @fptrunc_to_bfloat_bitcast_to_half(float %src) {
+; CHECK-LABEL: @fptrunc_to_bfloat_bitcast_to_half(
+; CHECK-NEXT:    [[TRUNC:%.*]] = fptrunc float [[SRC:%.*]] to bfloat
+; CHECK-NEXT:    [[CAST:%.*]] = bitcast bfloat [[TRUNC]] to half
+; CHECK-NEXT:    ret half [[CAST]]
+;
+  %trunc = fptrunc float %src to bfloat
+  %cast = bitcast bfloat %trunc to half
+  ret half %cast
+}
-- 
2.34.1

@@ -0,0 +1,99 @@
/*========================== begin_copyright_notice ============================

Copyright (C) 2025 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

/*========================== begin_copyright_notice ============================

Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
See https://llvm.org/LICENSE.txt for license information.
SPDX-License-Identifier: Apache-2.0 with LLVM-exception

============================= end_copyright_notice ===========================*/

From c5c679933c462f28fac7358841a23ee32c292a47 Mon Sep 17 00:00:00 2001
From: peterbell10 <peterbell10@openai.com>
Date: Wed, 20 Nov 2024 21:06:57 +0000
Subject: [PATCH] [InstCombine] Only fold extract element to trunc if vector
 `hasOneUse` (#115627)

This fixes a missed optimization caused by the `foldBitcastExtElt`
pattern interfering with other combine patterns. In the case I was
hitting, we have IR that combines two vectors into a new larger vector
by extracting elements and inserting them into the new vector.

```llvm
define <4 x half> @bitcast_extract_insert_to_shuffle(i32 %a, i32 %b) {
  %avec = bitcast i32 %a to <2 x half>
  %a0 = extractelement <2 x half> %avec, i32 0
  %a1 = extractelement <2 x half> %avec, i32 1
  %bvec = bitcast i32 %b to <2 x half>
  %b0 = extractelement <2 x half> %bvec, i32 0
  %b1 = extractelement <2 x half> %bvec, i32 1
  %ins0 = insertelement <4 x half> undef, half %a0, i32 0
  %ins1 = insertelement <4 x half> %ins0, half %a1, i32 1
  %ins2 = insertelement <4 x half> %ins1, half %b0, i32 2
  %ins3 = insertelement <4 x half> %ins2, half %b1, i32 3
  ret <4 x half> %ins3
}
```

With the current behavior, `InstCombine` converts each vector extract
sequence to

```llvm
  %tmp = trunc i32 %a to i16
  %a0 = bitcast i16 %tmp to half
  %a1 = extractelement <2 x half> %avec, i32 1
```

where the extraction of `%a0` is now done by truncating the original
integer. While on it's own this is fairly reasonable, in this case it
also blocks the pattern which converts `extractelement` -
`insertelement` into shuffles which gives the overall simpler result:

```llvm
define <4 x half> @bitcast_extract_insert_to_shuffle(i32 %a, i32 %b) {
  %avec = bitcast i32 %a to <2 x half>
  %bvec = bitcast i32 %b to <2 x half>
  %ins3 = shufflevector <2 x half> %avec, <2 x half> %bvec, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x half> %ins3
}
```

In this PR I fix the conflict by obeying the `hasOneUse` check even if
there is no shift instruction required. In these cases we can't remove
the vector completely, so the pattern has less benefit anyway.

Also fwiw, I think dropping the `hasOneUse` check for the 0th element
might have been a mistake in the first place. Looking at
https://github.com/llvm/llvm-project/commit/535c5d56a7bc9966036a11362d8984983a4bf090
the commit message only mentions loosening the `isDesirableIntType`
requirement and doesn't mention changing the `hasOneUse` check at all.
---
 llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 61e62adbe327..d3b30848ab8b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -202,9 +202,9 @@ Instruction *InstCombinerImpl::foldBitcastExtElt(ExtractElementInst &Ext) {
     if (IsBigEndian)
       ExtIndexC = NumElts.getKnownMinValue() - 1 - ExtIndexC;
     unsigned ShiftAmountC = ExtIndexC * DestWidth;
-    if (!ShiftAmountC ||
-        (isDesirableIntType(X->getType()->getPrimitiveSizeInBits()) &&
-        Ext.getVectorOperand()->hasOneUse())) {
+    if ((!ShiftAmountC ||
+         isDesirableIntType(X->getType()->getPrimitiveSizeInBits())) &&
+        Ext.getVectorOperand()->hasOneUse()) {
       if (ShiftAmountC)
         X = Builder.CreateLShr(X, ShiftAmountC, "extelt.offset");
       if (DestTy->isFloatingPointTy()) {
-- 
2.43.0

external/llvm/releases/17.0.0/patches_external/LowerSwitch-RemoveUnreachableBBs.patch (new file, 46 lines, vendored)
@@ -0,0 +1,46 @@
/*========================== begin_copyright_notice ============================

Copyright (C) 2024 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/
/*========================== begin_copyright_notice ============================

Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
See https://llvm.org/LICENSE.txt for license information.
SPDX-License-Identifier: Apache-2.0 with LLVM-exception

============================= end_copyright_notice ===========================*/
The reason for removing unreachable blocks is this change in the LLVM repo:
https://github.com/llvm/llvm-project/commit/1065f3439bad59323f16e7c8ee568c7d94dcd952
LowerSwitchPass can leave phi instructions with nodes from unreachable basic blocks
which is a disallowed state for DomTree.

diff --git a/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index 227de425ff85..8d089c2a754c 100644
--- a/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -38,6 +38,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -556,6 +557,10 @@ bool LowerSwitch(Function &F, LazyValueInfo *LVI, AssumptionCache *AC) {
     DeleteDeadBlock(BB);
   }
 
+  if (!DeleteList.empty()) {
+    removeUnreachableBlocks(F);
+  }
+
   return Changed;
 }
 

-- 
2.34.1

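A hypothetical sketch of the kind of CFG involved (reduced; the block names and the trick for making the default provably dead are illustrative): once LowerSwitch, with LVI's help, proves the default destination unreachable and deletes it, blocks reachable only through the deleted region can linger, and phis may keep incoming entries for blocks the DomTree no longer considers reachable. The added removeUnreachableBlocks(F) call cleans these out whenever something was deleted.

```llvm
; Hypothetical reduction: %v is provably in {0, 1}, so the default
; destination %dead is dead. After lowering deletes it, a phi such as the
; one in %merge must not retain its [ 2, %dead ] entry.
define i32 @lower_switch_example(i32 %x) {
entry:
  %v = and i32 %x, 1
  switch i32 %v, label %dead [
    i32 0, label %a
    i32 1, label %b
  ]
a:
  br label %merge
b:
  br label %merge
dead:
  br label %merge
merge:
  %r = phi i32 [ 0, %a ], [ 1, %b ], [ 2, %dead ]
  ret i32 %r
}
```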
external/llvm/releases/17.0.0/patches_external/Remove-too-strict-restrictions-in-LICM-pass.patch (new file, 46 lines, vendored)
@@ -0,0 +1,46 @@
/*========================== begin_copyright_notice ============================

Copyright (C) 2025 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

The reason for removal of below condition was that it took a very strict
approach to the Convergent attribute, which caused missed optimization
opportunities in cases where it was safe to do so.
The decision is based on the discussion in LLVM RFC
https://reviews.llvm.org/D90361?id=303195
This patch should be considered obsolete if LICM introduces a more
advanced approach to the Convergent attribute in the future version of
LLVM.
---
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 2865dece8..f879176b3 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -1202,8 +1202,18 @@ bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
     // inter-thread communication which results are implicitly affected by the
     // enclosing control flows. It is not safe to hoist or sink such operations
     // across control flow.
-    if (CI->isConvergent())
-      return false;
+
+    // The reason for removal of below condition was that it took a very strict
+    // approach to the Convergent attribute, which caused missed optimization
+    // opportunities in cases where it was safe to do so.
+    // The decision is based on the discussion in LLVM RFC
+    // https://reviews.llvm.org/D90361?id=303195
+    // This patch should be considered obsolete if LICM introduces a more
+    // advanced approach to the Convergent attribute in the future version of
+    // LLVM.
+
+    //if (CI->isConvergent())
+    //  return false;
 
     using namespace PatternMatch;
     if (match(CI, m_Intrinsic<Intrinsic::assume>()))
-- 
2.43.0


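The practical effect can be sketched with hypothetical IR (the @subgroup.op callee is illustrative, standing in for a cross-lane GPU operation): upstream LICM leaves the loop-invariant call below inside the loop purely because it is convergent; with the check commented out, the ordinary legality and profitability checks decide instead.

```llvm
; Hypothetical reduction: the call's operands are loop-invariant, so it is
; a hoisting candidate, but the convergent attribute alone used to make
; canSinkOrHoistInst return false.
declare i32 @subgroup.op(i32) convergent

define i32 @hoist_candidate(i32 %x, i32 %n) {
entry:
  br label %loop
loop:
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
  %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ]
  %s = call i32 @subgroup.op(i32 %x)   ; invariant convergent call
  %acc.next = add i32 %acc, %s
  %i.next = add i32 %i, 1
  %cont = icmp slt i32 %i.next, %n
  br i1 %cont, label %loop, label %exit
exit:
  ret i32 %acc.next
}
```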
@@ -0,0 +1,44 @@
/*========================== begin_copyright_notice ============================

Copyright (C) 2025 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

/*========================== begin_copyright_notice ============================

Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
See https://llvm.org/LICENSE.txt for license information.
SPDX-License-Identifier: Apache-2.0 with LLVM-exception

============================= end_copyright_notice ===========================*/

This comes from patches of LLVM 14 (copy-pasted from there)

diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 9beb2281c..a3cc73ca5 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -968,7 +968,7 @@ bool llvm::computeUnrollCount(
   // cost of exact full unrolling.  As such, if we have an exact count and
   // found it unprofitable, we'll never chose to bounded unroll.
   if (!TripCount && MaxTripCount && (UP.UpperBound || MaxOrZero) &&
-      MaxTripCount <= UnrollMaxUpperBound) {
+      MaxTripCount < std::max(16U, UnrollMaxUpperBound.getValue())) {
     UP.Count = MaxTripCount;
     if (auto UnrollFactor = shouldFullUnroll(L, TTI, DT, SE, EphValues,
                                              MaxTripCount, UCE, UP)) {
@@ -1042,7 +1042,8 @@ bool llvm::computeUnrollCount(
   }

   // Don't unroll a small upper bound loop unless user or TTI asked to do so.
-  if (MaxTripCount && !UP.Force && MaxTripCount < UnrollMaxUpperBound) {
+  if (MaxTripCount && !UP.Force &&
+      MaxTripCount < std::max(16U, UnrollMaxUpperBound.getValue())) {
     UP.Count = 0;
     return false;
   }
-- 
2.43.0

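A hypothetical reduction of what the relaxed thresholds enable: the exact trip count below is unknown at compile time, but the `and` bounds it by 7, so computeUnrollCount only sees a MaxTripCount. With the threshold raised to max(16U, UnrollMaxUpperBound), such small upper-bound loops become eligible for bounded full unrolling instead of being rejected outright.

```llvm
; Hypothetical reduction: the trip count is %n & 7, unknown but at most 7.
define i32 @sum_bounded(ptr %p, i32 %n) {
entry:
  %bound = and i32 %n, 7
  %empty = icmp eq i32 %bound, 0
  br i1 %empty, label %exit, label %loop
loop:
  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
  %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ]
  %gep = getelementptr i32, ptr %p, i32 %i
  %v = load i32, ptr %gep
  %acc.next = add i32 %acc, %v
  %i.next = add i32 %i, 1
  %cont = icmp ult i32 %i.next, %bound
  br i1 %cont, label %loop, label %exit
exit:
  %r = phi i32 [ 0, %entry ], [ %acc.next, %loop ]
  ret i32 %r
}
```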
external/llvm/releases/17.0.0/patches_external/check-for-NaN-before-folding-select-for-FP.patch (new file, 49 lines, vendored)
@@ -0,0 +1,49 @@
/*========================== begin_copyright_notice ============================

Copyright (C) 2023 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

/*========================== begin_copyright_notice ============================

Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
See https://llvm.org/LICENSE.txt for license information.
SPDX-License-Identifier: Apache-2.0 with LLVM-exception

============================= end_copyright_notice ===========================*/

# TODO: Once upstreamed, update with LLORG revision & adjust per community review

From 492a1c879f338c3f12ef4d2f619ca2c8f2467da8 Mon Sep 17 00:00:00 2001
From: Artem Gindinson <artem.gindinson@intel.com>
Date: Wed, 23 Aug 2023 15:41:51 +0200
Subject: [PATCH] [InstCombine] Check for NaN before folding `select` for FP
 operators

---
 llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index e7d8208f9..341d8fc49 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -484,8 +484,12 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
     // instructions have different flags and add tests to ensure the
     // behaviour is correct.
     FastMathFlags FMF;
-    if (isa<FPMathOperator>(&SI))
+    if (isa<FPMathOperator>(&SI)) {
       FMF = SI.getFastMathFlags();
+      // Avoid folding on NaN inputs
+      if (!FMF.noNaNs())
+        return nullptr;
+    }
     Constant *C = ConstantExpr::getBinOpIdentity(
         TVI->getOpcode(), TVI->getType(), true, FMF.noSignedZeros());
     Value *OOp = TVI->getOperand(2 - OpToFold);
-- 
2.43.0

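The shape of the guarded fold, as a hypothetical reduction (the NaN rationale here is an informal reading of the patch, not its commit message): foldSelectIntoOp rewrites a select whose false arm is an operand of its true arm into the FP operation applied to a select between the other operand and the operation's identity constant. On the not-taken path the original returns %x untouched, while the folded form still sends %x through the FP operation with the identity, and for NaN inputs that round trip is not guaranteed to be bit-identical; hence the new requirement for the nnan fast-math flag.

```llvm
; Hypothetical reduction. The fold would turn
;   %r = select i1 %c, float %mul, float %x    ; %mul = fmul float %x, %y
; into
;   %sel = select i1 %c, float %y, float 1.0
;   %r   = fmul float %x, %sel
; which no longer returns %x verbatim when %c is false.
define float @fold_shape(i1 %c, float %x, float %y) {
  %mul = fmul float %x, %y
  %r = select i1 %c, float %mul, float %x
  ret float %r
}
```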
@@ -0,0 +1,29 @@
/*========================== begin_copyright_notice ============================

Copyright (C) 2025 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#    Description                  : Fix ambiguous evaluation order

diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp
--- a/llvm/lib/IR/DebugInfo.cpp
+++ b/llvm/lib/IR/DebugInfo.cpp
@@ -683,7 +683,11 @@ private:

       return getReplacementMDNode(N);
     };
-    Replacements[N] = doRemap(N);
+    // Seperate recursive doRemap and operator [] into 2 lines to avoid
+    // out-of-order evaluations since both of them can access the same memory
+    // location in map Replacements.
+    auto Value = doRemap(N);
+    Replacements[N] = Value;
   }

   /// Do the remapping traversal.
-- 
2.43.0

external/llvm/releases/17.0.0/patches_external/make-getPreviousDefRecursive-iterative.patch (new file, 367 lines, vendored)
@@ -0,0 +1,367 @@
/*========================== begin_copyright_notice ============================

Copyright (C) 2025 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#    Description                  : Refactor getPreviousDefRecursive to getPreviousDefIterative

diff --git a/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/llvm/include/llvm/Analysis/MemorySSAUpdater.h
--- a/llvm/include/llvm/Analysis/MemorySSAUpdater.h
+++ b/llvm/include/llvm/Analysis/MemorySSAUpdater.h
@@ -251,10 +251,7 @@ private:
   MemoryAccess *getPreviousDef(MemoryAccess *);
   MemoryAccess *getPreviousDefInBlock(MemoryAccess *);
   MemoryAccess *
-  getPreviousDefFromEnd(BasicBlock *,
-                        DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &);
-  MemoryAccess *
-  getPreviousDefRecursive(BasicBlock *,
+  getPreviousDefIterative(BasicBlock *,
                           DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &);
   MemoryAccess *recursePhi(MemoryAccess *Phi);
   MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi);
diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp
--- a/llvm/lib/Analysis/MemorySSAUpdater.cpp
+++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp
@@ -20,6 +20,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/Support/Debug.h"
 #include <algorithm>
+#include <stack>
 
 #define DEBUG_TYPE "memoryssa"
 using namespace llvm;
@@ -33,66 +34,42 @@ using namespace llvm;
 // that there are two or more definitions needing to be merged.
 // This still will leave non-minimal form in the case of irreducible control
 // flow, where phi nodes may be in cycles with themselves, but unnecessary.
-MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(
-    BasicBlock *BB,
+MemoryAccess *MemorySSAUpdater::getPreviousDefIterative(
+    BasicBlock *BBB,
     DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &CachedPreviousDef) {
-  // First, do a cache lookup. Without this cache, certain CFG structures
-  // (like a series of if statements) take exponential time to visit.
-  auto Cached = CachedPreviousDef.find(BB);
-  if (Cached != CachedPreviousDef.end())
-    return Cached->second;
-
-  // If this method is called from an unreachable block, return LoE.
-  if (!MSSA->DT->isReachableFromEntry(BB))
-    return MSSA->getLiveOnEntryDef();
 
-  if (BasicBlock *Pred = BB->getUniquePredecessor()) {
-    VisitedBlocks.insert(BB);
-    // Single predecessor case, just recurse, we can only have one definition.
-    MemoryAccess *Result = getPreviousDefFromEnd(Pred, CachedPreviousDef);
-    CachedPreviousDef.insert({BB, Result});
-    return Result;
-  }
+  // There're 5 cases, case 3 (easy) and case 5 (hard) has recursives.
+  // We need special states to handle their recursive returns
+  enum State {COMMON, CASE3, CASE5};
 
-  if (VisitedBlocks.count(BB)) {
-    // We hit our node again, meaning we had a cycle, we must insert a phi
-    // node to break it so we have an operand. The only case this will
-    // insert useless phis is if we have irreducible control flow.
-    MemoryAccess *Result = MSSA->createMemoryPhi(BB);
-    CachedPreviousDef.insert({BB, Result});
-    return Result;
-  }
+  // This is the common frame required for everything
+  struct Frame {
+    BasicBlock *bb;
+    MemoryAccess *rtn;
+    State st;
+  };
 
-  if (VisitedBlocks.insert(BB).second) {
-    // Mark us visited so we can detect a cycle
+  // This is the additional info only required by Case 5
+  struct FrameCase5 {
     SmallVector<TrackingVH<MemoryAccess>, 8> PhiOps;
+    bool UniqueIncomingAccess;
+    MemoryAccess *SingleAccess;
+    pred_iterator PredIt;
+  };
 
-    // Recurse to get the values in our predecessors for placement of a
-    // potential phi node. This will insert phi nodes if we cycle in order to
-    // break the cycle and have an operand.
-    bool UniqueIncomingAccess = true;
-    MemoryAccess *SingleAccess = nullptr;
-    for (auto *Pred : predecessors(BB)) {
-      if (MSSA->DT->isReachableFromEntry(Pred)) {
-        auto *IncomingAccess = getPreviousDefFromEnd(Pred, CachedPreviousDef);
-        if (!SingleAccess)
-          SingleAccess = IncomingAccess;
-        else if (IncomingAccess != SingleAccess)
-          UniqueIncomingAccess = false;
-        PhiOps.push_back(IncomingAccess);
-      } else
-        PhiOps.push_back(MSSA->getLiveOnEntryDef());
-    }
-
+  auto Case5AfterLoop = [&](SmallVector<TrackingVH<MemoryAccess>, 8> & PhiOps,
+      bool & UniqueIncomingAccess, MemoryAccess *& SingleAccess,
+      BasicBlock * BB) -> MemoryAccess * {
    // Now try to simplify the ops to avoid placing a phi.
    // This may return null if we never created a phi yet, that's okay
    MemoryPhi *Phi = dyn_cast_or_null<MemoryPhi>(MSSA->getMemoryAccess(BB));
 
    // See if we can avoid the phi by simplifying it.
-    auto *Result = tryRemoveTrivialPhi(Phi, PhiOps);
+    MemoryAccess *Result = tryRemoveTrivialPhi(Phi, PhiOps);
    // If we couldn't simplify, we may have to create a phi
    if (Result == Phi && UniqueIncomingAccess && SingleAccess) {
-      // A concrete Phi only exists if we created an empty one to break a cycle.
+      // A concrete Phi only exists if we created an empty one to break a
+      // cycle.
      if (Phi) {
        assert(Phi->operands().empty() && "Expected empty Phi");
        Phi->replaceAllUsesWith(SingleAccess);
@@ -104,12 +81,13 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(
        Phi = MSSA->createMemoryPhi(BB);
 
      // See if the existing phi operands match what we need.
-      // Unlike normal SSA, we only allow one phi node per block, so we can't just
-      // create a new one.
+      // Unlike normal SSA, we only allow one phi node per block, so we
+      // can't just create a new one.
      if (Phi->getNumOperands() != 0) {
        // FIXME: Figure out whether this is dead code and if so remove it.
        if (!std::equal(Phi->op_begin(), Phi->op_end(), PhiOps.begin())) {
-          // These will have been filled in by the recursive read we did above.
+          // These will have been filled in by the recursive read we did
+          // above.
          llvm::copy(PhiOps, Phi->op_begin());
          std::copy(pred_begin(BB), pred_end(BB), Phi->block_begin());
        }
@@ -126,8 +104,170 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefRecursive(
    VisitedBlocks.erase(BB);
    CachedPreviousDef.insert({BB, Result});
    return Result;
+  };
+
+  // We may want to switch to vector to boot performance
+  std::stack<Frame> SF;
+  std::stack<FrameCase5> SF5;
+  // The return frame
+  SF.push({nullptr, nullptr, COMMON});
+  // The entry frame
+  SF.push({BBB, nullptr, COMMON});
+
+  while (SF.size() > 1) {
+
+    if (COMMON == SF.top().st) {
+      auto BB = SF.top().bb;
+      auto Cached = CachedPreviousDef.find(BB);
+      if (Cached != CachedPreviousDef.end()) {
+        SF.pop();
+        SF.top().rtn = Cached->second;
+        continue;
+      } else if (!MSSA->DT->isReachableFromEntry(BB)) {
+        SF.pop();
+        SF.top().rtn = MSSA->getLiveOnEntryDef();
+        continue;
+      } else if (BasicBlock *Pred = BB->getUniquePredecessor()) {
+        VisitedBlocks.insert(BB);
+        // Single predecessor case, just recurse, we can only have one
+        // definition.
+        MemoryAccess *prevDefFromEnd = nullptr;
+        auto *Defs = MSSA->getWritableBlockDefs(Pred);
+        if (Defs) {
+          CachedPreviousDef.insert({Pred, &*Defs->rbegin()});
+          prevDefFromEnd = &*Defs->rbegin();
+        } else {
+          SF.top().st = CASE3;
+          SF.push({Pred, nullptr, COMMON});
+          continue;
+        }
+        MemoryAccess *Result = prevDefFromEnd;
+        CachedPreviousDef.insert({BB, Result});
+        SF.pop();
+        SF.top().rtn = Result;
+        continue;
+      } else if (VisitedBlocks.count(BB)) {
+        // We hit our node again, meaning we had a cycle, we must insert a phi
+        // node to break it so we have an operand. The only case this will
+        // insert useless phis is if we have irreducible control flow.
+        MemoryAccess *Result = MSSA->createMemoryPhi(BB);
+        CachedPreviousDef.insert({BB, Result});
+        SF.pop();
+        SF.top().rtn = Result;
+        continue;
+      } else if (VisitedBlocks.insert(BB).second) {
+        // Mark us visited so we can detect a cycle
+        SmallVector<TrackingVH<MemoryAccess>, 8> PhiOps;
+
+        // Recurse to get the values in our predecessors for placement of a
+        // potential phi node. This will insert phi nodes if we cycle in order
+        // to break the cycle and have an operand.
+        bool UniqueIncomingAccess = true;
+        MemoryAccess *SingleAccess = nullptr;
+        bool halt = false;
+        for (auto PredIt = predecessors(BB).begin();
+             PredIt != predecessors(BB).end(); PredIt++) {
+          auto Pred = *PredIt;
+          if (MSSA->DT->isReachableFromEntry(Pred)) {
+            MemoryAccess *prevDefFromEnd = nullptr;
+            auto *Defs = MSSA->getWritableBlockDefs(Pred);
+            if (Defs) {
+              CachedPreviousDef.insert({Pred, &*Defs->rbegin()});
+              prevDefFromEnd = &*Defs->rbegin();
+            } else {
+              SF.top().st = CASE5;
+              SF.push({Pred, nullptr, COMMON});
+              SF5.push({
+                  std::move(PhiOps), UniqueIncomingAccess, SingleAccess,
+                        std::move(PredIt)
+              });
+              halt = true;
+              break;
+            }
+            auto *IncomingAccess = prevDefFromEnd;
+            if (!SingleAccess)
+              SingleAccess = IncomingAccess;
+            else if (IncomingAccess != SingleAccess)
+              UniqueIncomingAccess = false;
+            PhiOps.push_back(IncomingAccess);
+          } else
+            PhiOps.push_back(MSSA->getLiveOnEntryDef());
+        }
+        if (halt)
+          continue;
+
+        auto Result =
+            Case5AfterLoop(PhiOps, UniqueIncomingAccess, SingleAccess, BB);
+
+        // Set ourselves up for the next variable by resetting visited state.
+        VisitedBlocks.erase(BB);
+        CachedPreviousDef.insert({BB, Result});
+        SF.pop();
+        SF.top().rtn = Result;
+        continue;
+      }
+      llvm_unreachable("Should have hit one of the five cases above");
+    } else if (CASE3 == SF.top().st) {
+      auto Result = SF.top().rtn;
+      CachedPreviousDef.insert({SF.top().bb, Result});
+      SF.pop();
+      SF.top().rtn = Result;
+      continue;
+    } else { // CASE5
+      // recover header
+      auto &PhiOps = SF5.top().PhiOps;
+      auto &UniqueIncomingAccess = SF5.top().UniqueIncomingAccess;
+      auto &SingleAccess = SF5.top().SingleAccess;
+      auto &PredIt = SF5.top().PredIt;
+      auto IncomingAccess = SF.top().rtn;
+      auto BB = SF.top().bb;
+
+      // in-loop remaining code
+      if (!SingleAccess)
+        SingleAccess = IncomingAccess;
+      else if (IncomingAccess != SingleAccess)
+        UniqueIncomingAccess = false;
+      PhiOps.push_back(IncomingAccess);
+
+      // remaining loop
+      bool halt = false;
+      for (PredIt++; PredIt != predecessors(BB).end(); PredIt++) {
+        auto Pred = *PredIt;
+        if (MSSA->DT->isReachableFromEntry(Pred)) {
+          MemoryAccess *prevDefFromEnd = nullptr;
+          auto *Defs = MSSA->getWritableBlockDefs(Pred);
+          if (Defs) {
+            CachedPreviousDef.insert({Pred, &*Defs->rbegin()});
+            prevDefFromEnd = &*Defs->rbegin();
+          } else {
+            SF.push({Pred, nullptr, COMMON});
+            halt = true;
+            break;
+          }
+          auto *IncomingAccess = prevDefFromEnd;
+          if (!SingleAccess)
+            SingleAccess = IncomingAccess;
+          else if (IncomingAccess != SingleAccess)
+            UniqueIncomingAccess = false;
+          PhiOps.push_back(IncomingAccess);
+        } else
+          PhiOps.push_back(MSSA->getLiveOnEntryDef());
+      }
+      if (halt)
+        continue;
+      // after loop
+      auto Result =
+          Case5AfterLoop(PhiOps, UniqueIncomingAccess, SingleAccess, BB);
+      SF.pop();
+      SF.top().rtn = Result;
+      SF5.pop();
+      continue;
+    }
+
+    llvm_unreachable("Should have hit one of the three cases above");
   }
-  llvm_unreachable("Should have hit one of the three cases above");
+  assert(0 == SF5.size());
+  return SF.top().rtn;
 }
 
 // This starts at the memory access, and goes backwards in the block to find the
@@ -138,7 +278,7 @@ MemoryAccess *MemorySSAUpdater::getPreviousDef(MemoryAccess *MA) {
   if (auto *LocalResult = getPreviousDefInBlock(MA))
     return LocalResult;
   DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> CachedPreviousDef;
-  return getPreviousDefRecursive(MA->getBlock(), CachedPreviousDef);
+  return getPreviousDefIterative(MA->getBlock(), CachedPreviousDef);
 }
 
 // This starts at the memory access, and goes backwards in the block to the find
@@ -168,19 +308,6 @@ MemoryAccess *MemorySSAUpdater::getPreviousDefInBlock(MemoryAccess *MA) {
   return nullptr;
 }
 
-// This starts at the end of block
-MemoryAccess *MemorySSAUpdater::getPreviousDefFromEnd(
-    BasicBlock *BB,
-    DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> &CachedPreviousDef) {
-  auto *Defs = MSSA->getWritableBlockDefs(BB);
-
-  if (Defs) {
-    CachedPreviousDef.insert({BB, &*Defs->rbegin()});
-    return &*Defs->rbegin();
-  }
-
-  return getPreviousDefRecursive(BB, CachedPreviousDef);
-}
 // Recurse over a set of phi uses to eliminate the trivial ones
 MemoryAccess *MemorySSAUpdater::recursePhi(MemoryAccess *Phi) {
   if (!Phi)
@@ -396,7 +523,17 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
       auto *BBIDF = MPhi->getBlock();
       for (auto *Pred : predecessors(BBIDF)) {
         DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> CachedPreviousDef;
-        MPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef), Pred);
+        // inline getPreviousDefFromEnd start
+        MemoryAccess *prevDefFromEnd = nullptr;
+        auto *Defs = MSSA->getWritableBlockDefs(Pred);
+        if (Defs) {
+          CachedPreviousDef.insert({Pred, &*Defs->rbegin()});
+          prevDefFromEnd = & * Defs->rbegin();
+        } else {
+          prevDefFromEnd = getPreviousDefIterative(Pred, CachedPreviousDef);
+        }
+        // inline getPreviousDefFromEnd end
+        MPhi->addIncoming(prevDefFromEnd, Pred);
       }
     }


-- 
2.43.0

external/llvm/releases/17.0.0/patches_external/no-autoupgrader-igc-struct-typed-intrinsic.patch (new file, 39 lines, vendored)
@@ -0,0 +1,39 @@
/*========================== begin_copyright_notice ============================

Copyright (C) 2025 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

/*========================== begin_copyright_notice ============================

Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
See https://llvm.org/LICENSE.txt for license information.
SPDX-License-Identifier: Apache-2.0 with LLVM-exception

============================= end_copyright_notice ===========================*/

This is a cherry-pick of commit https://github.com/llvm/llvm-project/commit/a87738f86b17f4a8dcde538c60826506e2a27ed1:

"[AutoUpgrade] Don't upgrade intrinsics returning overloaded struct type"

---

diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 7b9c55ff3..d0d5c9f4e 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -1131,7 +1131,8 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
   }

   auto *ST = dyn_cast<StructType>(F->getReturnType());
-  if (ST && (!ST->isLiteral() || ST->isPacked())) {
+  if (ST && (!ST->isLiteral() || ST->isPacked()) &&
+      F->getIntrinsicID() != Intrinsic::not_intrinsic) {
     // Replace return type with literal non-packed struct. Only do this for
     // intrinsics declared to return a struct, not for intrinsics with
     // overloaded return type, in which case the exact struct type will be
-- 
2.43.0

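The IGC-side motivation, sketched with a hypothetical declaration (the intrinsic name below is made up; IGC's real GenISA intrinsics follow a similar llvm.genx.GenISA.* naming scheme): such functions sit in the llvm. namespace yet are Intrinsic::not_intrinsic as far as LLVM is concerned, and before this cherry-pick UpgradeIntrinsicFunction1 would rewrite their named or packed struct return types into literal structs, breaking the compiler's own lookups.

```llvm
; Hypothetical sketch: a target-specific "intrinsic" living under the llvm.
; prefix that core LLVM does not recognize. The patched check leaves its
; named struct return type alone instead of auto-upgrading it.
%pair = type { i32, i32 }

declare %pair @llvm.genx.GenISA.hypothetical.pair(i32)
```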
external/llvm/releases/17.0.0/patches_external/unify-max-alignment-with-generic-max.patch (new file, 143 lines, vendored)
@@ -0,0 +1,143 @@
/*========================== begin_copyright_notice ============================

Copyright (C) 2025 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

/*========================== begin_copyright_notice ============================

Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
See https://llvm.org/LICENSE.txt for license information.
SPDX-License-Identifier: Apache-2.0 with LLVM-exception

============================= end_copyright_notice ===========================*/

This is backport of this fix: https://github.com/llvm/llvm-project/pull/99257

"[IR] Unify max alignment for arguments with generic max align."

From 47f034550a5fb9ef6adee6347cd3c00e70ca663d Mon Sep 17 00:00:00 2001
From: Eli Friedman <efriedma@quicinc.com>
Date: Tue, 16 Jul 2024 16:03:38 -0700
Subject: [PATCH 1/3] Unify max alignment for arguments with generic max align.

The 2^14 limit was completely arbitrary; the generic limit is still
arbitrary, but at least it's the same arbitrary limit as everything
else.

While I'm here, also add a verifier check for the ByValOrByRefSize.
---
 llvm/include/llvm/CodeGen/TargetCallingConv.h |  8 ++---
 llvm/lib/IR/Verifier.cpp                      | 34 +++++++++++--------
 2 files changed, 23 insertions(+), 19 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h
index 89ea9bcb2a40..70a2d8e5faaf 100644
--- a/llvm/include/llvm/CodeGen/TargetCallingConv.h
+++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h
@@ -45,9 +45,9 @@ namespace ISD {
     unsigned IsHva : 1;        ///< HVA field for
     unsigned IsHvaStart : 1;   ///< HVA structure start
     unsigned IsSecArgPass : 1; ///< Second argument
-    unsigned MemAlign : 4;     ///< Log 2 of alignment when arg is passed in memory
-                               ///< (including byval/byref). The max alignment is
-                               ///< verified in IR verification.
+    unsigned MemAlign : 6; ///< Log 2 of alignment when arg is passed in memory
+                           ///< (including byval/byref). The max alignment is
+                           ///< verified in IR verification.
     unsigned OrigAlign : 5;    ///< Log 2 of original alignment
     unsigned IsInConsecutiveRegsLast : 1;
     unsigned IsInConsecutiveRegs : 1;
@@ -67,7 +67,7 @@ namespace ISD {
           IsSecArgPass(0), MemAlign(0), OrigAlign(0),
           IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0),
           IsCopyElisionCandidate(0), IsPointer(0) {
-      static_assert(sizeof(*this) == 3 * sizeof(unsigned), "flags are too big");
+      static_assert(sizeof(*this) == 4 * sizeof(unsigned), "flags are too big");
     }
 
     bool isZExt() const { return IsZExt; }
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 1408ce293ca6..2370bfd0a2be 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -291,13 +291,6 @@ namespace {
 
 class Verifier : public InstVisitor<Verifier>, VerifierSupport {
   friend class InstVisitor<Verifier>;
-
-  // ISD::ArgFlagsTy::MemAlign only have 4 bits for alignment, so
-  // the alignment size should not exceed 2^15. Since encode(Align)
-  // would plus the shift value by 1, the alignment size should
-  // not exceed 2^14, otherwise it can NOT be properly lowered
-  // in backend.
-  static constexpr unsigned ParamMaxAlignment = 1 << 14;
   DominatorTree DT;
 
   /// When verifying a basic block, keep track of all of the
@@ -1939,31 +1932,43 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
   }
 
   if (isa<PointerType>(Ty)) {
+    if (Attrs.hasAttribute(Attribute::Alignment)) {
+      Align AttrAlign = Attrs.getAlignment().valueOrOne();
+      Check(AttrAlign.value() <= Value::MaximumAlignment,
+            "huge alignment values are unsupported", V);
+    }
     if (Attrs.hasAttribute(Attribute::ByVal)) {
-      if (Attrs.hasAttribute(Attribute::Alignment)) {
-        Align AttrAlign = Attrs.getAlignment().valueOrOne();
-        Align MaxAlign(ParamMaxAlignment);
-        Check(AttrAlign <= MaxAlign,
-              "Attribute 'align' exceed the max size 2^14", V);
-      }
       SmallPtrSet<Type *, 4> Visited;
       Check(Attrs.getByValType()->isSized(&Visited),
             "Attribute 'byval' does not support unsized types!", V);
+      Check(DL.getTypeAllocSize(Attrs.getByValType()).getKnownMinValue() <
+                (1ULL << 32),
+            "huge 'byval' arguments are unsupported", V);
     }
     if (Attrs.hasAttribute(Attribute::ByRef)) {
       SmallPtrSet<Type *, 4> Visited;
       Check(Attrs.getByRefType()->isSized(&Visited),
             "Attribute 'byref' does not support unsized types!", V);
+      Check(DL.getTypeAllocSize(Attrs.getByRefType()).getKnownMinValue() <
+                (1ULL << 32),
+            "huge 'byref' arguments are unsupported", V);
     }
     if (Attrs.hasAttribute(Attribute::InAlloca)) {
       SmallPtrSet<Type *, 4> Visited;
       Check(Attrs.getInAllocaType()->isSized(&Visited),
             "Attribute 'inalloca' does not support unsized types!", V);
+      Check(DL.getTypeAllocSize(Attrs.getInAllocaType()).getKnownMinValue() <
+                (1ULL << 32),
+            "huge 'inalloca' arguments are unsupported", V);
     }
     if (Attrs.hasAttribute(Attribute::Preallocated)) {
       SmallPtrSet<Type *, 4> Visited;
       Check(Attrs.getPreallocatedType()->isSized(&Visited),
             "Attribute 'preallocated' does not support unsized types!", V);
+      Check(
+        DL.getTypeAllocSize(Attrs.getPreallocatedType()).getKnownMinValue() <
+            (1ULL << 32),
+        "huge 'preallocated' arguments are unsupported", V);
     }
   }
 
@@ -3424,8 +3429,7 @@ void Verifier::visitCallBase(CallBase &Call) {
     if (!Ty->isSized())
       return;
     Align ABIAlign = DL.getABITypeAlign(Ty);
-    Align MaxAlign(ParamMaxAlignment);
-    Check(ABIAlign <= MaxAlign,
+    Check(ABIAlign.value() <= Value::MaximumAlignment,
           "Incorrect alignment of " + Message + " to called function!", Call);
   };
 

-- 
2.43.0

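What the backport buys, as a small hypothetical example: parameter alignments above 2^14 used to trip the verifier's ParamMaxAlignment check; after the patch, anything up to the generic Value::MaximumAlignment (2^32) is accepted, so a declaration like the following now verifies.

```llvm
; Hypothetical sketch: align 32768 (2^15) exceeds the old 2^14 parameter
; limit but is well under the generic maximum alignment.
define void @large_align(ptr byval(i32) align 32768 %p) {
  ret void
}
```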