mirror of
				https://github.com/intel/intel-graphics-compiler.git
				synced 2025-10-30 08:18:26 +08:00 
			
		
		
		
	GenXPromoteArray opaque pointers fix
Do not rely on bitcasts when deciding whether an index adjustment is necessary. In opaque-pointer mode, the accessed element type can change from one instruction to the next without any intervening bitcast.
This commit is contained in:
		@ -1,6 +1,6 @@
 | 
			
		||||
/*========================== begin_copyright_notice ============================
 | 
			
		||||
 | 
			
		||||
Copyright (C) 2019-2024 Intel Corporation
 | 
			
		||||
Copyright (C) 2019-2025 Intel Corporation
 | 
			
		||||
 | 
			
		||||
SPDX-License-Identifier: MIT
 | 
			
		||||
 | 
			
		||||
@ -148,15 +148,14 @@ namespace {
 | 
			
		||||
// a considered element in a considered vector.
 | 
			
		||||
struct GenericVectorIndex {
 | 
			
		||||
  Value *Index;
 | 
			
		||||
  int ElementSizeInBits;
 | 
			
		||||
  bool NeedAdjust = false;
 | 
			
		||||
  unsigned ElementSizeInBits;
 | 
			
		||||
 | 
			
		||||
  int getElementSizeInBytes() const {
 | 
			
		||||
  unsigned getElementSizeInBytes() const {
 | 
			
		||||
    return ElementSizeInBits / genx::ByteBits;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  template <typename FolderT = ConstantFolder>
 | 
			
		||||
  void adjustIndex(Type *Ty, IRBuilder<FolderT> &IRB);
 | 
			
		||||
  void adjust(Type *Ty, IRBuilder<FolderT> &IRB);
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
class TransposeHelper {
 | 
			
		||||
@ -228,25 +227,35 @@ Type *getBaseType(Type *Ty, Type *BaseTy) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <typename FolderT>
 | 
			
		||||
void GenericVectorIndex::adjustIndex(Type *Ty, IRBuilder<FolderT> &IRB) {
 | 
			
		||||
  if (!NeedAdjust)
 | 
			
		||||
    return;
 | 
			
		||||
void GenericVectorIndex::adjust(Type *Ty, IRBuilder<FolderT> &IRB) {
 | 
			
		||||
  auto *BaseTy = getBaseType(Ty, nullptr);
 | 
			
		||||
  IGC_ASSERT_EXIT(BaseTy);
 | 
			
		||||
  if (BaseTy->getScalarSizeInBits() == ElementSizeInBits ||
 | 
			
		||||
  unsigned NewElementSizeInBits = BaseTy->getScalarSizeInBits();
 | 
			
		||||
  if (NewElementSizeInBits == ElementSizeInBits ||
 | 
			
		||||
      vc::isFunctionPointerType(BaseTy))
 | 
			
		||||
    return;
 | 
			
		||||
  IGC_ASSERT_EXIT(BaseTy->getScalarSizeInBits() == 8);
 | 
			
		||||
  Constant *Scale =
 | 
			
		||||
      IRB.getInt32(ElementSizeInBits / BaseTy->getScalarSizeInBits());
 | 
			
		||||
  if (Index->getType()->isVectorTy()) {
 | 
			
		||||
    auto Width =
 | 
			
		||||
        cast<IGCLLVM::FixedVectorType>(Index->getType())->getNumElements();
 | 
			
		||||
    Scale = ConstantVector::getSplat(IGCLLVM::getElementCount(Width), Scale);
 | 
			
		||||
  if (NewElementSizeInBits < ElementSizeInBits) {
 | 
			
		||||
    IGC_ASSERT_MESSAGE(ElementSizeInBits % NewElementSizeInBits == 0,
 | 
			
		||||
                       "New element size is not a divisor of the current one");
 | 
			
		||||
    Constant *Scale = IRB.getInt32(ElementSizeInBits / NewElementSizeInBits);
 | 
			
		||||
    if (Index->getType()->isVectorTy()) {
 | 
			
		||||
      auto Width =
 | 
			
		||||
          cast<IGCLLVM::FixedVectorType>(Index->getType())->getNumElements();
 | 
			
		||||
      Scale = ConstantVector::getSplat(IGCLLVM::getElementCount(Width), Scale);
 | 
			
		||||
    }
 | 
			
		||||
    Index = IRB.CreateMul(Index, Scale);
 | 
			
		||||
  } else {
 | 
			
		||||
    IGC_ASSERT_MESSAGE(NewElementSizeInBits % ElementSizeInBits == 0,
 | 
			
		||||
                       "New element size is not a multiple of the current one");
 | 
			
		||||
    Constant *Scale = IRB.getInt32(NewElementSizeInBits / ElementSizeInBits);
 | 
			
		||||
    if (Index->getType()->isVectorTy()) {
 | 
			
		||||
      auto Width =
 | 
			
		||||
          cast<IGCLLVM::FixedVectorType>(Index->getType())->getNumElements();
 | 
			
		||||
      Scale = ConstantVector::getSplat(IGCLLVM::getElementCount(Width), Scale);
 | 
			
		||||
    }
 | 
			
		||||
    Index = IRB.CreateUDiv(Index, Scale);
 | 
			
		||||
  }
 | 
			
		||||
  Index = IRB.CreateMul(Index, Scale);
 | 
			
		||||
  ElementSizeInBits = BaseTy->getScalarSizeInBits();
 | 
			
		||||
  NeedAdjust = false;
 | 
			
		||||
  ElementSizeInBits = NewElementSizeInBits;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <typename FolderT>
 | 
			
		||||
@ -291,7 +300,6 @@ void TransposeHelper::EraseDeadCode() {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void TransposeHelper::handleBCInst(BitCastInst &BC, GenericVectorIndex Idx) {
 | 
			
		||||
  Idx.NeedAdjust = true;
 | 
			
		||||
  ToBeRemoved.push_back(&BC);
 | 
			
		||||
  handleAllocaSources(BC, Idx);
 | 
			
		||||
}
 | 
			
		||||
@ -375,7 +383,7 @@ void TransposeHelper::handleGEPInst(GetElementPtrInst *GEP,
 | 
			
		||||
                                    GenericVectorIndex Idx) {
 | 
			
		||||
  ToBeRemoved.push_back(GEP);
 | 
			
		||||
  IRBuilder<> IRB(GEP);
 | 
			
		||||
  Idx.adjustIndex(GEP->getSourceElementType(), IRB);
 | 
			
		||||
  Idx.adjust(GEP->getSourceElementType(), IRB);
 | 
			
		||||
  Value *PtrOp = GEP->getPointerOperand();
 | 
			
		||||
  PointerType *PtrTy = dyn_cast<PointerType>(PtrOp->getType());
 | 
			
		||||
  IGC_ASSERT_MESSAGE(PtrTy, "Only accept scalar pointer!");
 | 
			
		||||
@ -499,7 +507,7 @@ void TransposeHelper::handlePHINode(PHINode *Phi, GenericVectorIndex Idx,
 | 
			
		||||
void TransposeHelper::handleLoadInst(LoadInst *Load, GenericVectorIndex Idx) {
 | 
			
		||||
  IGC_ASSERT(Load->isSimple());
 | 
			
		||||
  IRBuilder<> IRB(Load);
 | 
			
		||||
  Idx.adjustIndex(Load->getType(), IRB);
 | 
			
		||||
  Idx.adjust(Load->getType(), IRB);
 | 
			
		||||
  auto *ScalarizedIdx =
 | 
			
		||||
      IRB.CreateMul(Idx.Index, ConstantInt::get(Idx.Index->getType(),
 | 
			
		||||
                                                Idx.getElementSizeInBytes()));
 | 
			
		||||
@ -559,7 +567,7 @@ void TransposeHelper::handleStoreInst(StoreInst *Store,
 | 
			
		||||
  IGC_ASSERT(Store->isSimple());
 | 
			
		||||
  IRBuilder<> IRB(Store);
 | 
			
		||||
  Value *StoreVal = Store->getValueOperand();
 | 
			
		||||
  Idx.adjustIndex(StoreVal->getType(), IRB);
 | 
			
		||||
  Idx.adjust(StoreVal->getType(), IRB);
 | 
			
		||||
  auto *ScalarizedIdx =
 | 
			
		||||
      IRB.CreateMul(Idx.Index, ConstantInt::get(Idx.Index->getType(),
 | 
			
		||||
                                                Idx.getElementSizeInBytes()));
 | 
			
		||||
@ -626,7 +634,7 @@ void TransposeHelper::handleStoreInst(StoreInst *Store,
 | 
			
		||||
void TransposeHelper::handleGather(IntrinsicInst *Inst, GenericVectorIndex Idx,
 | 
			
		||||
                                   unsigned MaskIndex, unsigned ValueIndex) {
 | 
			
		||||
  IRBuilder<> IRB(Inst);
 | 
			
		||||
  Idx.adjustIndex(Inst->getType(), IRB);
 | 
			
		||||
  Idx.adjust(Type::getInt8Ty(Inst->getContext()), IRB);
 | 
			
		||||
  auto *ScalarizedIdx =
 | 
			
		||||
      IRB.CreateMul(Idx.Index, ConstantInt::get(Idx.Index->getType(),
 | 
			
		||||
                                                Idx.getElementSizeInBytes()));
 | 
			
		||||
@ -666,8 +674,8 @@ void TransposeHelper::handleGather(IntrinsicInst *Inst, GenericVectorIndex Idx,
 | 
			
		||||
void TransposeHelper::handleScatter(IntrinsicInst *Inst, GenericVectorIndex Idx,
 | 
			
		||||
                                    unsigned MaskIndex, unsigned ValueIndex) {
 | 
			
		||||
  IRBuilder<> IRB(Inst);
 | 
			
		||||
  Idx.adjust(Type::getInt8Ty(Inst->getContext()), IRB);
 | 
			
		||||
  auto *StoreVal = Inst->getArgOperand(ValueIndex);
 | 
			
		||||
  Idx.adjustIndex(StoreVal->getType(), IRB);
 | 
			
		||||
  auto *ScalarizedIdx =
 | 
			
		||||
      IRB.CreateMul(Idx.Index, ConstantInt::get(Idx.Index->getType(),
 | 
			
		||||
                                                Idx.getElementSizeInBytes()));
 | 
			
		||||
@ -1122,8 +1130,8 @@ void GenXPromoteArray::handleAllocaInst(AllocaInst *Alloca) {
 | 
			
		||||
    return;
 | 
			
		||||
 | 
			
		||||
  IRBuilder<> IRB(VecAlloca);
 | 
			
		||||
  GenericVectorIndex StartIdx{IRB.getInt32(0),
 | 
			
		||||
                              static_cast<int>(DL->getTypeSizeInBits(BaseTy))};
 | 
			
		||||
  GenericVectorIndex StartIdx{
 | 
			
		||||
      IRB.getInt32(0), static_cast<unsigned>(DL->getTypeSizeInBits(BaseTy))};
 | 
			
		||||
  TransposeHelper Helper(VecAlloca, DL);
 | 
			
		||||
  Helper.handleAllocaSources(*Alloca, StartIdx);
 | 
			
		||||
  Helper.EraseDeadCode();
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										92
									
								
								IGC/VectorCompiler/test/PromoteArray/opaque_ptrs.ll
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										92
									
								
								IGC/VectorCompiler/test/PromoteArray/opaque_ptrs.ll
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,92 @@
 | 
			
		||||
;=========================== begin_copyright_notice ============================
 | 
			
		||||
;
 | 
			
		||||
; Copyright (C) 2025 Intel Corporation
 | 
			
		||||
;
 | 
			
		||||
; SPDX-License-Identifier: MIT
 | 
			
		||||
;
 | 
			
		||||
;============================ end_copyright_notice =============================
 | 
			
		||||
 | 
			
		||||
; RUN: %opt_opaque_ptrs %use_old_pass_manager% -GenXPromoteArray -march=genx64 -mcpu=XeLP -S < %s | FileCheck %s --check-prefixes=CHECK
 | 
			
		||||
 | 
			
		||||
define dllexport spir_kernel void @f_f(ptr addrspace(1) %out) {
 | 
			
		||||
; CHECK: [[ALLOCA:%.*]] = alloca <4 x i32>
 | 
			
		||||
  %alloca = alloca [4 x i32], align 64
 | 
			
		||||
; CHECK-NEXT: [[LOAD0:%.*]] = load <4 x i32>, ptr [[ALLOCA]]
 | 
			
		||||
; CHECK-NEXT: [[INS0:%.*]] = insertelement <4 x i32> [[LOAD0]], i32 0, i32 0
 | 
			
		||||
; CHECK-NEXT: [[INS1:%.*]] = insertelement <4 x i32> [[INS0]], i32 1, i32 1
 | 
			
		||||
; CHECK-NEXT: store <4 x i32> [[INS1]], ptr [[ALLOCA]]
 | 
			
		||||
  store <2 x i32> <i32 0, i32 1>, ptr %alloca
 | 
			
		||||
; CHECK-NEXT: [[LOAD1:%.*]] = load <4 x i32>, ptr [[ALLOCA]]
 | 
			
		||||
; CHECK-NEXT: [[INS2:%.*]] = insertelement <4 x i32> [[LOAD1]], i32 2, i32 2
 | 
			
		||||
; CHECK-NEXT: [[INS3:%.*]] = insertelement <4 x i32> [[INS2]], i32 3, i32 3
 | 
			
		||||
; CHECK-NEXT: store <4 x i32> [[INS3]], ptr [[ALLOCA]]
 | 
			
		||||
  %gep1 = getelementptr i8, ptr %alloca, i64 8
 | 
			
		||||
  store <2 x i32> <i32 2, i32 3>, ptr %gep1
 | 
			
		||||
; CHECK-NEXT: [[LOAD2:%.*]] = load <4 x i32>, ptr [[ALLOCA]]
 | 
			
		||||
; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[LOAD2]] to <16 x i8>
 | 
			
		||||
; CHECK-NEXT: [[EX0:%.*]] = extractelement <16 x i8> [[BC]], i32 0
 | 
			
		||||
; CHECK-NEXT: [[INS4:%.*]] = insertelement <12 x i8> undef, i8 [[EX0]], i32 0
 | 
			
		||||
; CHECK-NEXT: [[EX1:%.*]] = extractelement <16 x i8> [[BC]], i32 1
 | 
			
		||||
; CHECK-NEXT: [[INS5:%.*]] = insertelement <12 x i8> [[INS4]], i8 [[EX1]], i32 1
 | 
			
		||||
; CHECK-NEXT: [[EX2:%.*]] = extractelement <16 x i8> [[BC]], i32 2
 | 
			
		||||
; CHECK-NEXT: [[INS6:%.*]] = insertelement <12 x i8> [[INS5]], i8 [[EX2]], i32 2
 | 
			
		||||
; CHECK-NEXT: [[EX3:%.*]] = extractelement <16 x i8> [[BC]], i32 3
 | 
			
		||||
; CHECK-NEXT: [[INS7:%.*]] = insertelement <12 x i8> [[INS6]], i8 [[EX3]], i32 3
 | 
			
		||||
; CHECK-NEXT: [[EX4:%.*]] = extractelement <16 x i8> [[BC]], i32 4
 | 
			
		||||
; CHECK-NEXT: [[INS8:%.*]] = insertelement <12 x i8> [[INS7]], i8 [[EX4]], i32 4
 | 
			
		||||
; CHECK-NEXT: [[EX5:%.*]] = extractelement <16 x i8> [[BC]], i32 5
 | 
			
		||||
; CHECK-NEXT: [[INS9:%.*]] = insertelement <12 x i8> [[INS8]], i8 [[EX5]], i32 5
 | 
			
		||||
; CHECK-NEXT: [[EX6:%.*]] = extractelement <16 x i8> [[BC]], i32 6
 | 
			
		||||
; CHECK-NEXT: [[INS10:%.*]] = insertelement <12 x i8> [[INS9]], i8 [[EX6]], i32 6
 | 
			
		||||
; CHECK-NEXT: [[EX7:%.*]] = extractelement <16 x i8> [[BC]], i32 7
 | 
			
		||||
; CHECK-NEXT: [[INS11:%.*]] = insertelement <12 x i8> [[INS10]], i8 [[EX7]], i32 7
 | 
			
		||||
; CHECK-NEXT: [[EX8:%.*]] = extractelement <16 x i8> [[BC]], i32 8
 | 
			
		||||
; CHECK-NEXT: [[INS12:%.*]] = insertelement <12 x i8> [[INS11]], i8 [[EX8]], i32 8
 | 
			
		||||
; CHECK-NEXT: [[EX9:%.*]] = extractelement <16 x i8> [[BC]], i32 9
 | 
			
		||||
; CHECK-NEXT: [[INS13:%.*]] = insertelement <12 x i8> [[INS12]], i8 [[EX9]], i32 9
 | 
			
		||||
; CHECK-NEXT: [[EX10:%.*]] = extractelement <16 x i8> [[BC]], i32 10
 | 
			
		||||
; CHECK-NEXT: [[INS14:%.*]] = insertelement <12 x i8> [[INS13]], i8 [[EX10]], i32 10
 | 
			
		||||
; CHECK-NEXT: [[EX11:%.*]] = extractelement <16 x i8> [[BC]], i32 11
 | 
			
		||||
; CHECK-NEXT: [[INS15:%.*]] = insertelement <12 x i8> [[INS14]], i8 [[EX11]], i32 11
 | 
			
		||||
  %gep2 = getelementptr i8, ptr %alloca, i64 4
 | 
			
		||||
  %load1 = load <12 x i8>, ptr %alloca
 | 
			
		||||
; CHECK-NEXT: [[LOAD3:%.*]] = load <4 x i32>, ptr [[ALLOCA]]
 | 
			
		||||
; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[LOAD3]] to <16 x i8>
 | 
			
		||||
; CHECK-NEXT: [[EX12:%.*]] = extractelement <12 x i8> [[INS15]], i32 0
 | 
			
		||||
; CHECK-NEXT: [[INS16:%.*]] = insertelement <16 x i8> [[BC1]], i8 [[EX12]], i32 4
 | 
			
		||||
; CHECK-NEXT: [[EX13:%.*]] = extractelement <12 x i8> [[INS15]], i32 1
 | 
			
		||||
; CHECK-NEXT: [[INS17:%.*]] = insertelement <16 x i8> [[INS16]], i8 [[EX13]], i32 5
 | 
			
		||||
; CHECK-NEXT: [[EX14:%.*]] = extractelement <12 x i8> [[INS15]], i32 2
 | 
			
		||||
; CHECK-NEXT: [[INS18:%.*]] = insertelement <16 x i8> [[INS17]], i8 [[EX14]], i32 6
 | 
			
		||||
; CHECK-NEXT: [[EX15:%.*]] = extractelement <12 x i8> [[INS15]], i32 3
 | 
			
		||||
; CHECK-NEXT: [[INS19:%.*]] = insertelement <16 x i8> [[INS18]], i8 [[EX15]], i32 7
 | 
			
		||||
; CHECK-NEXT: [[EX16:%.*]] = extractelement <12 x i8> [[INS15]], i32 4
 | 
			
		||||
; CHECK-NEXT: [[INS20:%.*]] = insertelement <16 x i8> [[INS19]], i8 [[EX16]], i32 8
 | 
			
		||||
; CHECK-NEXT: [[EX17:%.*]] = extractelement <12 x i8> [[INS15]], i32 5
 | 
			
		||||
; CHECK-NEXT: [[INS21:%.*]] = insertelement <16 x i8> [[INS20]], i8 [[EX17]], i32 9
 | 
			
		||||
; CHECK-NEXT: [[EX18:%.*]] = extractelement <12 x i8> [[INS15]], i32 6
 | 
			
		||||
; CHECK-NEXT: [[INS22:%.*]] = insertelement <16 x i8> [[INS21]], i8 [[EX18]], i32 10
 | 
			
		||||
; CHECK-NEXT: [[EX19:%.*]] = extractelement <12 x i8> [[INS15]], i32 7
 | 
			
		||||
; CHECK-NEXT: [[INS23:%.*]] = insertelement <16 x i8> [[INS22]], i8 [[EX19]], i32 11
 | 
			
		||||
; CHECK-NEXT: [[EX20:%.*]] = extractelement <12 x i8> [[INS15]], i32 8
 | 
			
		||||
; CHECK-NEXT: [[INS24:%.*]] = insertelement <16 x i8> [[INS23]], i8 [[EX20]], i32 12
 | 
			
		||||
; CHECK-NEXT: [[EX21:%.*]] = extractelement <12 x i8> [[INS15]], i32 9
 | 
			
		||||
; CHECK-NEXT: [[INS25:%.*]] = insertelement <16 x i8> [[INS24]], i8 [[EX21]], i32 13
 | 
			
		||||
; CHECK-NEXT: [[EX22:%.*]] = extractelement <12 x i8> [[INS15]], i32 10
 | 
			
		||||
; CHECK-NEXT: [[INS26:%.*]] = insertelement <16 x i8> [[INS25]], i8 [[EX22]], i32 14
 | 
			
		||||
; CHECK-NEXT: [[EX23:%.*]] = extractelement <12 x i8> [[INS15]], i32 11
 | 
			
		||||
; CHECK-NEXT: [[INS27:%.*]] = insertelement <16 x i8> [[INS26]], i8 [[EX23]], i32 15
 | 
			
		||||
; CHECK-NEXT: [[BC2:%.*]] = bitcast <16 x i8> [[INS27]] to <4 x i32>
 | 
			
		||||
; CHECK-NEXT: store <4 x i32> [[BC2]], ptr [[ALLOCA]]
 | 
			
		||||
  store <12 x i8> %load1, ptr %gep2
 | 
			
		||||
; CHECK-NEXT: [[LOAD4:%.*]] = load <4 x i32>, ptr [[ALLOCA]]
 | 
			
		||||
; CHECK-NEXT: [[EX24:%.*]] = extractelement <4 x i32> [[LOAD4]], i32 0
 | 
			
		||||
; CHECK-NEXT: [[INS28:%.*]] = insertelement <2 x i32> undef, i32 [[EX24]], i32 0
 | 
			
		||||
; CHECK-NEXT: [[EX25:%.*]] = extractelement <4 x i32> [[LOAD4]], i32 1
 | 
			
		||||
; CHECK-NEXT: [[INS29:%.*]] = insertelement <2 x i32> [[INS28]], i32 [[EX25]], i32 1
 | 
			
		||||
  %load2 = load <2 x i32>, ptr %alloca
 | 
			
		||||
; CHECK-NEXT: store <2 x i32> [[INS29]], ptr addrspace(1) %out
 | 
			
		||||
  store <2 x i32> %load2, ptr addrspace(1) %out
 | 
			
		||||
; CHECK-NEXT: ret void
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
		Reference in New Issue
	
	Block a user