/*===================== begin_copyright_notice ==================================

Copyright (c) 2017 Intel Corporation

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


======================= end_copyright_notice ==================================*/

#include "Compiler/CISACodeGen/helper.h"
#include "Compiler/CISACodeGen/CISACodeGen.h"
#include "Compiler/Optimizer/OpenCLPasses/KernelArgs.hpp"
#include "Compiler/MetaDataUtilsWrapper.h"
#include "common/LLVMWarningsPush.hpp"
#include "llvm/Config/llvm-config.h"
#include "llvmWrapper/IR/DerivedTypes.h"
#include "llvmWrapper/Support/KnownBits.h"
#include "llvmWrapper/IR/Instructions.h"
#include "llvmWrapper/Support/Alignment.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/Analysis/ValueTracking.h"
#include "common/LLVMWarningsPop.hpp"
#include "GenISAIntrinsics/GenIntrinsicInst.h"
#include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
#include "common/secure_mem.h"
#include <stack>
#include "Probe/Assertion.h"

using namespace llvm;
using namespace GenISAIntrinsic;

/************************************************************************
This file contains helper functions for the code generator.

Many functions use X-macros, which let us keep the per-opcode encoding
data separate from the logic of the helper functions.

************************************************************************/

namespace IGC
{
    typedef union _gfxResourceAddrSpace
    {
        struct _bits
        {
            unsigned int bufId : 16;
            unsigned int bufType : 4;
            unsigned int indirect : 1; // bool
            unsigned int reserved : 11;
        } bits;
        uint32_t u32Val;
    } GFXResourceAddrSpace;

    unsigned EncodeAS4GFXResource(
        const llvm::Value& bufIdx,
        BufferType bufType,
        unsigned uniqueIndAS)
    {
        GFXResourceAddrSpace temp;
        temp.u32Val = 0;
        IGC_ASSERT((bufType + 1) < 16);
        temp.bits.bufType = bufType + 1;
        if (bufType == SLM)
        {
            return ADDRESS_SPACE_LOCAL; // We use addrspace 3 for SLM
        }
        else if (bufType == STATELESS_READONLY)
        {
            return ADDRESS_SPACE_CONSTANT;
        }
        else if (bufType == STATELESS)
        {
            return ADDRESS_SPACE_GLOBAL;
        }
        else if (llvm::isa<llvm::ConstantInt>(&bufIdx))
        {
            unsigned int bufId = (unsigned int)(llvm::cast<llvm::ConstantInt>(&bufIdx)->getZExtValue());
            IGC_ASSERT(bufId < (1 << 31));
            temp.bits.bufId = bufId;
            return temp.u32Val;
        }

        // If it is an indirect buffer, it is the front end's job to give a proper (unique) address space per access.
        temp.bits.bufId = uniqueIndAS;
        temp.bits.indirect = 1;
        return temp.u32Val;
    }
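
    // Illustrative example (added for readability, not part of the original
    // source): for a direct (non-indirect) buffer the encoded address space
    // packs the buffer id into bits [15:0] and bufType+1 into bits [19:16].
    // So a buffer with id 2 and a bufType whose enum value is 0 encodes as
    // ((0 + 1) << 16) | 2 == 0x10002, which lies above
    // ADDRESS_SPACE_NUM_ADDRESSES and is therefore recognized as stateful by
    // isStatefulAddrSpace() below.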

    // Return true if AS is for a stateful surface.
    // Stateful surfaces should have an encoded AS that is bigger than
    // ADDRESS_SPACE_NUM_ADDRESSES.
    bool isStatefulAddrSpace(unsigned AS)
    {
        return AS > ADDRESS_SPACE_NUM_ADDRESSES;
    }

    bool isDummyBasicBlock(llvm::BasicBlock* BB)
    {
        if (BB->size() != 1)
            return false;
        if ((++pred_begin(BB)) != pred_end(BB))
            return false;
        if ((++succ_begin(BB)) != succ_end(BB))
            return false;
        return true;
    }

    unsigned SetBufferAsBindless(unsigned addressSpaceOfPtr, BufferType bufferType)
    {
        GFXResourceAddrSpace temp = {};
        temp.u32Val = addressSpaceOfPtr;

        // Mark the buffer as bindless for further processing.
        // Note: these cases must form a single else-if chain; with
        // independent ifs the trailing else would incorrectly assert for
        // RESOURCE and CONSTANT_BUFFER.
        if (bufferType == BufferType::RESOURCE)
        {
            temp.bits.bufType = IGC::BINDLESS_TEXTURE + 1;
        }
        else if (bufferType == BufferType::CONSTANT_BUFFER)
        {
            temp.bits.bufType = IGC::BINDLESS_CONSTANT_BUFFER + 1;
        }
        else if (bufferType == BufferType::UAV)
        {
            temp.bits.bufType = IGC::BINDLESS + 1;
        }
        else if (bufferType == BufferType::SAMPLER)
        {
            temp.bits.bufType = IGC::BINDLESS_SAMPLER + 1;
        }
        else
        {
            IGC_ASSERT_MESSAGE(0, "other types of buffers shouldn't reach this part");
        }

        return temp.u32Val;
    }

    bool UsesTypedConstantBuffer(CodeGenContext* pContext)
    {
        if (pContext->m_DriverInfo.UsesTypedConstantBuffers3D() &&
            pContext->type != ShaderType::COMPUTE_SHADER)
        {
            return true;
        }
        if (pContext->m_DriverInfo.UsesTypedConstantBuffersGPGPU() &&
            pContext->type == ShaderType::COMPUTE_SHADER)
        {
            return true;
        }
        return false;
    }

    ///
    /// Decodes the buffer type from an encoded address space.
    /// If you want the resource dimension, use GetBufferDimension().
    ///
    BufferType DecodeAS4GFXResource(unsigned addrSpace, bool& directIndexing, unsigned& bufId)
    {
        GFXResourceAddrSpace temp;
        temp.u32Val = addrSpace;

        directIndexing = (temp.bits.indirect == 0);
        bufId = temp.bits.bufId;

        if (addrSpace == ADDRESS_SPACE_LOCAL)
        {
            return SLM;
        }
        unsigned bufType = temp.bits.bufType - 1;
        if (bufType < BUFFER_TYPE_UNKNOWN)
        {
            return (BufferType)bufType;
        }
        return BUFFER_TYPE_UNKNOWN;
    }
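
    // A minimal usage sketch (illustrative only; `bufIdxVal` is a
    // hypothetical local): decoding an address space produced by
    // EncodeAS4GFXResource recovers the original buffer type and, for
    // direct indexing, the original buffer id:
    //
    //   bool direct; unsigned id;
    //   unsigned as = EncodeAS4GFXResource(*bufIdxVal, CONSTANT_BUFFER, 0);
    //   BufferType ty = DecodeAS4GFXResource(as, direct, id);
    //   // ty == CONSTANT_BUFFER; direct is true if bufIdxVal is a ConstantInt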
    ///
    /// returns constant buffer load offset
    ///
    int getConstantBufferLoadOffset(llvm::LoadInst* ld)
    {
        int offset = 0;
        Value* ptr = ld->getPointerOperand();
        if (isa<ConstantPointerNull>(ptr))
        {
            offset = 0;
        }
        else if (IntToPtrInst* itop = dyn_cast<IntToPtrInst>(ptr))
        {
            ConstantInt* ci = dyn_cast<ConstantInt>(itop->getOperand(0));
            if (ci)
            {
                offset = int_cast<unsigned>(ci->getZExtValue());
            }
        }
        else if (ConstantExpr* itop = dyn_cast<ConstantExpr>(ptr))
        {
            if (itop->getOpcode() == Instruction::IntToPtr)
            {
                offset = int_cast<unsigned>(
                    cast<ConstantInt>(itop->getOperand(0))->getZExtValue());
            }
        }
        return offset;
    }
    ///
    /// Returns true if direct addressing is used.
    ///
    bool IsDirectIdx(unsigned addrSpace)
    {
        GFXResourceAddrSpace temp;
        temp.u32Val = addrSpace;
        return (temp.bits.indirect == 0);
    }

    bool isNaNCheck(llvm::FCmpInst& FC)
    {
        Value* Op1 = FC.getOperand(1);
        if (FC.getPredicate() == CmpInst::FCMP_UNO)
        {
            auto CFP = dyn_cast<ConstantFP>(Op1);
            return CFP && CFP->isZero();
        }
        else if (FC.getPredicate() == CmpInst::FCMP_UNE)
        {
            Value* Op0 = FC.getOperand(0);
            return Op0 == Op1;
        }
        return false;
    }

    llvm::LoadInst* cloneLoad(llvm::LoadInst* Orig, llvm::Value* Ptr)
    {
        llvm::LoadInst* LI = new llvm::LoadInst(
            cast<PointerType>(Ptr->getType())->getElementType(),
            Ptr, "", false, Orig);
        LI->setVolatile(Orig->isVolatile());
        LI->setAlignment(IGCLLVM::getAlign(Orig->getAlignment()));
        if (LI->isAtomic())
        {
            LI->setAtomic(Orig->getOrdering(), Orig->getSyncScopeID());
        }
        // Clone metadata
        llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 4> MDs;
        Orig->getAllMetadata(MDs);
        for (llvm::SmallVectorImpl<std::pair<unsigned, llvm::MDNode*> >::iterator
            MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI)
        {
            LI->setMetadata(MI->first, MI->second);
        }
        return LI;
    }

    llvm::StoreInst* cloneStore(llvm::StoreInst* Orig, llvm::Value* Val, llvm::Value* Ptr)
    {
        llvm::StoreInst* SI = new llvm::StoreInst(Val, Ptr, Orig);
        SI->setVolatile(Orig->isVolatile());
        SI->setAlignment(IGCLLVM::getAlign(Orig->getAlignment()));
        if (SI->isAtomic())
        {
            SI->setAtomic(Orig->getOrdering(), Orig->getSyncScopeID());
        }
        // Clone metadata
        llvm::SmallVector<std::pair<unsigned, llvm::MDNode*>, 4> MDs;
        Orig->getAllMetadata(MDs);
        for (llvm::SmallVectorImpl<std::pair<unsigned, llvm::MDNode*> >::iterator
            MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI)
        {
            SI->setMetadata(MI->first, MI->second);
        }
        return SI;
    }

    // Create a ldraw from a load instruction
    Value* CreateLoadRawIntrinsic(LoadInst* inst, Instruction* bufPtr, Value* offsetVal)
    {
        Module* module = inst->getParent()->getParent()->getParent();
        Function* func = nullptr;
        IRBuilder<> builder(inst);

        llvm::Type* tys[2];
        tys[0] = inst->getType();
        tys[1] = bufPtr->getType();
        func = GenISAIntrinsic::getDeclaration(
            module,
            inst->getType()->isVectorTy() ?
                llvm::GenISAIntrinsic::GenISA_ldrawvector_indexed :
                llvm::GenISAIntrinsic::GenISA_ldraw_indexed,
            tys);

        unsigned alignment = (inst->getType()->getScalarSizeInBits() / 8);
        if (inst->getAlignment() > 0)
        {
            alignment = inst->getAlignment();
        }

        Value* attr[] =
        {
            bufPtr,
            offsetVal,
            builder.getInt32(alignment),
            builder.getInt1(inst->isVolatile()) // volatile
        };
        Value* ld = builder.CreateCall(func, attr);
        IGC_ASSERT(nullptr != ld);
        IGC_ASSERT(ld->getType() == inst->getType());
        return ld;
    }
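
    // For reference (added note, not from the original source): the call
    // built above has the operand order (buffer, offset, alignment,
    // isVolatile), so the buffer pointer is operand 0 of the resulting
    // ldraw_indexed / ldrawvector_indexed call, which is what
    // GetBufferOperand() further below relies on.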

    // Creates a storeraw from a store instruction
    Value* CreateStoreRawIntrinsic(StoreInst* inst, Instruction* bufPtr, Value* offsetVal)
    {
        Module* module = inst->getParent()->getParent()->getParent();
        Function* func = nullptr;
        IRBuilder<> builder(inst);
        Value* storeVal = inst->getValueOperand();
        if (storeVal->getType()->isVectorTy())
        {
            llvm::Type* tys[2];
            tys[0] = bufPtr->getType();
            tys[1] = inst->getValueOperand()->getType();
            func = GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_storerawvector_indexed, tys);
        }
        else
        {
            llvm::Type* dataType = storeVal->getType();
            IGC_ASSERT(nullptr != dataType);
            IGC_ASSERT((dataType->getPrimitiveSizeInBits() == 8) ||
                (dataType->getPrimitiveSizeInBits() == 16) ||
                (dataType->getPrimitiveSizeInBits() == 32));

            llvm::Type* types[2] = {
                bufPtr->getType(),
                storeVal->getType() };

            func = GenISAIntrinsic::getDeclaration(module, llvm::GenISAIntrinsic::GenISA_storeraw_indexed, types);
        }
        Value* attr[] =
        {
            bufPtr,
            offsetVal,
            storeVal,
            builder.getInt32(storeVal->getType()->getScalarSizeInBits() / 8), // alignment
            builder.getInt1(inst->isVolatile()) // volatile
        };
        Value* st = builder.CreateCall(func, attr);
        return st;
    }

    ///
    /// Tries to trace a resource pointer (texture/sampler/buffer) back to
    /// the pointer source. Also returns a vector of all instructions in the search path.
    ///
    Value* TracePointerSource(Value* resourcePtr, bool hasBranching, bool enablePhiLoops, bool fillList,
        std::vector<Value*>& instList, llvm::SmallSet<PHINode*, 8>& visitedPHIs)
    {
        Value* srcPtr = nullptr;
        Value* baseValue = resourcePtr;

        while (true)
        {
            if (fillList)
            {
                instList.push_back(baseValue);
            }

            if (GenIntrinsicInst* inst = dyn_cast<GenIntrinsicInst>(baseValue))
            {
                // For bindless pointers
                if ((inst->getIntrinsicID() == GenISAIntrinsic::GenISA_RuntimeValue) ||
                    (inst->getIntrinsicID() == GenISAIntrinsic::GenISA_GetBufferPtr))
                {
                    srcPtr = baseValue;
                }
                break;
            }
            else if (isa<Argument>(baseValue))
            {
                // For compute, the resource comes from the kernel args
                srcPtr = baseValue;
                break;
            }
            else if (isa<GlobalVariable>(baseValue))
            {
                // Can be an inline sampler/constant buffer
                srcPtr = baseValue;
                break;
            }
            else if (auto allocaInst = dyn_cast<AllocaInst>(baseValue))
            {
                if (allocaInst->getMetadata("igc.read_only_array"))
                {
                    // Found an alloca marked as a read_only array.
                    srcPtr = baseValue;
                }
                break;
            }
            else if (CastInst* inst = dyn_cast<CastInst>(baseValue))
            {
                baseValue = inst->getOperand(0);
            }
            else if (GetElementPtrInst* inst = dyn_cast<GetElementPtrInst>(baseValue))
            {
                baseValue = inst->getOperand(0);
            }
            else if (BinaryOperator* inst = dyn_cast<BinaryOperator>(baseValue))
            {
                // Assume this is pointer arithmetic, which allows add/sub only.
                // Follow the first operand, assuming it is the pointer base.
                // Do not check whether the operand is of pointer type now; leave
                // that check until the leaf instruction is found.
                Instruction::BinaryOps Opcode = inst->getOpcode();
                if (Opcode == Instruction::Add || Opcode == Instruction::Sub)
                {
                    baseValue = inst->getOperand(0);
                }
                else
                    break;
            }
            else if (PHINode* inst = dyn_cast<PHINode>(baseValue))
            {
                if (visitedPHIs.count(inst) != 0)
                {
                    // Stop if we've seen this phi node before.
                    return baseValue;
                }
                visitedPHIs.insert(inst);
                for (unsigned int i = 0; i < inst->getNumIncomingValues(); ++i)
                {
                    // All phi paths must be traceable and trace back to the same source.
                    Value* phiVal = inst->getIncomingValue(i);
                    std::vector<Value*> splitList;
                    Value* phiSrcPtr = TracePointerSource(phiVal, true, enablePhiLoops, fillList, splitList, visitedPHIs);
                    if (phiSrcPtr == nullptr)
                    {
                        // Incoming value not traceable, bail out.
                        return nullptr;
                    }
                    else if (isa<PHINode>(phiSrcPtr) && phiSrcPtr == baseValue)
                    {
                        // Found a loop in one of the phi paths. We can still trace as long as all the other paths match.
                        if (enablePhiLoops)
                            continue;
                        else
                            return nullptr;
                    }
                    else if (srcPtr == nullptr)
                    {
                        // Found a path to the source pointer. We only save the instructions used in this path.
                        srcPtr = phiSrcPtr;
                        instList.insert(instList.end(), splitList.begin(), splitList.end());
                    }
                    else if (srcPtr != phiSrcPtr)
                    {
                        // The source pointers have diverged. Bail out.
                        return nullptr;
                    }
                }
                break;
            }
            else if (SelectInst* inst = dyn_cast<SelectInst>(baseValue))
            {
                if (hasBranching)
                {
                    // Only a single branching instruction is supported for now.
                    // If both select and PHI are present, or there are multiples of each, we bail.
                    break;
                }
                // Trace both operands of the select instruction. Both have to be traced back to the same
                // source pointer; otherwise we can't determine which one to use.
                Value* selectSrc0 = TracePointerSource(inst->getOperand(1), true, enablePhiLoops, fillList, instList, visitedPHIs);
                Value* selectSrc1 = TracePointerSource(inst->getOperand(2), true, enablePhiLoops, false, instList, visitedPHIs);
                if (selectSrc0 && selectSrc1 && selectSrc0 == selectSrc1)
                {
                    srcPtr = selectSrc0;
                    break;
                }
                return nullptr;
            }
            else if (LoadInst* inst = dyn_cast<LoadInst>(baseValue))
            {
                if (inst->getPointerAddressSpace() == 0)
                {
                    // May be a local array of resources:
                    baseValue = inst->getPointerOperand();
                }
                else
                {
                    break;
                }
            }
            else
            {
                // Unsupported instruction in the search chain. Don't continue.
                break;
            }
        }
        return srcPtr;
    }
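
    // Summary of the terminating cases above (added for readability): the
    // trace succeeds when it reaches a GenISA_RuntimeValue or
    // GenISA_GetBufferPtr intrinsic, a kernel Argument, a GlobalVariable, or
    // an alloca tagged with "igc.read_only_array" metadata; any other leaf
    // ends the search with srcPtr == nullptr.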

    ///
    /// Only trace the GetBufferPtr instruction (ignore GetElementPtr)
    ///
    Value* TracePointerSource(Value* resourcePtr)
    {
        std::vector<Value*> tempList; // unused
        llvm::SmallSet<PHINode*, 8> visitedPHIs;
        return TracePointerSource(resourcePtr, false, true, false, tempList, visitedPHIs);
    }

    Value* TracePointerSource(Value* resourcePtr, bool hasBranching, bool enablePhiLoops, bool fillList, std::vector<Value*>& instList)
    {
        llvm::SmallSet<PHINode*, 8> visitedPHIs;
        return TracePointerSource(resourcePtr, hasBranching, enablePhiLoops, fillList, instList, visitedPHIs);
    }

    static BufferAccessType getDefaultAccessType(BufferType bufTy)
    {
        switch (bufTy)
        {
        case BufferType::CONSTANT_BUFFER:
        case BufferType::RESOURCE:
        case BufferType::BINDLESS_TEXTURE:
        case BufferType::BINDLESS_CONSTANT_BUFFER:
        case BufferType::STATELESS_READONLY:
        case BufferType::SAMPLER:
            return BufferAccessType::ACCESS_READ;

        case BufferType::UAV:
        case BufferType::SLM:
        case BufferType::POINTER:
        case BufferType::BINDLESS:
        case BufferType::STATELESS:
            return BufferAccessType::ACCESS_READWRITE;

        case BufferType::RENDER_TARGET:
            return BufferAccessType::ACCESS_WRITE;

        default:
            IGC_ASSERT_MESSAGE(0, "Invalid buffer type");
            return BufferAccessType::ACCESS_READWRITE;
        }
    }

    bool GetResourcePointerInfo(Value* srcPtr, unsigned& resID, IGC::BufferType& resTy, BufferAccessType& accessTy, bool& needBufferOffset)
    {
        accessTy = BufferAccessType::ACCESS_READWRITE;
        needBufferOffset = false;
        if (GenIntrinsicInst* inst = dyn_cast<GenIntrinsicInst>(srcPtr))
        {
            // For bindless pointers with encoded metadata
            if (inst->getIntrinsicID() == GenISAIntrinsic::GenISA_RuntimeValue)
            {
                if (inst->hasOperandBundles())
                {
                    auto resIDBundle = inst->getOperandBundle("resID");
                    auto resTyBundle = inst->getOperandBundle("resTy");
                    auto accessTyBundle = inst->getOperandBundle("accessTy");
                    auto needBufferOffsetBundle = inst->getOperandBundle("needBufferOffset");
                    if (resIDBundle && resTyBundle)
                    {
                        resID = (unsigned)(cast<ConstantInt>(resIDBundle->Inputs.front()))->getZExtValue();
                        resTy = (BufferType)(cast<ConstantInt>(resTyBundle->Inputs.front()))->getZExtValue();

                        if (accessTyBundle)
                            accessTy = (BufferAccessType)(cast<ConstantInt>(accessTyBundle->Inputs.front()))->getZExtValue();
                        else
                            accessTy = getDefaultAccessType(resTy);

                        if (needBufferOffsetBundle)
                            needBufferOffset = (bool)(cast<ConstantInt>(needBufferOffsetBundle->Inputs.front()))->getZExtValue();

                        return true;
                    }
                }
            }
            // For GetBufferPtr instructions with buffer info in the operands
            else if (inst->getIntrinsicID() == GenISAIntrinsic::GenISA_GetBufferPtr)
            {
                Value* bufIdV = inst->getOperand(0);
                Value* bufTyV = inst->getOperand(1);
                if (isa<ConstantInt>(bufIdV) && isa<ConstantInt>(bufTyV))
                {
                    resID = (unsigned)(cast<ConstantInt>(bufIdV)->getZExtValue());
                    resTy = (IGC::BufferType)(cast<ConstantInt>(bufTyV)->getZExtValue());
                    accessTy = getDefaultAccessType(resTy);
                    return true;
                }
            }
        }
        return false;
    }
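
    // Illustrative IR shape for the RuntimeValue case above (hypothetical
    // values; the bundle tags match the strings queried by this function,
    // and the mangled intrinsic name is shown only as a sketch):
    //
    //   %p = call i8* @llvm.genx.GenISA.RuntimeValue.p0i8(i32 4)
    //            [ "resID"(i32 1), "resTy"(i32 2), "accessTy"(i32 0) ]
    //
    // What matters is that resID/resTy (and optionally accessTy and
    // needBufferOffset) arrive as constant-int operand bundles.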

    // Get the GRF offset from a GenISA_RuntimeValue intrinsic call
    bool GetGRFOffsetFromRTV(Value* pointerSrc, unsigned& GRFOffset)
    {
        if (GenIntrinsicInst* inst = dyn_cast<GenIntrinsicInst>(pointerSrc))
        {
            // For bindless pointers with encoded metadata
            if (inst->getIntrinsicID() == GenISAIntrinsic::GenISA_RuntimeValue)
            {
                GRFOffset = (unsigned)llvm::cast<llvm::ConstantInt>(inst->getOperand(0))->getZExtValue();
                return true;
            }
        }
        return false;
    }

    bool GetStatelessBufferInfo(Value* pointer, unsigned& bufIdOrGRFOffset,
        BufferType& bufferTy, Value*& bufferSrcPtr, bool& isDirectBuf)
    {
        isDirectBuf = false;
        // If the buffer info is not encoded in the address space, we can still find it by
        // tracing the pointer back to where it is created.
        Value* src = IGC::TracePointerSource(pointer);
        BufferAccessType accType;
        bool needBufferOffset; // Unused
        if (!src) return false;
        if (IGC::GetResourcePointerInfo(src, bufIdOrGRFOffset, bufferTy, accType, needBufferOffset))
        {
            bufferSrcPtr = src;
            isDirectBuf = true;
            return true;
        }
        else if (GetGRFOffsetFromRTV(src, bufIdOrGRFOffset))
        {
            bufferSrcPtr = src;
            bufferTy = BUFFER_TYPE_UNKNOWN;
            return true;
        }
        return false;
    }

    bool EvalConstantAddress(Value* address, int& offset, const llvm::DataLayout* pDL, Value* ptrSrc)
    {
        if ((ptrSrc == nullptr && isa<ConstantPointerNull>(address)) ||
            (ptrSrc == address))
        {
            offset = 0;
            return true;
        }
        else if (ConstantExpr* ptrExpr = dyn_cast<ConstantExpr>(address))
        {
            if (ptrExpr->getOpcode() == Instruction::IntToPtr)
            {
                Value* eltIdxVal = ptrExpr->getOperand(0);
                ConstantInt* eltIdx = dyn_cast<ConstantInt>(eltIdxVal);
                if (!eltIdx)
                    return false;
                offset = int_cast<int>(eltIdx->getZExtValue());
                return true;
            }
        }
        else if (Instruction* ptrExpr = dyn_cast<Instruction>(address))
        {
            if (ptrExpr->getOpcode() == Instruction::BitCast ||
                ptrExpr->getOpcode() == Instruction::AddrSpaceCast)
            {
                return EvalConstantAddress(ptrExpr->getOperand(0), offset, pDL, ptrSrc);
            }
            if (ptrExpr->getOpcode() == Instruction::IntToPtr)
            {
                Value* eltIdxVal = ptrExpr->getOperand(0);
                ConstantInt* eltIdx = dyn_cast<ConstantInt>(eltIdxVal);
                if (!eltIdx)
                    return false;
                offset = int_cast<int>(eltIdx->getZExtValue());
                return true;
            }
            else if (ptrExpr->getOpcode() == Instruction::GetElementPtr)
            {
                offset = 0;
                if (!EvalConstantAddress(ptrExpr->getOperand(0), offset, pDL, ptrSrc))
                {
                    return false;
                }
                Type* Ty = ptrExpr->getType();
                gep_type_iterator GTI = gep_type_begin(ptrExpr);
                for (auto OI = ptrExpr->op_begin() + 1, E = ptrExpr->op_end(); OI != E; ++OI, ++GTI) {
                    Value* Idx = *OI;
                    if (StructType* StTy = GTI.getStructTypeOrNull()) {
                        unsigned Field = int_cast<unsigned>(cast<ConstantInt>(Idx)->getZExtValue());
                        if (Field) {
                            offset += int_cast<int>(pDL->getStructLayout(StTy)->getElementOffset(Field));
                        }
                        Ty = StTy->getElementType(Field);
                    }
                    else {
                        Ty = GTI.getIndexedType();
                        if (const ConstantInt* CI = dyn_cast<ConstantInt>(Idx)) {
                            offset += int_cast<int>(
                                pDL->getTypeAllocSize(Ty) * CI->getSExtValue());
                        }
                        else
                        {
                            return false;
                        }
                    }
                }
                return true;
            }
        }
        return false;
    }
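
    // Worked example (illustrative): with a typical data layout, for
    //   %p = getelementptr { i32, float }, { i32, float }* null, i32 0, i32 1
    // the struct-field branch above adds the StructLayout offset of field 1,
    // so EvalConstantAddress reports offset == 4.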


    ///
    /// Replaces oldPtr with newPtr in a sample/ld intrinsic's argument list. The new intrinsic
    /// replaces the old one in the module.
    ///
    void ChangePtrTypeInIntrinsic(llvm::GenIntrinsicInst*& pIntr, llvm::Value* oldPtr, llvm::Value* newPtr)
    {
        llvm::Module* pModule = pIntr->getParent()->getParent()->getParent();
        llvm::Function* pCalledFunc = pIntr->getCalledFunction();

        // Look at the intrinsic and figure out which pointer to change
        int num_ops = pIntr->getNumArgOperands();
        llvm::SmallVector<llvm::Value*, 5> args;

        for (int i = 0; i < num_ops; ++i)
        {
            if (pIntr->getArgOperand(i) == oldPtr)
                args.push_back(newPtr);
            else
                args.push_back(pIntr->getArgOperand(i));
        }

        llvm::Function* pNewIntr = nullptr;
        llvm::SmallVector<llvm::Type*, 4> overloadedTys;
        GenISAIntrinsic::ID id = pIntr->getIntrinsicID();
        switch (id)
        {
        case llvm::GenISAIntrinsic::GenISA_ldmcsptr:
            overloadedTys.push_back(pCalledFunc->getReturnType());
            overloadedTys.push_back(args[0]->getType());
            overloadedTys.push_back(newPtr->getType());
            break;
        case llvm::GenISAIntrinsic::GenISA_ldptr:
        case llvm::GenISAIntrinsic::GenISA_ldmsptr:
            overloadedTys.push_back(pCalledFunc->getReturnType());
            overloadedTys.push_back(newPtr->getType());
            break;
        case llvm::GenISAIntrinsic::GenISA_resinfoptr:
        case llvm::GenISAIntrinsic::GenISA_readsurfaceinfoptr:
        case llvm::GenISAIntrinsic::GenISA_sampleinfoptr:
            overloadedTys.push_back(newPtr->getType());
            break;
        case llvm::GenISAIntrinsic::GenISA_sampleptr:
        case llvm::GenISAIntrinsic::GenISA_sampleBptr:
        case llvm::GenISAIntrinsic::GenISA_sampleCptr:
        case llvm::GenISAIntrinsic::GenISA_sampleDptr:
        case llvm::GenISAIntrinsic::GenISA_sampleLptr:
        case llvm::GenISAIntrinsic::GenISA_sampleBCptr:
        case llvm::GenISAIntrinsic::GenISA_sampleDCptr:
        case llvm::GenISAIntrinsic::GenISA_sampleLCptr:
        case llvm::GenISAIntrinsic::GenISA_gather4ptr:
        case llvm::GenISAIntrinsic::GenISA_gather4POptr:
        case llvm::GenISAIntrinsic::GenISA_gather4Cptr:
        case llvm::GenISAIntrinsic::GenISA_gather4POCptr:
        case llvm::GenISAIntrinsic::GenISA_lodptr:
        {
            // Figure out the intrinsic operands for texture & sampler
            llvm::Value* pTextureValue = nullptr, * pSamplerValue = nullptr;
            getTextureAndSamplerOperands(pIntr, pTextureValue, pSamplerValue);

            overloadedTys.push_back(pCalledFunc->getReturnType());
            overloadedTys.push_back(pIntr->getOperand(0)->getType());

            if (pTextureValue == oldPtr)
            {
                overloadedTys.push_back(newPtr->getType());
                if (pSamplerValue)
                {
                    // Samplerless messages will not have a sampler in the signature.
                    overloadedTys.push_back(pSamplerValue->getType());
                }
            }
            else if (pSamplerValue == oldPtr)
            {
                overloadedTys.push_back(pTextureValue->getType());
                overloadedTys.push_back(newPtr->getType());
            }

            break;
        }
        case llvm::GenISAIntrinsic::GenISA_typedread:
        case llvm::GenISAIntrinsic::GenISA_typedwrite:
            overloadedTys.push_back(newPtr->getType());
            break;
        case llvm::GenISAIntrinsic::GenISA_intatomicraw:
        case llvm::GenISAIntrinsic::GenISA_icmpxchgatomicraw:
        case llvm::GenISAIntrinsic::GenISA_intatomicrawA64:
        case llvm::GenISAIntrinsic::GenISA_icmpxchgatomicrawA64:
        case llvm::GenISAIntrinsic::GenISA_floatatomicraw:
        case llvm::GenISAIntrinsic::GenISA_floatatomicrawA64:
        case llvm::GenISAIntrinsic::GenISA_fcmpxchgatomicraw:
        case llvm::GenISAIntrinsic::GenISA_fcmpxchgatomicrawA64:
            overloadedTys.push_back(pIntr->getType());
            overloadedTys.push_back(newPtr->getType());
            if (id == GenISAIntrinsic::GenISA_intatomicrawA64)
            {
                args[0] = args[1];
                args[1] = CastInst::CreatePointerCast(args[1], Type::getInt32Ty(pModule->getContext()), "", pIntr);
                id = GenISAIntrinsic::GenISA_intatomicraw;
            }
            else if (id == GenISAIntrinsic::GenISA_icmpxchgatomicrawA64)
            {
                args[0] = args[1];
                args[1] = CastInst::CreatePointerCast(args[1], Type::getInt32Ty(pModule->getContext()), "", pIntr);
                id = GenISAIntrinsic::GenISA_icmpxchgatomicraw;
            }
            else if (id == GenISAIntrinsic::GenISA_floatatomicrawA64)
            {
                args[0] = args[1];
                args[1] = CastInst::CreatePointerCast(args[1], Type::getFloatTy(pModule->getContext()), "", pIntr);
                id = GenISAIntrinsic::GenISA_floatatomicraw;
            }
            else if (id == GenISAIntrinsic::GenISA_fcmpxchgatomicrawA64)
            {
                args[0] = args[1];
                args[1] = CastInst::CreatePointerCast(args[1], Type::getFloatTy(pModule->getContext()), "", pIntr);
                id = GenISAIntrinsic::GenISA_fcmpxchgatomicraw;
            }
            break;
        case llvm::GenISAIntrinsic::GenISA_dwordatomicstructured:
        case llvm::GenISAIntrinsic::GenISA_floatatomicstructured:
        case llvm::GenISAIntrinsic::GenISA_cmpxchgatomicstructured:
        case llvm::GenISAIntrinsic::GenISA_fcmpxchgatomicstructured:
            overloadedTys.push_back(pIntr->getType());
            overloadedTys.push_back(args[0]->getType());
            break;
        case GenISAIntrinsic::GenISA_intatomictyped:
        case GenISAIntrinsic::GenISA_icmpxchgatomictyped:
            overloadedTys.push_back(pIntr->getType());
            overloadedTys.push_back(newPtr->getType());
            break;
        case GenISAIntrinsic::GenISA_atomiccounterinc:
        case GenISAIntrinsic::GenISA_atomiccounterpredec:
            overloadedTys.push_back(pIntr->getType());
            overloadedTys.push_back(args[0]->getType());
            break;
        //case llvm::GenISAIntrinsic::GenISA_ldrawvector_indexed:
        case llvm::GenISAIntrinsic::GenISA_ldraw_indexed:
            overloadedTys.push_back(pCalledFunc->getReturnType());
            overloadedTys.push_back(newPtr->getType());
            break;
        case llvm::GenISAIntrinsic::GenISA_storeraw_indexed:
            overloadedTys.push_back(newPtr->getType());
            overloadedTys.push_back(args[2]->getType());
            break;
        default:
            IGC_ASSERT_MESSAGE(0, "Unknown intrinsic encountered while changing pointer types");
            break;
        }

        pNewIntr = llvm::GenISAIntrinsic::getDeclaration(
            pModule,
            id,
            overloadedTys);

        llvm::CallInst* pNewCall = llvm::CallInst::Create(pNewIntr, args, "", pIntr);

        pIntr->replaceAllUsesWith(pNewCall);
        pIntr->eraseFromParent();

        pIntr = llvm::cast<llvm::GenIntrinsicInst>(pNewCall);
    }
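
    // Note (added for readability): the *A64 raw-atomic cases above lower a
    // stateless (64-bit address) atomic to its surface-based form once a
    // stateful pointer is available: the buffer pointer moves into the
    // first argument slot, the old pointer argument is cast to serve as the
    // 32-bit offset, and the intrinsic id is rewritten to the non-A64
    // variant.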

    ///
    /// Returns the sampler/texture pointers for resource access intrinsics
    ///
    void getTextureAndSamplerOperands(llvm::GenIntrinsicInst* pIntr, llvm::Value*& pTextureValue, llvm::Value*& pSamplerValue)
    {
        if (llvm::SamplerLoadIntrinsic* pSamplerLoadInst = llvm::dyn_cast<llvm::SamplerLoadIntrinsic>(pIntr))
        {
            pTextureValue = pSamplerLoadInst->getTextureValue();
            pSamplerValue = nullptr;
        }
        else if (llvm::SampleIntrinsic* pSampleInst = llvm::dyn_cast<llvm::SampleIntrinsic>(pIntr))
        {
            pTextureValue = pSampleInst->getTextureValue();
            pSamplerValue = pSampleInst->getSamplerValue();
        }
        else if (llvm::SamplerGatherIntrinsic* pGatherInst = llvm::dyn_cast<llvm::SamplerGatherIntrinsic>(pIntr))
        {
            pTextureValue = pGatherInst->getTextureValue();
            pSamplerValue = pGatherInst->getSamplerValue();
        }
        else
        {
            pTextureValue = nullptr;
            pSamplerValue = nullptr;
            switch (pIntr->getIntrinsicID())
            {
            case llvm::GenISAIntrinsic::GenISA_resinfoptr:
            case llvm::GenISAIntrinsic::GenISA_readsurfaceinfoptr:
            case llvm::GenISAIntrinsic::GenISA_sampleinfoptr:
            case llvm::GenISAIntrinsic::GenISA_typedwrite:
            case llvm::GenISAIntrinsic::GenISA_typedread:
                pTextureValue = pIntr->getOperand(0);
                break;
            default:
                break;
            }
        }
    }

    // Get the buffer pointer operand for supported buffer access instructions
    Value* GetBufferOperand(Instruction* inst)
    {
        Value* pBuffer = nullptr;
        if (LoadInst* load = dyn_cast<LoadInst>(inst))
        {
            pBuffer = load->getPointerOperand();
        }
        else if (StoreInst* store = dyn_cast<StoreInst>(inst))
        {
            pBuffer = store->getPointerOperand();
        }
        else if (GenIntrinsicInst* intr = dyn_cast<GenIntrinsicInst>(inst))
        {
            switch (intr->getIntrinsicID())
            {
            case GenISAIntrinsic::GenISA_storerawvector_indexed:
            case GenISAIntrinsic::GenISA_ldrawvector_indexed:
            case GenISAIntrinsic::GenISA_storeraw_indexed:
            case GenISAIntrinsic::GenISA_ldraw_indexed:
            case GenISAIntrinsic::GenISA_intatomicraw:
            case GenISAIntrinsic::GenISA_intatomictyped:
            case GenISAIntrinsic::GenISA_icmpxchgatomictyped:
            case GenISAIntrinsic::GenISA_floatatomicraw:
            case GenISAIntrinsic::GenISA_icmpxchgatomicraw:
            case GenISAIntrinsic::GenISA_fcmpxchgatomicraw:
            case GenISAIntrinsic::GenISA_simdBlockRead:
            case GenISAIntrinsic::GenISA_simdBlockWrite:
                pBuffer = intr->getOperand(0);
                break;
            case GenISAIntrinsic::GenISA_intatomicrawA64:
            case GenISAIntrinsic::GenISA_floatatomicrawA64:
            case GenISAIntrinsic::GenISA_icmpxchgatomicrawA64:
            case GenISAIntrinsic::GenISA_fcmpxchgatomicrawA64:
                // The A64 raw atomics carry the pointer in operand 1.
                pBuffer = intr->getOperand(1);
                break;
            default:
                break;
            }
        }
        return pBuffer;
    }

    EOPCODE GetOpCode(const llvm::Instruction* inst)
    {
        if (const GenIntrinsicInst* CI = dyn_cast<GenIntrinsicInst>(inst))
        {
            unsigned ID = CI->getIntrinsicID();
            return (EOPCODE)(OPCODE(ID, e_Intrinsic));
        }
        else if (const IntrinsicInst* CI = llvm::dyn_cast<llvm::IntrinsicInst>(inst))
        {
            unsigned ID = CI->getIntrinsicID();
            return (EOPCODE)(OPCODE(ID, e_Intrinsic));
        }
        return (EOPCODE)(OPCODE(inst->getOpcode(), e_Instruction));
    }

    BufferType GetBufferType(uint addrSpace)
    {
        bool directIndexing = false;
        unsigned int bufId = 0;
        return DecodeAS4GFXResource(addrSpace, directIndexing, bufId);
    }

    bool IsReadOnlyLoadDirectCB(llvm::Instruction* pLLVMInst,
        uint& cbId, llvm::Value*& eltPtrVal, BufferType& bufType)
    {
        LoadInst* inst = dyn_cast<LoadInst>(pLLVMInst);
        if (!inst)
        {
            return false;
        }
        bool isInvLoad = inst->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
        unsigned as = inst->getPointerAddressSpace();
        bool directBuf;
        // cbId gets filled in by the following call.
        bufType = IGC::DecodeAS4GFXResource(as, directBuf, cbId);
        if ((bufType == CONSTANT_BUFFER || bufType == RESOURCE || isInvLoad) && directBuf)
        {
            Value* ptrVal = inst->getPointerOperand();
            // Skip bitcasts and find the real address computation
            while (isa<BitCastInst>(ptrVal))
            {
                ptrVal = cast<BitCastInst>(ptrVal)->getOperand(0);
            }
            if (isa<ConstantPointerNull>(ptrVal) ||
                isa<IntToPtrInst>(ptrVal) ||
                isa<GetElementPtrInst>(ptrVal) ||
                isa<ConstantExpr>(ptrVal) ||
                isa<Argument>(ptrVal))
            {
                eltPtrVal = ptrVal;
                return true;
            }
        }
        return false;
    }

    bool IsLoadFromDirectCB(llvm::Instruction* pLLVMInst, uint& cbId, llvm::Value*& eltPtrVal)
    {
        BufferType bufType = BUFFER_TYPE_UNKNOWN;
        bool isReadOnly = IsReadOnlyLoadDirectCB(pLLVMInst, cbId, eltPtrVal, bufType);
        return isReadOnly && bufType == CONSTANT_BUFFER;
    }

    /// This is a texture load, not a buffer load.
    bool isLdInstruction(llvm::Instruction* inst)
    {
        return isa<SamplerLoadIntrinsic>(inst);
    }

    // Returns the texture operand of sample/ld instructions
    llvm::Value* getTextureIndexArgBasedOnOpcode(llvm::Instruction* inst)
    {
        if (isLdInstruction(inst))
        {
            return cast<SamplerLoadIntrinsic>(inst)->getTextureValue();
        }
        else if (isSampleInstruction(inst))
        {
            return cast<SampleIntrinsic>(inst)->getTextureValue();
        }
        else if (isGather4Instruction(inst))
        {
            return cast<SamplerGatherIntrinsic>(inst)->getTextureValue();
        }

        return nullptr;
    }

    int findSampleInstructionTextureIdx(llvm::Instruction* inst)
    {
        // Fetch the texture argument.
        Value* ptr = getTextureIndexArgBasedOnOpcode(inst);
        unsigned textureIdx = -1;

        if (ptr && ptr->getType()->isPointerTy())
        {
            BufferType bufType = BUFFER_TYPE_UNKNOWN;
            if (!(isa<GenIntrinsicInst>(ptr) &&
                cast<GenIntrinsicInst>(ptr)->getIntrinsicID() == GenISAIntrinsic::GenISA_GetBufferPtr))
            {
                uint as = ptr->getType()->getPointerAddressSpace();
                bool directIndexing;
                bufType = DecodeAS4GFXResource(as, directIndexing, textureIdx);
                if (bufType == UAV)
                {
                    // Don't do any clustering on read/write images
                    textureIdx = -1;
                }
            }
        }
        else if (ptr)
        {
            if (llvm::dyn_cast<llvm::ConstantInt>(ptr))
            {
                textureIdx = int_cast<unsigned>(GetImmediateVal(ptr));
            }
        }

        return textureIdx;
    }

    bool isSampleLoadGather4InfoInstruction(llvm::Instruction* inst)
    {
        if (isa<GenIntrinsicInst>(inst))
        {
            switch ((cast<GenIntrinsicInst>(inst))->getIntrinsicID())
            {
            case GenISAIntrinsic::GenISA_sampleptr:
            case GenISAIntrinsic::GenISA_sampleBptr:
            case GenISAIntrinsic::GenISA_sampleCptr:
            case GenISAIntrinsic::GenISA_sampleDptr:
            case GenISAIntrinsic::GenISA_sampleDCptr:
            case GenISAIntrinsic::GenISA_sampleLptr:
            case GenISAIntrinsic::GenISA_sampleLCptr:
            case GenISAIntrinsic::GenISA_sampleBCptr:
            case GenISAIntrinsic::GenISA_lodptr:
            case GenISAIntrinsic::GenISA_ldptr:
            case GenISAIntrinsic::GenISA_ldmsptr:
            case GenISAIntrinsic::GenISA_ldmsptr16bit:
            case GenISAIntrinsic::GenISA_ldmcsptr:
            case GenISAIntrinsic::GenISA_sampleinfoptr:
            case GenISAIntrinsic::GenISA_resinfoptr:
            case GenISAIntrinsic::GenISA_gather4ptr:
            case GenISAIntrinsic::GenISA_gather4Cptr:
            case GenISAIntrinsic::GenISA_gather4POptr:
            case GenISAIntrinsic::GenISA_gather4POCptr:
                return true;
            default:
                return false;
            }
        }

        return false;
    }

    bool isSampleInstruction(llvm::Instruction* inst)
    {
        return isa<SampleIntrinsic>(inst);
    }

    bool isInfoInstruction(llvm::Instruction* inst)
    {
        return isa<InfoIntrinsic>(inst);
    }

    bool isGather4Instruction(llvm::Instruction* inst)
    {
        return isa<SamplerGatherIntrinsic>(inst);
    }

    bool IsMediaIOIntrinsic(llvm::Instruction* inst)
    {
        if (auto* pGI = dyn_cast<llvm::GenIntrinsicInst>(inst))
        {
            GenISAIntrinsic::ID id = pGI->getIntrinsicID();

            return id == GenISAIntrinsic::GenISA_MediaBlockRead ||
                id == GenISAIntrinsic::GenISA_MediaBlockWrite;
        }

        return false;
    }

    bool IsSIMDBlockIntrinsic(llvm::Instruction* inst)
    {
        if (auto* pGI = dyn_cast<llvm::GenIntrinsicInst>(inst))
        {
            GenISAIntrinsic::ID id = pGI->getIntrinsicID();

            return id == GenISAIntrinsic::GenISA_simdBlockRead ||
                id == GenISAIntrinsic::GenISA_simdBlockWrite;
        }

        return false;
    }

    bool isSubGroupIntrinsic(const llvm::Instruction* I)
    {
        const GenIntrinsicInst* GII = dyn_cast<GenIntrinsicInst>(I);
        if (!GII)
            return false;

        switch (GII->getIntrinsicID())
        {
        case GenISAIntrinsic::GenISA_WaveShuffleIndex:
        case GenISAIntrinsic::GenISA_simdShuffleDown:
        case GenISAIntrinsic::GenISA_simdBlockRead:
        case GenISAIntrinsic::GenISA_simdBlockWrite:
        case GenISAIntrinsic::GenISA_simdMediaBlockRead:
        case GenISAIntrinsic::GenISA_simdMediaBlockWrite:
        case GenISAIntrinsic::GenISA_MediaBlockWrite:
        case GenISAIntrinsic::GenISA_MediaBlockRead:
            return true;
        default:
            return false;
        }
    }

    bool isURBWriteIntrinsic(const llvm::Instruction* I)
    {
        const GenIntrinsicInst* GII = dyn_cast<GenIntrinsicInst>(I);
        if (!GII)
            return false;

        return GII->getIntrinsicID() == GenISA_URBWrite;
    }

    llvm::Instruction* AdjustSystemValueCall(llvm::GenIntrinsicInst* inst)
    {
        IGC_ASSERT(inst->getIntrinsicID() == GenISAIntrinsic::GenISA_DCL_SystemValue);
        llvm::Module* pModule = inst->getParent()->getParent()->getParent();
        auto CommonConvertFunc = [pModule](llvm::GenIntrinsicInst* inst, llvm::Type* outputType)
        {
            IGC_ASSERT(outputType->isVectorTy() == false);
            IGC_ASSERT(inst->getType()->isVectorTy() == false);
            llvm::Instruction* result = inst;
            if (inst->getType() != outputType)
            {
                llvm::IRBuilder<> builder(inst);
                llvm::Function* systemValueFunc = llvm::GenISAIntrinsic::getDeclaration(pModule, GenISAIntrinsic::GenISA_DCL_SystemValue, outputType);
                llvm::Instruction* sgv = builder.CreateCall(systemValueFunc, inst->getOperand(0));
                // By default a system value intrinsic returns a float value. The returned value is
                // bit-cast to an appropriate integer or floating-point value, in reference to the HW
                // specification. Casting from floating point to integer (or in the opposite
                // direction) is not expected.
                sgv = llvm::cast<llvm::Instruction>(builder.CreateZExtOrTrunc(sgv, builder.getIntNTy((unsigned int)inst->getType()->getPrimitiveSizeInBits())));
                sgv = llvm::cast<llvm::Instruction>(builder.CreateBitCast(sgv, inst->getType()));
                inst->replaceAllUsesWith(sgv);
                inst->eraseFromParent();
                result = sgv;
            }
            return result;
        };

        SGVUsage usage = static_cast<SGVUsage>(llvm::cast<llvm::ConstantInt>(inst->getOperand(0))->getZExtValue());
        llvm::Instruction* result = inst;

        switch (usage)
        {
        case THREAD_ID_IN_GROUP_X:
        case THREAD_ID_IN_GROUP_Y:
        case THREAD_ID_IN_GROUP_Z:
            result = CommonConvertFunc(inst, llvm::IntegerType::get(pModule->getContext(), 16));
            break;
        default:
            break;
        }
        return result;
    }

    bool isReadInput(llvm::Instruction* pLLVMInstr);

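    // The query functions below are generated with an X-macro: "opCode.h"
    // holds one DECLARE_OPCODE(...) entry per opcode, and each function
    // redefines DECLARE_OPCODE so that every entry becomes a switch case
    // returning the column that function cares about. A hypothetical entry
    // (illustrative only; the real rows live in opCode.h) looks like:
    //
    //   DECLARE_OPCODE(FAdd, Instruction, llvm_fadd,
    //                  true /*modifiers*/, true /*sat*/, true /*pred*/,
    //                  true /*condMod*/, false /*mathIntrinsic*/,
    //                  false /*atomicIntrinsic*/, true /*regioning*/)
    //
    // so inside SupportsModifier() it expands to "case llvm_fadd: return true;".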
#define DECLARE_OPCODE(instName, llvmType, name, modifiers, sat, pred, condMod, mathIntrinsic, atomicIntrinsic, regioning) \
    case name:\
        return modifiers;
    bool SupportsModifier(llvm::Instruction* inst)
    {
        if (llvm::CmpInst* cmp = dyn_cast<llvm::ICmpInst>(inst))
        {
            // special case, cmp supports modifier unless it is unsigned
            return !cmp->isUnsigned();
        }
        switch (GetOpCode(inst))
        {
#include "opCode.h"
        default:
            return false;
        }
    }
#undef DECLARE_OPCODE

#define DECLARE_OPCODE(instName, llvmType, name, modifiers, sat, pred, condMod, mathIntrinsic, atomicIntrinsic, regioning) \
    case name:\
        return sat;
    bool SupportsSaturate(llvm::Instruction* inst)
    {
        switch (GetOpCode(inst))
        {
#include "opCode.h"
        default:
            break;
        }
        return false;
    }
#undef DECLARE_OPCODE

#define DECLARE_OPCODE(instName, llvmType, name, modifiers, sat, pred, condMod, mathIntrinsic, atomicIntrinsic, regioning) \
    case name:\
        return pred;
    bool SupportsPredicate(llvm::Instruction* inst)
    {
        switch (GetOpCode(inst))
        {
#include "opCode.h"
        default:
            return false;
        }
    }
#undef DECLARE_OPCODE

#define DECLARE_OPCODE(instName, llvmType, name, modifiers, sat, pred, condMod, mathIntrinsic, atomicIntrinsic, regioning) \
    case name:\
        return condMod;
    bool SupportsCondModifier(llvm::Instruction* inst)
    {
        switch (GetOpCode(inst))
        {
#include "opCode.h"
        default:
            return false;
        }
    }
#undef DECLARE_OPCODE

#define DECLARE_OPCODE(instName, llvmType, name, modifiers, sat, pred, condMod, mathIntrinsic, atomicIntrinsic, regioning) \
    case name:\
        return regioning;
    bool SupportsRegioning(llvm::Instruction* inst)
    {
        switch (GetOpCode(inst))
        {
#include "opCode.h"
        default:
            break;
        }
        return false;
    }
#undef DECLARE_OPCODE

#define DECLARE_OPCODE(instName, llvmType, name, modifiers, sat, pred, condMod, mathIntrinsic, atomicIntrinsic, regioning) \
    case name:\
        return mathIntrinsic;
    bool IsMathIntrinsic(EOPCODE opcode)
    {
        switch (opcode)
        {
#include "opCode.h"
        default:
            return false;
        }
    }
#undef DECLARE_OPCODE

#define DECLARE_OPCODE(instName, llvmType, name, modifiers, sat, pred, condMod, mathIntrinsic, atomicIntrinsic, regioning) \
    case name:\
        return atomicIntrinsic;
    bool IsAtomicIntrinsic(EOPCODE opcode)
    {
        switch (opcode)
        {
#include "opCode.h"
        default:
            return false;
        }
    }
#undef DECLARE_OPCODE

    bool IsExtendedMathInstruction(llvm::Instruction* Inst)
    {
        EOPCODE opcode = GetOpCode(Inst);
        switch (opcode)
        {
        case llvm_fdiv:
        case llvm_sdiv:
        case llvm_udiv:
        case llvm_log:
        case llvm_exp:
        case llvm_sqrt:
        case llvm_sin:
        case llvm_cos:
        case llvm_pow:
            return true;
        default:
            return false;
        }
        return false;
    }

    // For now, just include shuffle, reduce and scan, which have SIMD32
    // implementations and should not be split into two instances.
    bool IsSubGroupIntrinsicWithSimd32Implementation(EOPCODE opcode)
    {
        return (opcode == llvm_waveAll ||
            opcode == llvm_waveClustered ||
            opcode == llvm_wavePrefix ||
            opcode == llvm_waveShuffleIndex ||
            opcode == llvm_simdShuffleDown ||
            opcode == llvm_simdBlockRead ||
            opcode == llvm_simdBlockReadBindless);
    }


    bool IsGradientIntrinsic(EOPCODE opcode)
    {
        return (opcode == llvm_gradientX ||
            opcode == llvm_gradientY ||
            opcode == llvm_gradientXfine ||
            opcode == llvm_gradientYfine);
    }

    bool IsStatelessMemLoadIntrinsic(const llvm::GenIntrinsicInst& inst)
    {
        switch (inst.getIntrinsicID())
        {
        case GenISAIntrinsic::GenISA_simdBlockRead:
            return true;
        default:
            break;
        }
        return false;
    }

    bool IsStatelessMemStoreIntrinsic(const llvm::GenIntrinsicInst& inst)
    {
        switch (inst.getIntrinsicID())
        {
        case GenISAIntrinsic::GenISA_simdBlockWrite:
            return true;
        default:
            break;
        }
        return false;
    }

    bool IsStatelessMemAtomicIntrinsic(const llvm::GenIntrinsicInst& inst)
    {
        // This includes:
        // GenISA_intatomicraw
        // GenISA_floatatomicraw
        // GenISA_intatomicrawA64
        // GenISA_floatatomicrawA64
        // GenISA_icmpxchgatomicraw
        // GenISA_fcmpxchgatomicraw
        // GenISA_icmpxchgatomicrawA64
        // GenISA_fcmpxchgatomicrawA64
        if (IsAtomicIntrinsic(GetOpCode(&inst)))
            return true;

        return false;
    }

    bool ComputesGradient(llvm::Instruction* inst)
    {
        llvm::SampleIntrinsic* sampleInst = dyn_cast<llvm::SampleIntrinsic>(inst);
        if (sampleInst && sampleInst->IsDerivative())
        {
            return true;
        }
        if (IsGradientIntrinsic(GetOpCode(inst)))
        {
            return true;
        }
        return false;
    }

    uint getImmValueU32(const llvm::Value* value)
    {
        const llvm::ConstantInt* cval = llvm::cast<llvm::ConstantInt>(value);
        IGC_ASSERT(nullptr != cval);
        IGC_ASSERT(cval->getBitWidth() == 32);

        uint ival = int_cast<uint>(cval->getZExtValue());
        return ival;
    }

    llvm::Value* ExtractElementFromInsertChain(llvm::Value* inst, int pos)
    {
        llvm::ConstantDataVector* cstV = llvm::dyn_cast<llvm::ConstantDataVector>(inst);
        if (cstV != NULL) {
            return cstV->getElementAsConstant(pos);
        }

        llvm::InsertElementInst* ie = llvm::dyn_cast<llvm::InsertElementInst>(inst);
        while (ie != NULL) {
            // Assumes the insert index is a constant int.
            int64_t iOffset = llvm::dyn_cast<llvm::ConstantInt>(ie->getOperand(2))->getSExtValue();
            IGC_ASSERT(iOffset >= 0);
            if (iOffset == pos) {
                return ie->getOperand(1);
            }
            llvm::Value* insertBase = ie->getOperand(0);
            ie = llvm::dyn_cast<llvm::InsertElementInst>(insertBase);
        }
        return NULL;
    }

    bool ExtractVec4FromInsertChain(llvm::Value* inst, llvm::Value* elem[4], llvm::SmallVector<llvm::Instruction*, 10>& instructionToRemove)
    {
        llvm::ConstantDataVector* cstV = llvm::dyn_cast<llvm::ConstantDataVector>(inst);
        if (cstV != NULL) {
            IGC_ASSERT(cstV->getNumElements() == 4);
            for (int i = 0; i < 4; i++) {
                elem[i] = cstV->getElementAsConstant(i);
            }
            return true;
        }

        for (int i = 0; i < 4; i++) {
            elem[i] = NULL;
        }

        int count = 0;
        llvm::InsertElementInst* ie = llvm::dyn_cast<llvm::InsertElementInst>(inst);
        while (ie != NULL) {
            int64_t iOffset = llvm::dyn_cast<llvm::ConstantInt>(ie->getOperand(2))->getSExtValue();
            IGC_ASSERT(iOffset >= 0);
            if (elem[iOffset] == NULL) {
                elem[iOffset] = ie->getOperand(1);
                count++;
                if (ie->hasOneUse()) {
                    instructionToRemove.push_back(ie);
                }
            }
            llvm::Value* insertBase = ie->getOperand(0);
            ie = llvm::dyn_cast<llvm::InsertElementInst>(insertBase);
        }
        return (count == 4);
    }

    void VectorToElement(llvm::Value* inst, llvm::Value* elem[], llvm::Type* int32Ty, llvm::Instruction* insert_before, int vsize)
    {
        for (int i = 0; i < vsize; i++) {
            if (elem[i] == nullptr) {
                // Create an ExtractElementInst
                elem[i] = llvm::ExtractElementInst::Create(inst, llvm::ConstantInt::get(int32Ty, i), "", insert_before);
            }
        }
    }

    llvm::Value* ElementToVector(llvm::Value* elem[], llvm::Type* int32Ty, llvm::Instruction* insert_before, int vsize)
    {
        llvm::VectorType* vt = IGCLLVM::FixedVectorType::get(elem[0]->getType(), vsize);
        llvm::Value* vecValue = llvm::UndefValue::get(vt);

        for (int i = 0; i < vsize; ++i)
        {
            vecValue = llvm::InsertElementInst::Create(vecValue, elem[i], llvm::ConstantInt::get(int32Ty, i), "", insert_before);
            ((Instruction*)vecValue)->setDebugLoc(insert_before->getDebugLoc());
        }
        return vecValue;
    }
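
    // Minimal usage sketch (illustrative; `vec`, `anchor`, and `i32Ty` are
    // hypothetical locals): scalarize a <4 x float> and rebuild it.
    //
    //   llvm::Value* elems[4] = {};
    //   VectorToElement(vec, elems, i32Ty, anchor, 4);  // four extractelements
    //   llvm::Value* rebuilt = ElementToVector(elems, i32Ty, anchor, 4);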

    llvm::Value* ConvertToFloat(llvm::IRBuilder<>& builder, llvm::Value* val)
    {
        llvm::Value* ret = val;
        llvm::Type* type = val->getType();
        IGC_ASSERT(nullptr != type);
        IGC_ASSERT_MESSAGE(type->isSingleValueType(), "Only scalar data is supported here");
        IGC_ASSERT_MESSAGE(!type->isVectorTy(), "Only scalar data is supported here");
        IGC_ASSERT((type->getTypeID() == Type::FloatTyID) || (type->getTypeID() == Type::HalfTyID) || (type->getTypeID() == Type::IntegerTyID) || (type->getTypeID() == Type::DoubleTyID));

        unsigned dataSize = type->getScalarSizeInBits();
        if (16 == dataSize) {
            ret = builder.CreateFPExt(builder.CreateBitCast(val, builder.getHalfTy()), builder.getFloatTy());
        }
        else if (32 == dataSize) {
            ret = builder.CreateBitCast(val, builder.getFloatTy());
        }
        else if (64 == dataSize) {
            llvm::Type* vecType = IGCLLVM::FixedVectorType::get(builder.getFloatTy(), 2);
            ret = builder.CreateBitCast(val, vecType);
        }
        else {
            IGC_ASSERT_EXIT_MESSAGE(0, "Unsupported type in ConvertToFloat of helper.");
        }

        return ret;
    }

    void ConvertToFloat(llvm::IRBuilder<>& builder, llvm::SmallVectorImpl<llvm::Value*>& instList)
    {
        for (size_t i = 0; i < instList.size(); i++)
        {
            llvm::Value* val = ConvertToFloat(builder, instList[i]);
            if (val->getType()->isVectorTy())
            {
                instList[i] = builder.CreateExtractElement(val, static_cast<uint64_t>(0));
                size_t iOld = i;
                for (unsigned j = 1; j < cast<VectorType>(val->getType())->getNumElements(); j++)
                {
                    instList.insert(instList.begin() + iOld + j, builder.CreateExtractElement(val, j));
                    i++;
                }
            }
            else
            {
                instList[i] = val;
            }
        }
    }

    void ScalarizeAggregateMembers(llvm::IRBuilder<>& builder, llvm::Value* val, llvm::SmallVectorImpl<llvm::Value*>& instList)
    {
        llvm::Type* type = val->getType();
        unsigned num = 0;
        switch (type->getTypeID())
        {
        case llvm::Type::FloatTyID:
        case llvm::Type::HalfTyID:
        case llvm::Type::IntegerTyID:
        case llvm::Type::DoubleTyID:
            instList.push_back(val);
            break;
        case llvm::Type::StructTyID:
            num = type->getStructNumElements();
            for (unsigned i = 0; i < num; i++)
            {
                ScalarizeAggregateMembers(builder, builder.CreateExtractValue(val, i), instList);
            }
            break;
        case IGCLLVM::VectorTyID:
            num = (unsigned)cast<VectorType>(type)->getNumElements();
            for (unsigned i = 0; i < num; i++)
            {
                ScalarizeAggregateMembers(builder, builder.CreateExtractElement(val, i), instList);
            }
            break;
        case llvm::Type::ArrayTyID:
            num = static_cast<uint32_t>(type->getArrayNumElements());
            for (unsigned i = 0; i < num; i++)
            {
                ScalarizeAggregateMembers(builder, builder.CreateExtractValue(val, i), instList);
            }
            break;
        default:
            IGC_ASSERT_EXIT_MESSAGE(0, "Unsupported type in ScalarizeAggregateMembers of helper! Please enhance this function first.");
            break;
        }
    }

    void ScalarizeAggregateMemberAddresses(llvm::IRBuilder<>& builder, llvm::Type* type, llvm::Value* val, llvm::SmallVectorImpl<llvm::Value*>& instList, llvm::SmallVector<llvm::Value*, 16> indices)
    {
        unsigned num = 0;
        switch (type->getTypeID())
        {
        case llvm::Type::FloatTyID:
        case llvm::Type::HalfTyID:
        case llvm::Type::IntegerTyID:
        case llvm::Type::DoubleTyID:
            instList.push_back(builder.CreateInBoundsGEP(val, makeArrayRef(indices)));
            break;
        case llvm::Type::StructTyID:
            num = type->getStructNumElements();
            for (unsigned i = 0; i < num; i++)
            {
                indices.push_back(builder.getInt32(i));
                ScalarizeAggregateMemberAddresses(builder, type->getStructElementType(i), val, instList, indices);
                indices.pop_back();
            }
            break;
        case IGCLLVM::VectorTyID:
            num = (unsigned)cast<VectorType>(type)->getNumElements();
            for (unsigned i = 0; i < num; i++)
            {
                indices.push_back(builder.getInt32(i));
                ScalarizeAggregateMemberAddresses(builder, cast<VectorType>(type)->getElementType(), val, instList, indices);
                indices.pop_back();
            }
            break;
        case llvm::Type::ArrayTyID:
            // Fix this if one API could support an array with length > 2^32.
            num = static_cast<uint32_t>(type->getArrayNumElements());
            for (unsigned i = 0; i < num; i++)
            {
                indices.push_back(builder.getInt32(i));
                ScalarizeAggregateMemberAddresses(builder, type->getArrayElementType(), val, instList, indices);
                indices.pop_back();
            }
            break;
        default:
            IGC_ASSERT_EXIT_MESSAGE(0, "Unsupported type in ScalarizeAggregateMemberAddresses of helper! Please enhance this function first.");
            break;
        }
    }

    bool IsUnsignedCmp(const llvm::CmpInst::Predicate Pred)
    {
        switch (Pred) {
        case llvm::CmpInst::ICMP_UGT:
        case llvm::CmpInst::ICMP_UGE:
        case llvm::CmpInst::ICMP_ULT:
        case llvm::CmpInst::ICMP_ULE:
            return true;
        default:
            break;
        }
        return false;
    }

    bool IsSignedCmp(const llvm::CmpInst::Predicate Pred)
    {
        switch (Pred)
        {
        case llvm::CmpInst::ICMP_SGT:
        case llvm::CmpInst::ICMP_SGE:
        case llvm::CmpInst::ICMP_SLT:
        case llvm::CmpInst::ICMP_SLE:
            return true;
        default:
            break;
        }
        return false;
    }

    // isA64Ptr - Queries whether the given pointer type requires a 64-bit representation in vISA
    bool isA64Ptr(llvm::PointerType* PT, CodeGenContext* pContext)
    {
        return pContext->getRegisterPointerSizeInBits(PT->getAddressSpace()) == 64;
    }

    bool IsBitCastForLifetimeMark(const llvm::Value* V)
    {
        if (!V || !llvm::isa<llvm::BitCastInst>(V))
        {
            return false;
        }
        for (llvm::Value::const_user_iterator it = V->user_begin(), e = V->user_end(); it != e; ++it)
        {
            const llvm::IntrinsicInst* inst = llvm::dyn_cast<const llvm::IntrinsicInst>(*it);
            if (!inst)
            {
                return false;
            }
            llvm::Intrinsic::ID IID = inst->getIntrinsicID();
            if (IID != llvm::Intrinsic::lifetime_start &&
                IID != llvm::Intrinsic::lifetime_end)
            {
                return false;
            }
        }
        return true;
    }

    Value* mutatePtrType(Value* ptrv, PointerType* newType,
        IRBuilder<>& builder, const Twine&)
    {
        if (isa<ConstantPointerNull>(ptrv))
        {
            return ConstantPointerNull::get(newType);
        }
        else
        {
            if (ConstantExpr* cexpr = dyn_cast<ConstantExpr>(ptrv))
            {
                IGC_ASSERT(cexpr->getOpcode() == Instruction::IntToPtr);
                Value* offset = cexpr->getOperand(0);
                ptrv = builder.CreateIntToPtr(offset, newType);
            }
            else
            {
                ptrv->mutateType(newType);
            }
        }
        return ptrv;
    }

    /*
    cmp.l.f0.0 (8) null:d r0.0<0;1,0>:w 0x0000:w { Align1, N1, NoMask, NoCompact }
    (-f0.0) jmpi Test
    (-f0.0) sendc (8) null:ud r120.0<0;1,0>:f 0x00000025 0x08031400:ud { Align1, N1, EOT, NoCompact }
    nop
    Test:
    nop
    */

    static const unsigned int CRastHeader_SIMD8[] =
    {
        0x05600010, 0x20001a24, 0x1e000000, 0x00000000,
        0x00110020, 0x34000004, 0x0e001400, 0x00000020,
        0x05710032, 0x20003a00, 0x06000f00, 0x88031400,
        0x00000000, 0x00000000, 0x00000000, 0x00000000,
    };

    /*
    cmp.l.f0.0 (16) null:d r0.0<0;1,0>:w 0x0000:w { Align1, N1, NoMask, NoCompact }
    (-f0.0) jmpi (1) Test { Align1, N1, NoMask, NoCompact }
    (-f0.0) sendc (16) null:ud r120.0<0;1,0>:f 0x00000025 0x90031000:ud { Align1, N1, EOT, NoCompact }
    nop
    Test:
    nop
    */
    static const unsigned int CRastHeader_SIMD16[] =
    {
        0x05800010, 0x20001A24, 0x1E000000, 0x00000000,
        0x00110020, 0x34000004, 0x0E001400, 0x00000020,
        0x05910032, 0x20003A00, 0x06000F00, 0x90031000,
        0x00000000, 0x00000000, 0x00000000, 0x00000000,
    };

    /*
    cmp.l.f0.0 (16) null:d r0.0<0;1,0>:w 0x0000:w { Align1, N1, NoMask, NoCompact }
    (-f0.0) jmpi Test
    (-f0.0) sendc (16) null:w r120.0<0;1,0>:ud 0x00000005 0x10031000:ud { Align1, N1, NoCompact }
    (-f0.0) sendc (16) null:w r120.0<0;1,0>:f 0x00000025 0x10031800:ud { Align1, N5, EOT, NoCompact }
    nop
    Test:
    nop
    */

    static const unsigned int CRastHeader_SIMD32[] =
    {
        0x05800010, 0x20001a24, 0x1e000000, 0x00000000,
        0x00110020, 0x34000004, 0x0e001400, 0x00000020,
        0x05910032, 0x20000260, 0x06000f00, 0x10031000,
        0x05912032, 0x20003a60, 0x06000f00, 0x90031800,
    };

    unsigned int AppendConservativeRastWAHeader(IGC::SProgramOutput* program, SIMDMode simdmode)
    {
        unsigned int headerSize = 0;
        const unsigned int* pHeader = nullptr;

        if (program && (program->m_programSize > 0))
        {
            switch (simdmode)
            {
            case SIMDMode::SIMD8:
                headerSize = sizeof(CRastHeader_SIMD8);
                pHeader = CRastHeader_SIMD8;
                break;

            case SIMDMode::SIMD16:
                headerSize = sizeof(CRastHeader_SIMD16);
                pHeader = CRastHeader_SIMD16;
                break;

            case SIMDMode::SIMD32:
                headerSize = sizeof(CRastHeader_SIMD32);
                pHeader = CRastHeader_SIMD32;
                break;

            default:
                IGC_ASSERT_MESSAGE(0, "Invalid SIMD Mode for Conservative Raster WA");
                break;
            }

            unsigned int newSize = program->m_programSize + headerSize;
            void* newBinary = IGC::aligned_malloc(newSize, 16);
            memcpy_s(newBinary, newSize, pHeader, headerSize);
            memcpy_s((char*)newBinary + headerSize, newSize, program->m_programBin, program->m_programSize);
            IGC::aligned_free(program->m_programBin);
            program->m_programBin = newBinary;
            program->m_programSize = newSize;
        }
        return headerSize;
    }
|
|
|
|
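    // A minimal sketch of the resulting layout (assuming SIMD8): the 4x4
    // dword header above is prepended to the kernel binary, so
    //   m_programSize' = sizeof(CRastHeader_SIMD8) + m_programSize
    // and the original kernel code starts at offset sizeof(CRastHeader_SIMD8)
    // in the new allocation.
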
    bool DSDualPatchEnabled(class CodeGenContext* ctx)
    {
        return ctx->platform.supportDSDualPatchDispatch() &&
            ctx->platform.WaDisableDSDualPatchMode() &&
            !(ctx->m_DriverInfo.APIDisableDSDualPatchDispatch()) &&
            IGC_IS_FLAG_DISABLED(DisableDSDualPatch);
    }

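    // Returns the unique kernel entry function recorded in the metadata.
    // If no kernel is marked as the unique entry yet, the first kernel
    // encountered is marked as such and returned.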
    Function* getUniqueEntryFunc(const IGCMD::MetaDataUtils* pM, IGC::ModuleMetaData* pModMD)
    {
        Function* entryFunc = nullptr;
        auto& FuncMD = pModMD->FuncMD;
        for (auto i = pM->begin_FunctionsInfo(), e = pM->end_FunctionsInfo(); i != e; ++i)
        {
            IGCMD::FunctionInfoMetaDataHandle Info = i->second;
            if (Info->getType() != FunctionTypeMD::KernelFunction)
            {
                continue;
            }

            Function* F = i->first;
            if (!entryFunc)
            {
                entryFunc = F;
            }

            auto fi = FuncMD.find(F);
            if (fi != FuncMD.end() && fi->second.isUniqueEntry)
            {
                return F;
            }
        }
        IGC_ASSERT_MESSAGE(nullptr != entryFunc, "No entry func!");
        auto ei = FuncMD.find(entryFunc);
        IGC_ASSERT(ei != FuncMD.end());
        ei->second.isUniqueEntry = true;
        return entryFunc;
    }

    // If true, the codegen will likely not emit any instruction for this instruction.
    bool isNoOpInst(Instruction* I, CodeGenContext* Ctx)
    {
        if (isa<BitCastInst>(I) ||
            isa<IntToPtrInst>(I) ||
            isa<PtrToIntInst>(I))
        {
            // Don't bother with constant operands.
            if (isa<Constant>(I->getOperand(0))) {
                return false;
            }

            Type* dTy = I->getType();
            Type* sTy = I->getOperand(0)->getType();
            PointerType* dPTy = dyn_cast<PointerType>(dTy);
            PointerType* sPTy = dyn_cast<PointerType>(sTy);
            uint32_t dBits = dPTy ? Ctx->getRegisterPointerSizeInBits(dPTy->getAddressSpace())
                : (unsigned int)dTy->getPrimitiveSizeInBits();
            uint32_t sBits = sPTy ? Ctx->getRegisterPointerSizeInBits(sPTy->getAddressSpace())
                : (unsigned int)sTy->getPrimitiveSizeInBits();
            if (dBits == 0 || sBits == 0 || dBits != sBits) {
                // Not a primitive type, or sizes differ (inttoptr, etc.).
                return false;
            }

            VectorType* dVTy = dyn_cast<VectorType>(dTy);
            VectorType* sVTy = dyn_cast<VectorType>(sTy);
            int d_nelts = dVTy ? (int)dVTy->getNumElements() : 1;
            int s_nelts = sVTy ? (int)sVTy->getNumElements() : 1;
            if (d_nelts != s_nelts) {
                // Vector relayout bitcast.
                return false;
            }
            return true;
        }
        return false;
    }

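    // Examples (illustrative only):
    //   %b = bitcast <4 x float> %v to <4 x i32>   ; no-op: same size (128b),
    //                                              ; same element count
    //   %c = bitcast <4 x i16> %w to <2 x i32>     ; NOT a no-op: vector
    //                                              ; relayout (4 -> 2 elements)
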
    //
    // Given a value, check if it is likely a positive number.
    //
    // This function works best if llvm.assume() is used in the bif libraries to
    // give ValueTracking hints. ex:
    //
    // size_t get_local_id(uint dim)
    // {
    //     size_t ret = __builtin_IB_get_local_id();
    //     __builtin_assume(ret >= 0);
    //     __builtin_assume(ret <= 0x0000ffff);
    //     return ret;
    // }
    //
    // This implementation relies completely on native llvm functions.
    //
    bool valueIsPositive(
        Value* V,
        const DataLayout* DL,
        llvm::AssumptionCache* AC,
        llvm::Instruction* CxtI)
    {
#if LLVM_VERSION_MAJOR == 4
        bool isKnownNegative = false;
        bool isKnownPositive = false;
        llvm::ComputeSignBit(
            V,
            isKnownPositive,
            isKnownNegative,
            *DL,
            0,
            AC,
            CxtI);
        return isKnownPositive;
#elif LLVM_VERSION_MAJOR >= 7
        return computeKnownBits(
            V,
            *DL,
            0,
            AC,
            CxtI).isNonNegative();
#endif
    }

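    // Usage sketch (hypothetical call site, for illustration only):
    //
    //   if (valueIsPositive(idx, &F.getParent()->getDataLayout(), AC, I))
    //   {
    //       // idx is provably non-negative; a sign-extend of it could be
    //       // relaxed to a zero-extend, for example.
    //   }
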
    void appendToUsed(llvm::Module& M, ArrayRef<GlobalValue*> Values)
    {
        std::string Name = "llvm.used";
        GlobalVariable* GV = M.getGlobalVariable(Name);
        SmallPtrSet<Constant*, 16> InitAsSet;
        SmallVector<Constant*, 16> Init;
        if (GV) {
            ConstantArray* CA = dyn_cast<ConstantArray>(GV->getInitializer());
            for (auto& Op : CA->operands()) {
                Constant* C = cast_or_null<Constant>(Op);
                if (InitAsSet.insert(C).second)
                    Init.push_back(C);
            }
            GV->eraseFromParent();
        }

        Type* Int8PtrTy = llvm::Type::getInt8PtrTy(M.getContext());
        for (auto* V : Values) {
            Constant* C = V;
            if (V->getType()->getAddressSpace() != 0)
                C = ConstantExpr::getAddrSpaceCast(V, Int8PtrTy);
            else
                C = ConstantExpr::getBitCast(V, Int8PtrTy);
            if (InitAsSet.insert(C).second)
                Init.push_back(C);
        }

        if (Init.empty())
            return;

        ArrayType* ATy = ArrayType::get(Int8PtrTy, Init.size());
        GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
            ConstantArray::get(ATy, Init), Name);
        GV->setSection("llvm.metadata");
    }

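    // Usage sketch (hypothetical function name, for illustration only): keep
    // a helper alive across global DCE by recording it in llvm.used.
    //
    //   if (GlobalValue* Helper = M.getFunction("__builtin_IB_helper"))
    //       appendToUsed(M, Helper);
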
    bool safeScheduleUp(llvm::BasicBlock* BB, llvm::Value* V, llvm::Instruction*& InsertPos, llvm::DenseSet<llvm::Instruction*> Scheduled)
    {
        llvm::Instruction* I = llvm::dyn_cast<llvm::Instruction>(V);
        if (!I)
            return false;

        // Skip values defined in other BBs.
        if (I->getParent() != BB)
            return false;

        // Skip phi-nodes as they are effectively defined in other BBs.
        if (llvm::isa<llvm::PHINode>(I))
            return false;

        // Skip instructions with side effects.
        if (I->mayHaveSideEffects())
            return false;

        // Don't re-schedule an instruction again.
        if (Scheduled.count(I)) {
            if (InsertPos && !isInstPrecede(I, InsertPos))
                InsertPos = I;
            return false;
        }

        bool Changed = false;

        // Try to schedule all its operands first.
        for (auto OI = I->op_begin(), OE = I->op_end(); OI != OE; ++OI)
            Changed |= safeScheduleUp(BB, OI->get(), InsertPos, Scheduled);

        // Mark this instruction `visited`.
        Scheduled.insert(I);

        // Skip if the instruction is already defined before the insertion position.
        if (InsertPos && isInstPrecede(I, InsertPos))
            return Changed;

        // Schedule itself.
        if (InsertPos) {
            I->removeFromParent();
            I->insertAfter(InsertPos);
        }

        InsertPos = I;
        return true;
    }

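    // Illustrative example (hypothetical IR): with InsertPos at %base,
    // calling safeScheduleUp(BB, %add, InsertPos, ...) first recurses into
    // %mul, then moves both to directly after %base, updating InsertPos as
    // it goes. The store is never moved: it has side effects, and values
    // from other blocks or phi-nodes are likewise left alone.
    //
    //   %base = ...               ; initial InsertPos
    //   store i32 %x, i32* %p
    //   %mul = mul i32 %a, %b
    //   %add = add i32 %mul, %c   ; the value being scheduled up
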
    ConstantInt* getConstantSInt(
        IRBuilder<>& Builder, const int bitSize, int64_t val)
    {
        ConstantInt* res = nullptr;
        switch (bitSize) {
        case 8:  res = Builder.getInt8((uint8_t)val); break;
        case 16: res = Builder.getInt16((uint16_t)val); break;
        case 32: res = Builder.getInt32((uint32_t)val); break;
        case 64: res = Builder.getInt64((uint64_t)val); break;
        default:
            IGC_ASSERT_MESSAGE(0, "invalid bitsize");
        }
        return res;
    }

    ConstantInt* getConstantUInt(
        IRBuilder<>& Builder, const int bitSize, uint64_t val)
    {
        ConstantInt* res = nullptr;
        switch (bitSize) {
        case 8:  res = Builder.getInt8((uint8_t)val); break;
        case 16: res = Builder.getInt16((uint16_t)val); break;
        case 32: res = Builder.getInt32((uint32_t)val); break;
        case 64: res = Builder.getInt64(val); break;
        default:
            IGC_ASSERT_MESSAGE(0, "invalid bitsize");
        }
        return res;
    }

    // MulH implementation for 64-bit signed integers.
    Value* CreateMulhS64(IRBuilder<>& B, Value* const u, Value* const v) {
        // This comes from Hacker's Delight 8-2.
        // Think of this as elementary school multiplication, but in base 2^32.
        ConstantInt* const loMask = getConstantSInt(B, 64, 0xFFFFFFFFll);
        ConstantInt* const hiShift = getConstantSInt(B, 64, 32);
        //
        // u64 u0 = u & 0xFFFFFFFF; s64 u1 = u >> 32;
        // u64 v0 = v & 0xFFFFFFFF; s64 v1 = v >> 32;
        Value* const u0 = B.CreateAnd(u, loMask, "u.lo32");
        Value* const u1 = B.CreateAShr(u, hiShift, "u.hi32");
        Value* const v0 = B.CreateAnd(v, loMask, "v.lo32");
        Value* const v1 = B.CreateAShr(v, hiShift, "v.hi32");
        //
        // w0 = u0*v0
        Value* const w0 = B.CreateMul(u0, v0, "w0");
        //
        // t = u1*v0 + (w0 >> 32)
        Value* const tLHS = B.CreateMul(u1, v0);
        Value* const tRHS = B.CreateLShr(w0, hiShift, "w0.lo32");
        Value* const t = B.CreateAdd(tLHS, tRHS, "t");
        //
        // w1 = u0*v1 + (t & 0xFFFFFFFF)
        Value* const u0v1 = B.CreateMul(u0, v1);
        Value* const tLO32 = B.CreateAnd(t, loMask, "t.lo32");
        Value* const w1 = B.CreateAdd(u0v1, tLO32, "w1");
        //
        // return u1*v1 + (t >> 32) + (w1 >> 32)
        Value* const u1v1 = B.CreateMul(u1, v1);
        Value* const tHI32 = B.CreateAShr(t, hiShift, "t.hi32");
        Value* const rLHS = B.CreateAdd(u1v1, tHI32);
        Value* const rRHS = B.CreateAShr(w1, hiShift, "w1.lo32");
        Value* const r = B.CreateAdd(rLHS, rRHS, "uv");
        //
        return r;
    }

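    // Worked example: for u = v = 2^32 (positive, so arithmetic and logical
    // shifts agree), u0 = v0 = 0 and u1 = v1 = 1, giving w0 = 0, t = 0,
    // w1 = 0, and the result u1*v1 + (t >> 32) + (w1 >> 32) = 1 -- the high
    // 64 bits of 2^32 * 2^32 = 2^64, as expected.
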
    // MulH implementation for 64-bit unsigned integers.
    Value* CreateMulhU64(IRBuilder<>& B, Value* const u, Value* const v)
    {
        // This is the same as CreateMulhS64, but with all logical shifts.
        ConstantInt* const loMask = getConstantUInt(B, 64, 0xFFFFFFFFull);
        ConstantInt* const hiShift = getConstantUInt(B, 64, 32);
        //
        // u64 u0 = u & 0xFFFFFFFF, u1 = u >> 32;
        // u64 v0 = v & 0xFFFFFFFF, v1 = v >> 32;
        Value* const u0 = B.CreateAnd(u, loMask, "u.lo32");
        Value* const u1 = B.CreateLShr(u, hiShift, "u.hi32");
        Value* const v0 = B.CreateAnd(v, loMask, "v.lo32");
        Value* const v1 = B.CreateLShr(v, hiShift, "v.hi32");
        //
        // w0 = u0*v0
        Value* const w0 = B.CreateMul(u0, v0, "w0");
        //
        // t = u1*v0 + (w0 >> 32)
        Value* const tLHS = B.CreateMul(u1, v0);
        Value* const tRHS = B.CreateLShr(w0, hiShift, "w0.lo32");
        Value* const t = B.CreateAdd(tLHS, tRHS, "t");
        //
        // w1 = u0*v1 + (t & 0xFFFFFFFF)
        Value* const u0v1 = B.CreateMul(u0, v1);
        Value* const tLO32 = B.CreateAnd(t, loMask, "t.lo32");
        Value* const w1 = B.CreateAdd(u0v1, tLO32, "w1");
        //
        // return u1*v1 + (t >> 32) + (w1 >> 32)
        Value* const u1v1 = B.CreateMul(u1, v1);
        Value* const tHI32 = B.CreateLShr(t, hiShift, "t.hi32");
        Value* const rLHS = B.CreateAdd(u1v1, tHI32);
        Value* const rRHS = B.CreateLShr(w1, hiShift, "w1.lo32");
        Value* const r = B.CreateAdd(rLHS, rRHS, "uv");
        //
        return r;
    }

    // MulH implementation for 32- and 64-bit integers.
    Value* CreateMulh(
        Function& F,
        IRBuilder<>& B,
        const bool isSigned,
        Value* const u,
        Value* const v)
    {
        Value* res = nullptr;
        IGC_ASSERT(nullptr != u);
        IGC_ASSERT(nullptr != u->getType());
        int bitWidth = u->getType()->getIntegerBitWidth();
        switch (bitWidth)
        {
        case 32:
        {
            // We have a dedicated machine instruction for 32b.
            SmallVector<Value*, 2> imulhArgs;
            imulhArgs.push_back(u);
            imulhArgs.push_back(v);
            auto intrinsic = isSigned ?
                GenISAIntrinsic::GenISA_imulH :
                GenISAIntrinsic::GenISA_umulH;
            IGC_ASSERT(nullptr != v);
            Function* const iMulhDecl = llvm::GenISAIntrinsic::getDeclaration(
                F.getParent(),
                intrinsic,
                v->getType());
            res = B.CreateCall(iMulhDecl, imulhArgs, "q_appx");
            break;
        }
        case 64:
            // Emulate via 64b arithmetic.
            if (isSigned) {
                res = CreateMulhS64(B, u, v);
            }
            else {
                res = CreateMulhU64(B, u, v);
            }
            break;
        default:
            IGC_ASSERT_MESSAGE(0, "CreateMulh: bit width must be 32 or 64");
        }
        return res;
    }

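    // Usage sketch (hypothetical call site, for illustration only): obtain
    // the high half of a 64x64-bit product, e.g. when expanding a signed
    // division by a constant via a magic-number multiply.
    //
    //   IRBuilder<> B(divInst);
    //   Value* hi = CreateMulh(F, B, /*isSigned=*/true, numerator, magic);
    //   // 'hi' holds the upper 64 bits of the 128-bit product.
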
} // namespace IGC