/*========================== begin_copyright_notice ============================
Copyright (C) 2017-2021 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
#pragma once
#include "Compiler/CISACodeGen/CISACodeGen.h"
#include "Compiler/CISACodeGen/CVariable.hpp"
#include "Compiler/CISACodeGen/PatternMatchPass.hpp"
#include "Compiler/CISACodeGen/helper.h"
#include "Compiler/CISACodeGen/GenCodeGenModule.h"
#include "visa_wa.h"
#include "inc/common/sku_wa.h"
namespace IGC {
class CShader;
struct SFlag {
CVariable *var;
e_predMode mode;
bool invertFlag;
void init() {
var = NULL;
mode = EPRED_NORMAL;
invertFlag = false;
}
};
struct SModifier {
uint16_t subReg;
uint8_t subVar;
uint8_t region[3];
e_modifier mod;
e_instance instance;
bool specialRegion;
void init() {
mod = EMOD_NONE;
subVar = 0;
subReg = 0;
instance = EINSTANCE_UNSPECIFIED;
specialRegion = false;
}
};
struct SAlias {
CVariable *m_rootVar;
VISA_Type m_type;
// If m_aliasVar != null, the vISA GenVar's alias offset must be set to
// m_aliasVar->aliasOffset, not zero.
CVariable *m_aliasVar;
SAlias(CVariable *var, VISA_Type type, CVariable *aliasVar = nullptr)
: m_rootVar(var), m_type(type), m_aliasVar(aliasVar) {}
};
struct SAliasMapInfo {
static inline SAlias getEmptyKey() { return SAlias(nullptr, ISA_TYPE_UD, nullptr); }
static inline SAlias getTombstoneKey() { return SAlias(nullptr, ISA_TYPE_D, nullptr); }
static unsigned getHashValue(const SAlias &Val) {
unsigned ty = (unsigned)Val.m_type;
if (Val.m_aliasVar)
ty += 0x011000;
return llvm::DenseMapInfo<CVariable *>::getHashValue(Val.m_rootVar) ^ ty;
}
static bool isEqual(const SAlias &LHS, const SAlias &RHS) {
if (!LHS.m_aliasVar && !RHS.m_aliasVar) {
// common case
return LHS.m_rootVar == RHS.m_rootVar && LHS.m_type == RHS.m_type;
} else if (LHS.m_aliasVar && RHS.m_aliasVar) {
// inline asm only
return LHS.m_rootVar == RHS.m_rootVar && LHS.m_type == RHS.m_type &&
LHS.m_aliasVar->GetAliasOffset() == RHS.m_aliasVar->GetAliasOffset();
}
return false;
}
};
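// These DenseMapInfo-style traits are used by the alias cache m_aliasesMap
// declared further below in CEncoder; the mapped value is presumably the
// CVariable alias created for the given (root variable, type, alias offset)
// key.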
/// Helps represent URB write channel masks in a way that provides type
/// safety and adapts to the channel mask format required by the vISA interface.
class URBChannelMask {
public:
explicit URBChannelMask(unsigned int bitmask) : m_bitmask(bitmask) {}
/// Returns the size of the bitmask,
/// defined as the position of the most significant bit with value 1.
/// E.g. size(10001) == 5, size(1) == 1, size(1111) == 4
size_t size() const;
/// Returns the channel mask in the format expected by vISA.
/// If the mask is full (i.e. consists of all 1s) the return value must be
/// 0xFF, which means 'no channel mask'. In other cases it is the actual
/// stored mask.
/// E.g. 1010 asVISAMask --> 1010, 111 asVISAMask --> 11111111 (full mask case)
unsigned int asVISAMask() const;
// returns true if all channels are set (i.e., we can skip the channel mask)
bool isAllSet() const { return ((m_bitmask + 1) & m_bitmask) == 0; }
private:
unsigned int m_bitmask;
};
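// Illustrative sketch only (the real definitions live out of line, presumably
// in the corresponding .cpp): given the contract documented above, the two
// queries could look roughly like
//   size_t URBChannelMask::size() const {
//     return 32u - llvm::countLeadingZeros(m_bitmask); // MSB position, counted from 1
//   }
//   unsigned int URBChannelMask::asVISAMask() const {
//     return isAllSet() ? 0xFF : m_bitmask;            // full mask encodes as 0xFF
//   }
// Note that isAllSet() relies on ((m + 1) & m) == 0, which holds exactly when
// m is of the form 2^n - 1, i.e. a contiguous run of low 1 bits.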
struct SEncoderState {
SModifier m_srcOperand[4];
SModifier m_dstOperand;
SFlag m_flag;
SIMDMode m_simdSize;
SIMDMode m_uniformSIMDSize;
e_mask m_mask;
bool m_noMask;
bool m_SubSpanDestination;
bool m_secondHalf;
bool m_secondNibble = false;
};
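// This per-instruction state is filled in through the Set* helpers declared
// below (SetPredicate, SetSrcModifier, SetSimdSize, ...) and consumed by the
// next emitted instruction. A typical sequence in the emit pass looks roughly
// like the following (illustrative only; the caller-side member name and the
// exact reset semantics of Push() are assumptions based on the usual IGC
// pattern):
//   m_encoder->SetPredicate(flag);
//   m_encoder->SetSrcModifier(0, EMOD_NEG);
//   m_encoder->Add(dst, src0, src1);
//   m_encoder->Push(); // clears the transient per-instruction state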
class CEncoder {
public:
void InitEncoder(bool canAbortOnSpill, bool hasStackCall, bool hasInlineAsmCall, bool hasAdditionalVisaAsmToLink,
int numThreadsPerEU, uint lowerBoundGRF, uint upperBoundGRF, VISAKernel *prevKernel);
void InitBuildParams(llvm::SmallVector<std::unique_ptr<const char, std::function<void(const char *)>>, 10> &params);
void InitVISABuilderOptions(TARGET_PLATFORM VISAPlatform, bool canAbortOnSpill, bool hasStackCall,
bool enableVISA_IR);
SEncoderState CopyEncoderState();
void SetEncoderState(SEncoderState &newState);
VISA_Align GetVISAAlign(CVariable *var);
void SetAbortOnSpillThreshold(bool canAbortOnSpill, bool AllowSpill);
void SetMaxRegForThreadDispatch();
void SetDispatchSimdSize();
void SetSpillMemOffset();
void SetStackFunctionArgSize(uint size); // size in GRFs
void SetStackFunctionRetSize(uint size); // size in GRFs
void SetExternFunctionFlag();
void GetVISAPredefinedVar(CVariable *pVar, PreDefined_Vars var);
void CreateVISAVar(CVariable *var, bool UseAliasOffset = false);
void DeclareInput(CVariable *var, uint offset, uint instance);
void DeclarePred(CVariable *var, uint offset);
void MarkAsOutput(CVariable *var);
void MarkAsPayloadLiveOut(CVariable *var);
void MarkAsExclusiveLoad(CVariable *var);
void Compile(bool hasSymbolTable, GenXFunctionGroupAnalysis *&pFGA);
std::string GetShaderName();
const vISA::KernelCostInfo *kci;
CEncoder();
void SetProgram(CShader *program);
void Jump(CVariable *flag, uint label);
void AddLabel(uint label);
void AddDivergentResourceLoopLabel(uint label);
uint GetNewLabelID(const CName &name);
void DwordAtomicRaw(AtomicOp atomic_op, const ResourceDescriptor &bindingTableIndex, CVariable *dst,
CVariable *elem_offset, CVariable *src0, CVariable *src1, bool is16Bit = false);
void AtomicRawA64(AtomicOp atomic_op, const ResourceDescriptor &resource, CVariable *dst, CVariable *elem_offset,
CVariable *src0, CVariable *src1, unsigned short bitwidth);
void TypedAtomic(AtomicOp atomic_op, CVariable *dst, const ResourceDescriptor &resource, CVariable *pU, CVariable *pV,
CVariable *pR, CVariable *src0, CVariable *src1, CVariable *lod, bool is16Bit = false);
void Cmp(e_predicate p, CVariable *dst, CVariable *src0, CVariable *src1);
void Select(CVariable *flag, CVariable *dst, CVariable *src0, CVariable *src1);
void GenericAlu(e_opcode opcode, CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2 = nullptr);
void Send(CVariable *dst, CVariable *src, uint exDesc, CVariable *messDescriptor, bool isSendc = false);
void Send(CVariable *dst, CVariable *src, uint ffid, CVariable *exDesc, CVariable *messDescriptor,
bool isSendc = false);
void Sends(CVariable *dst, CVariable *src0, CVariable *src1, uint ffid, CVariable *exDesc, CVariable *messDescriptor,
bool isSendc = false, bool hasEOT = false);
void RenderTargetWrite(CVariable *var[], bool isUndefined[], bool lastRenderTarget, bool isNullRT, bool perSample,
bool coarseMode, bool headerMaskFromCe0,
CVariable *bindingTableIndex, CVariable *RTIndex, CVariable *source0Alpha, CVariable *oMask,
CVariable *depth, CVariable *stencil, CVariable *CPSCounter, CVariable *sampleIndex,
CVariable *r1Reg);
void Sample(EOPCODE subOpcode, uint writeMask, CVariable *offset, const ResourceDescriptor &bindingTableIndex,
const ResourceDescriptor &pairedResource, const SamplerDescriptor &SamplerIdx, uint numSources,
CVariable *dst, llvm::SmallVector<CVariable *, 4> &payload, bool zeroLOD, bool cpsEnable,
bool feedbackEnable, bool nonUniformState = false);
void Load(EOPCODE subOpcode, uint writeMask, CVariable *offset, const ResourceDescriptor &resource,
const ResourceDescriptor &pairedResource, uint numSources, CVariable *dst,
llvm::SmallVector<CVariable *, 4> &payload, bool zeroLOD, bool feedbackEnable);
void Info(EOPCODE subOpcode, uint writeMask, const ResourceDescriptor &resource, CVariable *lod, CVariable *dst);
void Gather4Inst(EOPCODE subOpcode, CVariable *offset, const ResourceDescriptor &resource,
const ResourceDescriptor &pairedResource, const SamplerDescriptor &sampler, uint numSources,
CVariable *dst, llvm::SmallVector<CVariable *, 4> &payload, uint channel, bool feedbackEnable);
void OWLoad(CVariable *dst, const ResourceDescriptor &resource, CVariable *offset, bool owordAligned,
uint bytesToBeRead, uint dstOffset = 0);
void OWStore(CVariable *data, e_predefSurface surfaceType, CVariable *bufidx, CVariable *offset, uint bytesToBeRead,
uint srcOffset);
void AddrAdd(CVariable *dst, CVariable *src0, CVariable *src1);
void Barrier(e_barrierKind BarrierKind);
void Fence(bool CommitEnable, bool L3_Flush_RW_Data, bool L3_Flush_Constant_Data, bool L3_Flush_Texture_Data,
bool L3_Flush_Instructions, bool Global_Mem_Fence, bool L1_Flush, bool SWFence);
void FlushSamplerCache();
void EOT();
void OWLoadA64(CVariable *dst, CVariable *offset, uint dstSize, uint dstOffset = 0);
void OWStoreA64(CVariable *dst, CVariable *offset, uint dstSize, uint srcOffset);
void MediaBlockMessage(ISA_Opcode subOpcode, CVariable *dst, e_predefSurface surfaceType, CVariable *buf,
CVariable *xOffset, CVariable *yOffset, uint modifier, unsigned char blockWidth,
unsigned char blockHeight, uint plane);
void GatherA64(CVariable *dst, CVariable *offset, unsigned elementSize, unsigned numElems);
VISA_VectorOpnd *GetVISALSCSurfaceOpnd(e_predefSurface surfaceType, CVariable *bti);
static LSC_DATA_SIZE LSC_GetElementSize(unsigned eSize, bool is2DBlockMsg = false);
static LSC_DATA_ELEMS LSC_GetElementNum(unsigned eNum);
static LSC_ADDR_TYPE getLSCAddrType(const ResourceDescriptor *resource);
static LSC_ADDR_TYPE getLSCAddrType(e_predefSurface surfaceType);
void LSC_LoadGather(LSC_OP subOp, CVariable *dst,
CVariable *offset, LSC_DATA_SIZE elemSize, LSC_DATA_ELEMS numElems, unsigned blockOffset,
ResourceDescriptor *resource, LSC_ADDR_SIZE addr_size, LSC_DATA_ORDER data_order, int immOffset,
int immScale, LSC_CACHE_OPTS cacheOpts, LSC_DOC_ADDR_SPACE addrSpace);
void LSC_StoreScatter(LSC_OP subOp,
CVariable *src, CVariable *offset, LSC_DATA_SIZE elemSize, LSC_DATA_ELEMS numElems,
unsigned blockOffset, ResourceDescriptor *resource, LSC_ADDR_SIZE addr_size,
LSC_DATA_ORDER data_order, int immOffset, int immScale, LSC_CACHE_OPTS cacheOpts,
LSC_DOC_ADDR_SPACE addrSpace);
void LSC_LoadBlock1D(CVariable *dst, CVariable *offset, LSC_DATA_SIZE elemSize, LSC_DATA_ELEMS numElems,
ResourceDescriptor *resource, LSC_ADDR_SIZE addrSize, int addrImmOffset,
LSC_CACHE_OPTS cacheOpts);
void LSC_StoreBlock1D(CVariable *src, CVariable *offset, LSC_DATA_SIZE elemSize, LSC_DATA_ELEMS numElems,
ResourceDescriptor *resource, LSC_ADDR_SIZE addrSize, int addrImmOffset,
LSC_CACHE_OPTS cacheOpts);
void LSC_AtomicRaw(AtomicOp atomic_op, CVariable *dst,
CVariable *offset, CVariable *src0, CVariable *src1, unsigned short bitwidth,
ResourceDescriptor *resource, LSC_ADDR_SIZE addr_size, int immOff, int immScale,
LSC_CACHE_OPTS cacheOpts);
void LSC_Fence(LSC_SFID sfid, LSC_SCOPE scope, LSC_FENCE_OP op);
void LSC_2DBlockMessage(LSC_OP subOp, ResourceDescriptor *resource, CVariable *dst, CVariable *bufId,
CVariable *xOffset, CVariable *yOffset, unsigned blockWidth, unsigned blockHeight,
unsigned elemSize, unsigned numBlocks, bool isTranspose, bool isVnni,
CVariable *flatImageBaseoffset, CVariable *flatImageWidth, CVariable *flatImageHeight,
CVariable *flatImagePitch,
LSC_CACHE_OPTS cacheOpts = {LSC_CACHING_DEFAULT, LSC_CACHING_DEFAULT});
void LSC_2DBlockMessage(LSC_OP subOp, CVariable *Dst, CVariable *AddrPayload, CVariable *Src, uint32_t ImmX,
uint32_t ImmY, uint32_t elemSize, uint32_t blockWidth, uint32_t blockHeight,
uint32_t numBlocks, bool isTranspose, bool isVnni,
LSC_CACHE_OPTS cacheOpts = {LSC_CACHING_DEFAULT, LSC_CACHING_DEFAULT});
void NamedBarrier(e_barrierKind BarrierKind, CVariable *src0, CVariable *src1, CVariable *src2, CVariable *src3);
void LSC_TypedReadWrite(LSC_OP subOp, ResourceDescriptor *resource, CVariable *pU, CVariable *pV, CVariable *pR,
CVariable *pLODorSampleIdx, CVariable *pSrcDst, unsigned elemSize, unsigned numElems,
LSC_ADDR_SIZE addr_size, int chMask,
LSC_CACHE_OPTS cacheOpts = {LSC_CACHING_DEFAULT, LSC_CACHING_DEFAULT});
void LSC_TypedAtomic(AtomicOp subOp, ResourceDescriptor *resource, CVariable *pU, CVariable *pV, CVariable *pR,
CVariable *pSrc0, CVariable *pSrc1, CVariable *pSrcDst, unsigned elemSize,
LSC_ADDR_SIZE addr_size, LSC_CACHE_OPTS cacheOpts);
void LSC_Typed2dBlock(LSC_OP subOpcode, CVariable *dst, e_predefSurface surfaceType, CVariable *buf,
CVariable *xOffset, CVariable *yOffset, int blockWidth, int blockHeight);
void LSC_UntypedAppendCounterAtomic(LSC_OP lscOp, ResourceDescriptor *resource, CVariable *dst, CVariable *src0);
void AppendBreakpoint();
void ScatterA64(CVariable *val, CVariable *offset, unsigned elementSize, unsigned numElems);
void ByteGather(CVariable *dst, const ResourceDescriptor &resource, CVariable *offset, unsigned elementSize,
unsigned numElems);
void ByteScatter(CVariable *src, const ResourceDescriptor &resource, CVariable *offset, unsigned elementSize,
unsigned numElems);
void Gather4Scaled(CVariable *dst, const ResourceDescriptor &resource, CVariable *offset, unsigned Mask = 0);
void Gather4ScaledNd(CVariable *dst, const ResourceDescriptor &resource, CVariable *offset, unsigned nd,
unsigned Mask = 0);
void Scatter4Scaled(CVariable *src, const ResourceDescriptor &resource, CVariable *offset);
void Gather4A64(CVariable *dst, CVariable *offset);
void Scatter4A64(CVariable *src, CVariable *offset);
void BoolToInt(CVariable *dst, CVariable *src);
void Copy(CVariable *dst, CVariable *src);
void SubroutineCall(CVariable *flag, llvm::Function *F);
void SubroutineRet(CVariable *flag, llvm::Function *F);
void StackCall(CVariable *flag, llvm::Function *F, unsigned char argSize, unsigned char retSize);
void IndirectStackCall(CVariable *flag, CVariable *funcPtr, unsigned char argSize, unsigned char retSize);
void StackRet(CVariable *flag);
void Loc(unsigned int line);
void File(std::string &s);
void PredAdd(CVariable *flag, CVariable *dst, CVariable *src0, CVariable *src1);
void DebugLinePlaceholder();
inline void Jump(uint label);
inline void Cast(CVariable *dst, CVariable *src);
inline void Add(CVariable *dst, CVariable *src0, CVariable *src1);
inline void Bfi(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2, CVariable *src3);
inline void Bfe(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2);
inline void Bfrev(CVariable *dst, CVariable *src0);
inline void CBit(CVariable *dst, CVariable *src0);
inline void Fbh(CVariable *dst, CVariable *src0);
inline void Fbl(CVariable *dst, CVariable *src0);
inline void Mul(CVariable *dst, CVariable *src0, CVariable *src1);
inline void Pow(CVariable *dst, CVariable *src0, CVariable *src1);
inline void Div(CVariable *dst, CVariable *src0, CVariable *src1);
inline void Shl(CVariable *dst, CVariable *src0, CVariable *src1);
inline void Shr(CVariable *dst, CVariable *src0, CVariable *src1);
inline void MulH(CVariable *dst, CVariable *src0, CVariable *src1);
inline void Cos(CVariable *dst, CVariable *src0);
inline void Sin(CVariable *dst, CVariable *src0);
inline void Log(CVariable *dst, CVariable *src0);
inline void Exp(CVariable *dst, CVariable *src0);
inline void Frc(CVariable *dst, CVariable *src0);
inline void Sqrt(CVariable *dst, CVariable *src0);
inline void Floor(CVariable *dst, CVariable *src0);
inline void Ceil(CVariable *dst, CVariable *src0);
inline void Ctlz(CVariable *dst, CVariable *src0);
inline void Truncate(CVariable *dst, CVariable *src0);
inline void RoundNE(CVariable *dst, CVariable *src0);
inline void Mod(CVariable *dst, CVariable *src0, CVariable *src1);
inline void Rsqrt(CVariable *dst, CVariable *src0);
inline void Inv(CVariable *dst, CVariable *src0);
inline void Not(CVariable *dst, CVariable *src0);
// src0 * src1 + src2
inline void Madw(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2);
inline void Mad(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2);
inline void Lrp(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2);
inline void Xor(CVariable *dst, CVariable *src0, CVariable *src1);
inline void Or(CVariable *dst, CVariable *src0, CVariable *src1);
inline void And(CVariable *dst, CVariable *src0, CVariable *src1);
inline void Pln(CVariable *dst, CVariable *src0, CVariable *src1);
inline void SendC(CVariable *dst, CVariable *src, uint exDesc, CVariable *messDescriptor);
inline void SendC(CVariable *dst, CVariable *src, uint ffid, CVariable *exDesc, CVariable *messDescriptor);
inline void LoadMS(EOPCODE subOpcode, uint writeMask, CVariable *offset, const ResourceDescriptor &resource,
uint numSources, CVariable *dst, llvm::SmallVector<CVariable *, 4> &payload, bool feedbackEnable);
inline void SetP(CVariable *dst, CVariable *src);
inline void Gather(CVariable *dst, CVariable *bufidx, CVariable *offset, CVariable *gOffset, e_predefSurface surface,
int elementSize);
inline void TypedRead4(const ResourceDescriptor &resource, CVariable *pU, CVariable *pV, CVariable *pR,
CVariable *pLOD, CVariable *pDst, uint writeMask);
inline void TypedWrite4(const ResourceDescriptor &resource, CVariable *pU, CVariable *pV, CVariable *pR,
CVariable *pLOD, CVariable *pSrc, uint writeMask);
inline void Scatter(CVariable *val, CVariable *bufidx, CVariable *offset, CVariable *gOffset, e_predefSurface surface,
int elementSize);
inline void IShr(CVariable *dst, CVariable *src0, CVariable *src1);
inline void Min(CVariable *dst, CVariable *src0, CVariable *src1);
inline void Max(CVariable *dst, CVariable *src0, CVariable *src1);
inline void UAddC(CVariable *dst, CVariable *dstCarryBorrow, CVariable *src0, CVariable *src1);
inline void USubB(CVariable *dst, CVariable *dstCarryBorrow, CVariable *src0, CVariable *src1);
inline void IEEESqrt(CVariable *dst, CVariable *src0);
inline void IEEEDivide(CVariable *dst, CVariable *src0, CVariable *src1);
void AddPair(CVariable *Lo, CVariable *Hi, CVariable *L0, CVariable *H0, CVariable *L1, CVariable *H1 = nullptr);
void SubPair(CVariable *Lo, CVariable *Hi, CVariable *L0, CVariable *H0, CVariable *L1, CVariable *H1);
inline void dp4a(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2);
void Lifetime(VISAVarLifetime StartOrEnd, CVariable *dst);
void dpas(CVariable *dst, CVariable *input, CVariable *weight, PrecisionType weight_precision, CVariable *activation,
PrecisionType activation_precision, uint8_t systolicDepth, uint8_t repeatCount, bool IsDpasw);
void fcvt(CVariable *dst, CVariable *src);
void srnd(CVariable *D, CVariable *S0, CVariable *R);
void Bfn(uint8_t booleanFuncCtrl, CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2);
void QWGather(CVariable *dst, const ResourceDescriptor &resource, CVariable *offset, unsigned elementSize,
unsigned numElems);
void QWScatter(CVariable *src, const ResourceDescriptor &resource, CVariable *offset, unsigned elementSize,
unsigned numElems);
// VME
void SendVmeIme(CVariable *bindingTableIndex, unsigned char streamMode, unsigned char searchControlMode,
CVariable *uniInputVar, CVariable *imeInputVar, CVariable *ref0Var, CVariable *ref1Var,
CVariable *costCenterVar, CVariable *outputVar);
void SendVmeFbr(CVariable *bindingTableIndex, CVariable *uniInputVar, CVariable *fbrInputVar, CVariable *FBRMbModeVar,
CVariable *FBRSubMbShapeVar, CVariable *FBRSubPredModeVar, CVariable *outputVar);
void SendVmeSic(CVariable *bindingTableIndex, CVariable *uniInputVar, CVariable *sicInputVar, CVariable *outputVar);
// VA
void SendVideoAnalytic(llvm::GenIntrinsicInst *inst, CVariable *vaResult, CVariable *coords, CVariable *size,
CVariable *srcImg, CVariable *sampler);
void SetDstSubVar(uint subVar);
void SetDstSubReg(uint subReg);
void SetSrcSubVar(uint srcNum, uint subVar);
void SetSrcSubReg(uint srcNum, uint subReg);
void SetDstModifier(e_modifier mod);
void SetDstModifier(const DstModifier &modifier);
void SetSrcModifier(uint srcNum, e_modifier mod);
void SetPredicate(CVariable *flag);
void SetInversePredicate(bool inv);
void SetPredicateMode(e_predMode mode);
void SetSrcRegion(uint srcNum, uint vStride, uint width, uint hStride, e_instance instance = EINSTANCE_UNSPECIFIED);
void SetDstRegion(uint hStride);
inline void SetNoMask();
inline void SetMask(e_mask mask);
inline void SetSimdSize(SIMDMode size);
inline SIMDMode GetSimdSize();
inline void SetUniformSIMDSize(SIMDMode size);
inline void SetSubSpanDestination(bool subspan);
inline bool IsSubSpanDestination();
inline void SetSecondHalf(bool secondHalf);
inline bool IsSecondHalf();
inline void SetSecondNibble(bool secondNibble);
inline bool IsSecondNibble();
inline void SetIsCodePatchCandidate(bool v);
inline bool IsCodePatchCandidate();
inline unsigned int GetPayloadEnd();
inline void SetPayloadEnd(unsigned int payloadEnd);
inline void SetHasPrevKernel(bool v);
inline bool HasPrevKernel();
inline void BeginForcedNoMaskRegion();
inline void EndForcedNoMaskRegion();
inline bool GetUniqueExclusiveLoad();
inline void SetUniqueExcusiveLoad(bool v);
void Wait();
VISAKernel *GetVISAKernel() const { return vKernel; }
VISABuilder *GetVISABuilder() const { return vbuilder; }
void Init();
void Push();
void initCR(VISAKernel *vKernel);
void SetVectorMask(bool vMask);
// Switches from actualRM to newRM
void SetRoundingMode_FP(ERoundingMode actualRM, ERoundingMode newRM);
void SetRoundingMode_FPCvtInt(ERoundingMode actualRM, ERoundingMode newRM);
static uint GetCISADataTypeSize(VISA_Type type) { return CVariable::GetCISADataTypeSize(type); }
static e_alignment GetCISADataTypeAlignment(VISA_Type type) { return CVariable::GetCISADataTypeAlignment(type); }
static VISASampler3DSubOpCode ConvertSubOpcode(EOPCODE subOpcode, bool zeroLOD);
// Wrappers for (potentially) common queries on types
static bool IsIntegerType(VISA_Type type);
static bool IsFloatType(VISA_Type type);
void SetVISAWaTable(WA_TABLE const &waTable);
/// \brief Initialize per-function states and start vISA emission
/// as a vISA subroutine
void BeginSubroutine(llvm::Function *F);
/// \brief Initialize per-function states and start vISA emission
/// as a vISA stack-call function
void BeginStackFunction(llvm::Function *F);
/// \brief Initialize interpolation section for vISA emission
void BeginPayloadSection();
void DestroyVISABuilder();
void AddVISASymbol(std::string &symName, CVariable *cvar);
std::string GetVariableName(CVariable *var);
std::string GetDumpFileName(std::string extension);
bool IsPayloadSectionAsPrimary() { return vKernel == vPayloadSection; }
void SetPayloadSectionAsPrimary() {
vKernelTmp = vKernel;
vKernel = vPayloadSection;
}
void SetPayloadSectionAsSecondary() { vKernel = vKernelTmp; }
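// Illustrative code-patch flow: SetPayloadSectionAsPrimary() temporarily
// redirects vKernel to vPayloadSection so the payload (interpolation) section
// is emitted first, and SetPayloadSectionAsSecondary() restores the saved
// kernel so the main shader body is appended to it again.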
std::string GetUniqueInlineAsmLabel();
bool IsVisaCompiledSuccessfully() const { return m_vIsaCompileStatus == VISA_SUCCESS; }
bool IsVisaCompileStatusFailure() const { return m_vIsaCompileStatus == VISA_FAILURE; }
private:
// helper functions
VISA_VectorOpnd *GetSourceOperand(CVariable *var, const SModifier &mod);
VISA_VectorOpnd *GetSourceOperandNoModifier(CVariable *var);
VISA_VectorOpnd *GetDestinationOperand(CVariable *var, const SModifier &mod);
VISA_RawOpnd *GetRawSource(CVariable *var, uint offset = 0);
VISA_RawOpnd *GetPairedResourceOperand(const ResourceDescriptor &pairedResource);
VISA_RawOpnd *GetRawDestination(CVariable *var, unsigned offset = 0);
VISA_PredOpnd *GetFlagOperand(const SFlag &flag);
VISA_StateOpndHandle *GetVISASurfaceOpnd(e_predefSurface surfaceType, CVariable *var);
VISA_StateOpndHandle *GetVISASurfaceOpnd(const ResourceDescriptor &resource);
VISA_LabelOpnd *GetOrCreateLabel(uint label, VISA_Label_Kind kind = LABEL_BLOCK);
VISA_LabelOpnd *GetFuncLabel(llvm::Function *F);
void InitLabelMap(const llvm::Function *F);
CName CreateVisaLabelName(const llvm::StringRef &L = "");
std::string CreateShortLabel(unsigned labelIndex) const;
// Compiler labels must start with something a user won't use in inline
// assembly.
static const char *GetCompilerLabelPrefix() { return "_"; }
VISAFunction *GetStackFunction(llvm::Function *F);
VISA_VectorOpnd *GetUniformSource(CVariable *var);
VISA_StateOpndHandle *GetBTIOperand(uint bindingTableIndex);
VISA_StateOpndHandle *GetSamplerOperand(CVariable *sampleIdx);
VISA_StateOpndHandle *GetSamplerOperand(const SamplerDescriptor &sampler);
void GetRowAndColOffset(CVariable *var, unsigned int subVar, unsigned int subreg, unsigned char &rowOff,
unsigned char &colOff);
VISA_GenVar *GetVISAVariable(CVariable *var);
VISA_GenVar *GetVISAVariable(CVariable *var, e_instance instance);
VISA_EMask_Ctrl ConvertMaskToVisaType(e_mask mask, bool noMask);
// Generic encoding functions
void MinMax(CISA_MIN_MAX_SUB_OPCODE subopcode, CVariable *dst, CVariable *src0, CVariable *src1);
void DataMov(ISA_Opcode opcode, CVariable *dst, CVariable *src);
void LogicOp(ISA_Opcode opcode, CVariable *dst, CVariable *src0, CVariable *src1 = nullptr, CVariable *src2 = nullptr,
CVariable *src3 = nullptr);
void Arithmetic(ISA_Opcode opcode, CVariable *dst, CVariable *src0 = nullptr, CVariable *src1 = nullptr,
CVariable *src2 = nullptr);
// dst may be the special `null` CVariable if only the carry/borrow bits matter
void CarryBorrowArith(ISA_Opcode opcode, CVariable *dst, CVariable *dstCarryBorrow, CVariable *src0, CVariable *src1);
void ScatterGather(ISA_Opcode opcode, CVariable *srcdst, CVariable *bufId, CVariable *offset, CVariable *gOffset,
e_predefSurface surface, int elementSize);
void TypedReadWrite(ISA_Opcode opcode, const ResourceDescriptor &resource, CVariable *pU, CVariable *pV,
CVariable *pR, CVariable *pLOD, CVariable *pSrcDst, uint writeMask);
VISA_Exec_Size GetAluExecSize(CVariable *dst) const;
VISA_EMask_Ctrl GetAluEMask(CVariable *dst);
bool IsSat();
bool isSamplerIdxLT16(const SamplerDescriptor &sampler);
// Variable splitting facilities (used when a variable crosses the 2-GRF boundary).
bool NeedSplitting(CVariable *var, const SModifier &mod, unsigned &numParts, bool isSource = false) const;
SModifier SplitVariable(VISA_Exec_Size fromExecSize, VISA_Exec_Size toExecSize, unsigned thePart, CVariable *var,
const SModifier &mod, bool isSource = false) const;
VISA_Exec_Size SplitExecSize(VISA_Exec_Size fromExecSize, unsigned numParts) const;
VISA_EMask_Ctrl SplitEMask(VISA_Exec_Size fromExecSize, VISA_Exec_Size toExecSize, unsigned thePart,
VISA_EMask_Ctrl execMask) const;
// Split a SIMD16 message data payload (MDP) for scattered/untyped write
// messages into two SIMD8 MDPs: V0 and V1.
void SplitPayloadToLowerSIMD(CVariable *MDP, uint32_t MDPOfst, uint32_t NumBlks, CVariable *V0, CVariable *V1,
uint32_t fromSize = 16);
// Merge two SIMD8 MDPs (V0 & V1) for scattered/untyped read messages into
// one SIMD16 MDP.
void MergePayloadToHigherSIMD(CVariable *V0, CVariable *V1, uint32_t NumBlks, CVariable *MDP, uint32_t MDPOfst,
uint32_t toSize = 16);
// save compile time by avoiding retry if the amount of spill is (very) small
bool AvoidRetryOnSmallSpill() const;
// CreateSymbolTable
// Note that this function should be called only once even if there are
// multiple kernels in a program. The current IGC flow creates all symbols in
// the first kernel; the other kernels won't contain symbols.
typedef std::vector<std::pair<llvm::Value *, vISA::ZESymEntry>> ValueToSymbolList;
void CreateSymbolTable(ValueToSymbolList &symbolTableList);
// Create function symbols
void CreateFunctionSymbolTable(ValueToSymbolList &symbolTableList, SProgramOutput::ZEBinFuncSymbolTable &funcSyms);
// Create program symbols
void CreateProgramSymbolTable(ValueToSymbolList &symbolTableList,
SOpenCLProgramInfo::ZEBinProgramSymbolTable &programSyms);
// Create local symbols for kernels
void CreateLocalSymbol(const std::string &kernelName, vISA::GenSymType type, unsigned offset, unsigned size,
SProgramOutput::ZEBinFuncSymbolTable &symbols);
// input/output: relocations
void CreateRelocationTable(VISAKernel *pMainKernel, SProgramOutput::RelocListTy &relocations);
// CreateFuncAttributeTable
void CreateFuncAttributeTable(VISAKernel *pMainKernel, GenXFunctionGroupAnalysis *pFga);
// CreateGlobalHostAccessTable
typedef std::vector<vISA::HostAccessEntry> HostAccessList;
void CreateGlobalHostAccessTable(HostAccessList &hostAccessMap);
// input/output: global host access names
void CreateGlobalHostAccessTable(SOpenCLProgramInfo::ZEBinGlobalHostAccessTable &globalHostAccessTable);
uint32_t getGRFSize() const;
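// Whether an instruction of the given execution size must be split into two
// halves: SIMD32 on 64-byte-GRF platforms, SIMD16 otherwise.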
bool needsSplitting(VISA_Exec_Size ExecSize) const {
if (getGRFSize() == 64) {
return ExecSize == EXEC_SIZE_32;
}
return ExecSize == EXEC_SIZE_16;
}
// Note that GEN can set both fpCvtInt_rtz and any of the FP rounding modes
// at the same time. If fpCvtInt uses a rounding mode other than rtz,
// both use the FP rounding bits.
//
// RM bits in CR0.0.
// float RM bits: [5:4];
// int RM (float -> int): Bit 12: 0 -> rtz; 1 -> using Float RM
enum RMEncoding {
// float rounding mode (fp operations, cvt to fp)
RoundToNearestEven = 0x00,
RoundToPositive = 0x10,
RoundToNegative = 0x20,
RoundToZero = 0x30,
// int rounding mode (fp cvt int only), use FP RM for all rounding modes
// but rtz.
RoundToNearestEven_int = 0x1000,
RoundToPositive_int = 0x1010,
RoundToNegative_int = 0x1020,
RoundToZero_int_unused = 0x1030,
RoundToZero_int = 0x0000, // use this for rtz, bit 12 = 0
IntAndFPRoundingModeMask = 0x1030
};
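// Example: switching an FP operation from RTNE to RTZ only changes the CR0.0
// bits [5:4] (0x00 -> 0x30), while selecting rtz for float-to-int conversions
// clears bit 12; the *_int encodings with bit 12 set reuse the FP bits [5:4].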
void SetRoundingMode(RMEncoding actualRM, RMEncoding newRM);
// Get Encoding bit values for rounding mode
RMEncoding getEncoderRoundingMode_FP(ERoundingMode FP_RM);
RMEncoding getEncoderRoundingMode_FPCvtInt(ERoundingMode FCvtI_RM);
unsigned GetRawOpndSplitOffset(VISA_Exec_Size fromExecSize, VISA_Exec_Size toExecSize, unsigned thePart,
CVariable *var) const;
std::tuple<CVariable *, uint32_t> splitRawOperand(CVariable *var, bool isFirstHalf, VISA_EMask_Ctrl execMask);
uint32_t getNumChannels(CVariable *var) const;
void SaveOption(vISAOptions option, bool val);
void SaveOption(vISAOptions option, uint32_t val);
void SaveOption(vISAOptions option, const char *val);
void SetBuilderOptions(VISABuilder *pbuilder);
unsigned int GetSpillThreshold(SIMDMode simdmode);
private:
// helper functions for compile flow
/// shaderOverrideVISAFirstPass - pre-process the shader override dir for visa
/// shader override. Returns whether there are visa files to be overridden.
bool shaderOverrideVISAFirstPass(std::vector<std::string> &visaOverrideFiles, std::string &kernelName);
/// shaderOverrideVISASecondPassOrInlineAsm - handle visa inputs of shader
/// override or inline asm. Returns the VISAKernel built from the overridden
/// visa or inline asm, or nullptr if the VISAKernel could not be created.
VISAKernel *shaderOverrideVISASecondPassOrInlineAsm(bool visaAsmOverride, bool hasSymbolTable, bool emitVisaOnly,
const std::vector<const char *> *additionalVISAAsmToLink,
const std::vector<std::string> &visaOverrideFiles,
const std::string kernelName);
// setup m_retryManager according to jitinfo and other factors
void SetKernelRetryState(CodeGenContext *context, vISA::FINALIZER_INFO *jitInfo, GenXFunctionGroupAnalysis *&pFGA);
/// create symbol tables and GlobalHostAccessTable
void createSymbolAndGlobalHostAccessTables(bool hasSymbolTable, VISAKernel &pMainKernel, unsigned int scratchOffset);
/// create relocation tables for the given kernel
void createRelocationTables(VISAKernel &pMainKernel);
/// Estimate the vISA stack size according to FunctionGroup information.
/// Conservatively increase the stack size for recursive or indirect calls
/// when the given function is a Function Group Head. Function group heads
/// are kernels/entry functions whose stack size is reported back to the
/// Runtime, so we set a conservative value. For other, non-head functions,
/// we set the precise stack usage of the function.
///
/// For group head functions with stack calls, the spillSize returned by
/// vISA is the sum of the required stack sizes of all potential callees,
/// without accounting for recursive or indirect calls.
uint32_t getSpillMemSizeWithFG(const llvm::Function &curFunc, uint32_t curSize, GenXFunctionGroupAnalysis *fga,
uint32_t gtpinScratchUse);
const vISA::KernelCostInfo *createKernelCostInfo(VISAKernel &pMainKernel);
protected:
// encoder states
SEncoderState m_encoderState = {};
llvm::DenseMap<SAlias, CVariable *, SAliasMapInfo> m_aliasesMap;
// vISA needs its own Wa-table as some of the W/A are applicable
// only to certain APIs/shader types/reg key settings/etc.
WA_TABLE m_vISAWaTable = {};
enum OpType { ET_BOOL, ET_INT32, ET_CSTR };
struct OptionValue {
OpType type;
bool vBool;
uint32_t vInt32;
const char *vCstr;
};
// List of vISA user options
std::vector<std::pair<vISAOptions, OptionValue>> m_visaUserOptions;
// Typically IGC uses just one vKernel for every vISA::compile call;
// in those cases, vKernel and vMainKernel should be the same.
// Only when stack calls are used does the vKernel pointer change each time
// IGC adds a vISA kernel or function object, while vMainKernel
// always points to the first kernel added during InitEncoder.
VISAKernel *vKernel;
VISAKernel *vMainKernel;
VISABuilder *vbuilder;
VISABuilder *vAsmTextBuilder;
// This is for CodePatch to split payload interpolation from a shader
VISAKernel *vPayloadSection;
VISAKernel *vKernelTmp;
bool m_hasPrevKernel = false;
unsigned int m_payloadEnd = 0;
unsigned int m_argumentStackSize = 0;
bool m_isCodePatchCandidate = false;
bool m_hasUniqueExclusiveLoad = false;
int m_nestLevelForcedNoMaskRegion = 0;
bool m_enableVISAdump = false;
bool m_hasInlineAsm = false;
std::vector<VISA_LabelOpnd *> labelMap;
std::vector<CName> labelNameMap; // parallel to labelMap
/// Per kernel label counter
unsigned labelCounter = 0;
/// Per kernel label counter for each inline asm block
unsigned labelInlineAsmCounter = 0;
/// Each kernel might emit several functions;
/// we pre-increment this for each new function we process (InitLabelMap).
/// The first function will see 0, ...
unsigned labelFunctionIndex = (unsigned)-1;
///
/// The name of the current function; set if we are emitting labels
CName currFunctionName;
/// Keep a map between a function and its label, per kernel state.
llvm::MapVector<llvm::Function *, VISA_LabelOpnd *> funcLabelMap;
/// Keep a map between a stack-called function and the corresponding vISA
/// function
llvm::MapVector<llvm::Function *, VISAFunction *> stackFuncMap;
// dummy variables
VISA_SurfaceVar *dummySurface;
VISA_SamplerVar *samplervar;
CShader *m_program;
int m_vIsaCompileStatus = VISA_FAILURE;
// Keep a map between a function and its per-function attributes needed for
// function pointer support
struct FuncAttrib {
bool isKernel = false;
bool hasBarrier = false;
unsigned argumentStackSize = 0;
unsigned allocaStackSize = 0;
};
llvm::MapVector<llvm::Function *, FuncAttrib> funcAttributeMap;
public:
// Used by EmitVISAPass to set function attributes
void InitFuncAttribute(llvm::Function *F, bool isKernel = false) { funcAttributeMap[F].isKernel = isKernel; }
void SetFunctionHasBarrier(llvm::Function *F) {
if (funcAttributeMap.find(F) != funcAttributeMap.end())
funcAttributeMap[F].hasBarrier = true;
}
void SetFunctionMaxArgumentStackSize(llvm::Function *F, unsigned size) {
if (funcAttributeMap.find(F) != funcAttributeMap.end())
m_argumentStackSize = funcAttributeMap[F].argumentStackSize = MAX(funcAttributeMap[F].argumentStackSize, size);
}
void SetFunctionAllocaStackSize(llvm::Function *F, unsigned size) {
if (funcAttributeMap.find(F) != funcAttributeMap.end())
funcAttributeMap[F].allocaStackSize = size;
}
};
inline void CEncoder::Jump(uint label) { Jump(NULL, label); }
inline void CEncoder::Bfi(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2, CVariable *src3) {
LogicOp(ISA_BFI, dst, src0, src1, src2, src3);
}
inline void CEncoder::Bfe(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2) {
LogicOp(ISA_BFE, dst, src0, src1, src2);
}
inline void CEncoder::Bfrev(CVariable *dst, CVariable *src0) { LogicOp(ISA_BFREV, dst, src0); }
inline void CEncoder::CBit(CVariable *dst, CVariable *src) { LogicOp(ISA_CBIT, dst, src); }
inline void CEncoder::Fbh(CVariable *dst, CVariable *src) { LogicOp(ISA_FBH, dst, src); }
inline void CEncoder::Fbl(CVariable *dst, CVariable *src) { LogicOp(ISA_FBL, dst, src); }
inline void CEncoder::Mul(CVariable *dst, CVariable *src0, CVariable *src1) { Arithmetic(ISA_MUL, dst, src0, src1); }
inline void CEncoder::Pow(CVariable *dst, CVariable *src0, CVariable *src1) { Arithmetic(ISA_POW, dst, src0, src1); }
inline void CEncoder::Div(CVariable *dst, CVariable *src0, CVariable *src1) { Arithmetic(ISA_DIV, dst, src0, src1); }
inline void CEncoder::Add(CVariable *dst, CVariable *src0, CVariable *src1) { Arithmetic(ISA_ADD, dst, src0, src1); }
inline void CEncoder::Shl(CVariable *dst, CVariable *src0, CVariable *src1) { LogicOp(ISA_SHL, dst, src0, src1); }
inline void CEncoder::IShr(CVariable *dst, CVariable *src0, CVariable *src1) { LogicOp(ISA_ASR, dst, src0, src1); }
inline void CEncoder::Shr(CVariable *dst, CVariable *src0, CVariable *src1) { LogicOp(ISA_SHR, dst, src0, src1); }
inline void CEncoder::MulH(CVariable *dst, CVariable *src0, CVariable *src1) { Arithmetic(ISA_MULH, dst, src0, src1); }
inline void CEncoder::Cos(CVariable *dst, CVariable *src0) { Arithmetic(ISA_COS, dst, src0); }
inline void CEncoder::Sin(CVariable *dst, CVariable *src0) { Arithmetic(ISA_SIN, dst, src0); }
inline void CEncoder::Log(CVariable *dst, CVariable *src0) { Arithmetic(ISA_LOG, dst, src0); }
inline void CEncoder::Exp(CVariable *dst, CVariable *src0) { Arithmetic(ISA_EXP, dst, src0); }
inline void CEncoder::Sqrt(CVariable *dst, CVariable *src0) { Arithmetic(ISA_SQRT, dst, src0); }
inline void CEncoder::Floor(CVariable *dst, CVariable *src0) { Arithmetic(ISA_RNDD, dst, src0); }
inline void CEncoder::Ceil(CVariable *dst, CVariable *src0) { Arithmetic(ISA_RNDU, dst, src0); }
inline void CEncoder::Ctlz(CVariable *dst, CVariable *src0) { Arithmetic(ISA_LZD, dst, src0); }
inline void CEncoder::Truncate(CVariable *dst, CVariable *src0) { Arithmetic(ISA_RNDZ, dst, src0); }
inline void CEncoder::RoundNE(CVariable *dst, CVariable *src0) { Arithmetic(ISA_RNDE, dst, src0); }
inline void CEncoder::Mod(CVariable *dst, CVariable *src0, CVariable *src1) { Arithmetic(ISA_MOD, dst, src0, src1); }
inline void CEncoder::Rsqrt(CVariable *dst, CVariable *src0) { Arithmetic(ISA_RSQRT, dst, src0); }
inline void CEncoder::Inv(CVariable *dst, CVariable *src0) { Arithmetic(ISA_INV, dst, src0); }
inline void CEncoder::Not(CVariable *dst, CVariable *src0) { Arithmetic(ISA_NOT, dst, src0); }
inline void CEncoder::Frc(CVariable *dst, CVariable *src0) { Arithmetic(ISA_FRC, dst, src0); }
inline void CEncoder::Pln(CVariable *dst, CVariable *src0, CVariable *src1) { Arithmetic(ISA_PLANE, dst, src0, src1); }
inline void CEncoder::Cast(CVariable *dst, CVariable *src) { DataMov(ISA_MOV, dst, src); }
// src0 * src1 + src2
inline void CEncoder::Madw(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2) {
Arithmetic(ISA_MADW, dst, src0, src1, src2);
}
// src0 * src1 + src2
inline void CEncoder::Mad(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2) {
Arithmetic(ISA_MAD, dst, src0, src1, src2);
}
inline void CEncoder::Lrp(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2) {
Arithmetic(ISA_LRP, dst, src0, src1, src2);
}
inline void CEncoder::Xor(CVariable *dst, CVariable *src0, CVariable *src1) { LogicOp(ISA_XOR, dst, src0, src1); }
inline void CEncoder::Or(CVariable *dst, CVariable *src0, CVariable *src1) { LogicOp(ISA_OR, dst, src0, src1); }
inline void CEncoder::And(CVariable *dst, CVariable *src0, CVariable *src1) { LogicOp(ISA_AND, dst, src0, src1); }
inline void CEncoder::SetP(CVariable *dst, CVariable *src0) {
// We always need no mask when doing a set predicate
m_encoderState.m_noMask = true;
DataMov(ISA_SETP, dst, src0);
}
inline void CEncoder::Min(CVariable *dst, CVariable *src0, CVariable *src1) { MinMax(CISA_DM_FMIN, dst, src0, src1); }
inline void CEncoder::Max(CVariable *dst, CVariable *src0, CVariable *src1) { MinMax(CISA_DM_FMAX, dst, src0, src1); }
inline void CEncoder::UAddC(CVariable *dst, CVariable *dstCarryBorrow, CVariable *src0, CVariable *src1) {
CarryBorrowArith(ISA_ADDC, dst, dstCarryBorrow, src0, src1);
}
inline void CEncoder::USubB(CVariable *dst, CVariable *dstCarryBorrow, CVariable *src0, CVariable *src1) {
CarryBorrowArith(ISA_SUBB, dst, dstCarryBorrow, src0, src1);
}
inline void CEncoder::LoadMS(EOPCODE subOpcode, uint writeMask, CVariable *offset, const ResourceDescriptor &resource,
uint numSources, CVariable *dst, llvm::SmallVector<CVariable *, 4> &payload,
bool feedbackEnable) {
ResourceDescriptor pairedResource{};
Load(subOpcode, writeMask, offset, resource, pairedResource, numSources, dst, payload, false, feedbackEnable);
}
inline void CEncoder::Gather(CVariable *dst, CVariable *bufId, CVariable *offset, CVariable *gOffset,
e_predefSurface surface, int elementSize) {
ScatterGather(ISA_GATHER, dst, bufId, offset, gOffset, surface, elementSize);
}
inline void CEncoder::TypedRead4(const ResourceDescriptor &resource, CVariable *pU, CVariable *pV, CVariable *pR,
CVariable *pLOD, CVariable *pDst, uint writeMask) {
TypedReadWrite(ISA_GATHER4_TYPED, resource, pU, pV, pR, pLOD, pDst, writeMask);
}
inline void CEncoder::TypedWrite4(const ResourceDescriptor &resource, CVariable *pU, CVariable *pV, CVariable *pR,
CVariable *pLOD, CVariable *pSrc, uint writeMask) {
TypedReadWrite(ISA_SCATTER4_TYPED, resource, pU, pV, pR, pLOD, pSrc, writeMask);
}
inline void CEncoder::Scatter(CVariable *val, CVariable *bufidx, CVariable *offset, CVariable *gOffset,
e_predefSurface surface, int elementSize) {
ScatterGather(ISA_SCATTER, val, bufidx, offset, gOffset, surface, elementSize);
}
inline void CEncoder::SendC(CVariable *dst, CVariable *src, uint exDesc, CVariable *messDescriptor) {
Send(dst, src, exDesc, messDescriptor, true);
}
inline void CEncoder::SendC(CVariable *dst, CVariable *src, uint ffid, CVariable *exDesc, CVariable *messDescriptor) {
Send(dst, src, ffid, exDesc, messDescriptor, true);
}
inline void CEncoder::IEEESqrt(CVariable *dst, CVariable *src0) { Arithmetic(ISA_SQRTM, dst, src0); }
inline void CEncoder::IEEEDivide(CVariable *dst, CVariable *src0, CVariable *src1) {
Arithmetic(ISA_DIVM, dst, src0, src1);
}
inline void CEncoder::dp4a(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2) {
Arithmetic(ISA_DP4A, dst, src0, src1, src2);
}
inline void CEncoder::SetIsCodePatchCandidate(bool v) { m_isCodePatchCandidate = v; }
inline bool CEncoder::IsCodePatchCandidate() { return m_isCodePatchCandidate; }
inline void CEncoder::SetPayloadEnd(unsigned int payloadEnd) { m_payloadEnd = payloadEnd; }
inline unsigned int CEncoder::GetPayloadEnd() { return m_payloadEnd; }
inline void CEncoder::SetHasPrevKernel(bool v) { m_hasPrevKernel = v; }
inline bool CEncoder::HasPrevKernel() { return m_hasPrevKernel; }
inline bool CEncoder::GetUniqueExclusiveLoad() { return m_hasUniqueExclusiveLoad; }
inline void CEncoder::SetUniqueExcusiveLoad(bool v) { m_hasUniqueExclusiveLoad = v; }
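// Usage note: Begin/EndForcedNoMaskRegion() below are expected to be paired;
// each Begin forces m_noMask, and the nesting counter ensures masked emission
// is restored only when the outermost region ends.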
inline void CEncoder::BeginForcedNoMaskRegion() {
++m_nestLevelForcedNoMaskRegion;
// Start submitting insts with NoMask control
m_encoderState.m_noMask = true;
}
inline void CEncoder::EndForcedNoMaskRegion() {
--m_nestLevelForcedNoMaskRegion;
IGC_ASSERT_MESSAGE(m_nestLevelForcedNoMaskRegion >= 0, "Invalid nesting of Unmasked regions");
// Out of unmasked region, return to submitting insts
// with Mask control
if (m_nestLevelForcedNoMaskRegion == 0)
m_encoderState.m_noMask = false;
}
inline void CEncoder::SetNoMask() { m_encoderState.m_noMask = true; }
inline void CEncoder::SetMask(e_mask mask) { m_encoderState.m_mask = mask; }
inline void CEncoder::SetSimdSize(SIMDMode size) { m_encoderState.m_simdSize = size; }
inline SIMDMode CEncoder::GetSimdSize() { return m_encoderState.m_simdSize; }
inline void CEncoder::SetUniformSIMDSize(SIMDMode size) { m_encoderState.m_uniformSIMDSize = size; }
inline void CEncoder::SetSubSpanDestination(bool subspan) { m_encoderState.m_SubSpanDestination = subspan; }
inline void CEncoder::SetSecondHalf(bool secondHalf) { m_encoderState.m_secondHalf = secondHalf; }
inline bool CEncoder::IsSecondHalf() { return m_encoderState.m_secondHalf; }
inline void CEncoder::SetSecondNibble(bool secondNibble) { m_encoderState.m_secondNibble = secondNibble; }
inline bool CEncoder::IsSecondNibble() { return m_encoderState.m_secondNibble; }
inline bool CEncoder::IsSubSpanDestination() { return m_encoderState.m_SubSpanDestination; }
VISA_Modifier ConvertModifierToVisaType(e_modifier modifier);
VISA_Cond_Mod ConvertCondModToVisaType(e_predicate condMod);
VISA_Oword_Num ConvertSizeToVisaType(uint size);
VISAChannelMask ConvertChannelMaskToVisaType(uint mask);
VISASourceSingleChannel ConvertSingleSourceChannel(uint srcChannel);
GenPrecision ConvertPrecisionToVisaType(PrecisionType P);
} // namespace IGC