mirror of
				https://github.com/intel/intel-graphics-compiler.git
				synced 2025-10-30 08:18:26 +08:00 
			
		
		
		
	 7c6de8c318
			
		
	
	7c6de8c318
	
	
	
		
			
			Changed usages of legacy symbol table structures (m_funcSymbolTable) to ZEBinary structures (m_symbols). Removed VC/LegacyInfoGeneration source files, as they're not needed after deprecating legacy relocations and symbols.
		
			
				
	
	
		
			952 lines
		
	
	
		
			46 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			952 lines
		
	
	
		
			46 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*========================== begin_copyright_notice ============================
 | |
| 
 | |
| Copyright (C) 2017-2021 Intel Corporation
 | |
| 
 | |
| SPDX-License-Identifier: MIT
 | |
| 
 | |
| ============================= end_copyright_notice ===========================*/
 | |
| 
 | |
| #pragma once
 | |
| 
 | |
| #include "Compiler/CISACodeGen/CISACodeGen.h"
 | |
| #include "Compiler/CISACodeGen/CVariable.hpp"
 | |
| #include "Compiler/CISACodeGen/PatternMatchPass.hpp"
 | |
| #include "Compiler/CISACodeGen/helper.h"
 | |
| #include "Compiler/CISACodeGen/GenCodeGenModule.h"
 | |
| #include "visa_wa.h"
 | |
| #include "inc/common/sku_wa.h"
 | |
| 
 | |
| namespace IGC {
 | |
| class CShader;
 | |
| 
 | |
| struct SFlag {
 | |
|   CVariable *var;
 | |
|   e_predMode mode;
 | |
|   bool invertFlag;
 | |
|   void init() {
 | |
|     var = NULL;
 | |
|     mode = EPRED_NORMAL;
 | |
|     invertFlag = false;
 | |
|   }
 | |
| };
 | |
| 
 | |
| struct SModifier {
 | |
|   uint16_t subReg;
 | |
|   uint8_t subVar;
 | |
|   uint8_t region[3];
 | |
|   e_modifier mod;
 | |
|   e_instance instance;
 | |
|   bool specialRegion;
 | |
|   void init() {
 | |
|     mod = EMOD_NONE;
 | |
|     subVar = 0;
 | |
|     subReg = 0;
 | |
|     instance = EINSTANCE_UNSPECIFIED;
 | |
|     specialRegion = false;
 | |
|   }
 | |
| };
 | |
| 
 | |
| struct SAlias {
 | |
|   CVariable *m_rootVar;
 | |
|   VISA_Type m_type;
 | |
|   // If m_aliasVar != null, visa GenVar's alias offset must be set to
 | |
|   // m_aliasVar->aliasOffset, not zero.
 | |
|   CVariable *m_aliasVar;
 | |
|   SAlias(CVariable *var, VISA_Type type, CVariable *aliasVar = nullptr)
 | |
|       : m_rootVar(var), m_type(type), m_aliasVar(aliasVar) {}
 | |
| };
 | |
| 
 | |
| struct SAliasMapInfo {
 | |
|   static inline SAlias getEmptyKey() { return SAlias(nullptr, ISA_TYPE_UD, nullptr); }
 | |
|   static inline SAlias getTombstoneKey() { return SAlias(nullptr, ISA_TYPE_D, nullptr); }
 | |
|   static unsigned getHashValue(const SAlias &Val) {
 | |
|     unsigned ty = (unsigned)Val.m_type;
 | |
|     if (Val.m_aliasVar)
 | |
|       ty += 0x011000;
 | |
|     return llvm::DenseMapInfo<CVariable *>::getHashValue(Val.m_rootVar) ^ ty;
 | |
|   }
 | |
|   static bool isEqual(const SAlias &LHS, const SAlias &RHS) {
 | |
|     if (!LHS.m_aliasVar && !RHS.m_aliasVar) {
 | |
|       // common case
 | |
|       return LHS.m_rootVar == RHS.m_rootVar && LHS.m_type == RHS.m_type;
 | |
|     } else if (LHS.m_aliasVar && RHS.m_aliasVar) {
 | |
|       // inline asm only
 | |
|       return LHS.m_rootVar == RHS.m_rootVar && LHS.m_type == RHS.m_type &&
 | |
|              LHS.m_aliasVar->GetAliasOffset() == RHS.m_aliasVar->GetAliasOffset();
 | |
|     }
 | |
|     return false;
 | |
|   }
 | |
| };
 | |
| 
 | |
/// Type-safe wrapper around a URB write channel bitmask that can also report
/// the mask in the form required by the V-ISA interface.
class URBChannelMask {
public:
  /// Wraps the raw channel bitmask.
  explicit URBChannelMask(unsigned int bitmask) : m_bitmask{bitmask} {}

  /// Size of the bitmask, i.e. the position of its most significant set bit.
  /// E.g. size(10001) == 5, size(1) == 1, size(1111) == 4.
  size_t size() const;

  /// Channel mask in the format V-ISA expects.
  /// A full mask (all ones) must be reported as 0xFF, meaning "no channel
  /// mask"; any other mask is reported as stored.
  /// E.g. 1010 --> 1010, 111 --> 11111111 (full-mask case).
  unsigned int asVISAMask() const;

  /// True when every channel is set, so the channel mask can be skipped.
  /// The check holds for masks of the form 2^k - 1 (a run of ones starting at
  /// bit 0); note that this includes an empty mask (0).
  bool isAllSet() const {
    const unsigned int successor = m_bitmask + 1;
    return (m_bitmask & successor) == 0;
  }

private:
  unsigned int m_bitmask;
};
 | |
| 
 | |
| struct SEncoderState {
 | |
|   SModifier m_srcOperand[4];
 | |
|   SModifier m_dstOperand;
 | |
|   SFlag m_flag;
 | |
|   SIMDMode m_simdSize;
 | |
|   SIMDMode m_uniformSIMDSize;
 | |
|   e_mask m_mask;
 | |
|   bool m_noMask;
 | |
|   bool m_SubSpanDestination;
 | |
|   bool m_secondHalf;
 | |
|   bool m_secondNibble = false;
 | |
| };
 | |
| 
 | |
| class CEncoder {
 | |
| public:
 | |
|   void InitEncoder(bool canAbortOnSpill, bool hasStackCall, bool hasInlineAsmCall, bool hasAdditionalVisaAsmToLink,
 | |
|                    int numThreadsPerEU, uint lowerBoundGRF, uint upperBoundGRF, VISAKernel *prevKernel);
 | |
|   void InitBuildParams(llvm::SmallVector<std::unique_ptr<const char, std::function<void(const char *)>>, 10> ¶ms);
 | |
|   void InitVISABuilderOptions(TARGET_PLATFORM VISAPlatform, bool canAbortOnSpill, bool hasStackCall,
 | |
|                               bool enableVISA_IR);
 | |
|   SEncoderState CopyEncoderState();
 | |
|   void SetEncoderState(SEncoderState &newState);
 | |
|   VISA_Align GetVISAAlign(CVariable *var);
 | |
| 
 | |
|   void SetAbortOnSpillThreshold(bool canAbortOnSpill, bool AllowSpill);
 | |
|   void SetMaxRegForThreadDispatch();
 | |
|   void SetDispatchSimdSize();
 | |
|   void SetSpillMemOffset();
 | |
|   void SetStackFunctionArgSize(uint size); // size in GRFs
 | |
|   void SetStackFunctionRetSize(uint size); // size in GRFs
 | |
|   void SetExternFunctionFlag();
 | |
| 
 | |
|   void GetVISAPredefinedVar(CVariable *pVar, PreDefined_Vars var);
 | |
|   void CreateVISAVar(CVariable *var, bool UseAliasOffset = false);
 | |
|   void DeclareInput(CVariable *var, uint offset, uint instance);
 | |
|   void DeclarePred(CVariable *var, uint offset);
 | |
|   void MarkAsOutput(CVariable *var);
 | |
|   void MarkAsPayloadLiveOut(CVariable *var);
 | |
|   void MarkAsExclusiveLoad(CVariable *var);
 | |
|   void Compile(bool hasSymbolTable, GenXFunctionGroupAnalysis *&pFGA);
 | |
|   std::string GetShaderName();
 | |
|   const vISA::KernelCostInfo *kci;
 | |
| 
 | |
|   CEncoder();
 | |
|   void SetProgram(CShader *program);
 | |
|   void Jump(CVariable *flag, uint label);
 | |
|   void AddLabel(uint label);
 | |
|   void AddDivergentResourceLoopLabel(uint label);
 | |
|   uint GetNewLabelID(const CName &name);
 | |
|   void DwordAtomicRaw(AtomicOp atomic_op, const ResourceDescriptor &bindingTableIndex, CVariable *dst,
 | |
|                       CVariable *elem_offset, CVariable *src0, CVariable *src1, bool is16Bit = false);
 | |
|   void AtomicRawA64(AtomicOp atomic_op, const ResourceDescriptor &resource, CVariable *dst, CVariable *elem_offset,
 | |
|                     CVariable *src0, CVariable *src1, unsigned short bitwidth);
 | |
|   void TypedAtomic(AtomicOp atomic_op, CVariable *dst, const ResourceDescriptor &resource, CVariable *pU, CVariable *pV,
 | |
|                    CVariable *pR, CVariable *src0, CVariable *src1, CVariable *lod, bool is16Bit = false);
 | |
|   void Cmp(e_predicate p, CVariable *dst, CVariable *src0, CVariable *src1);
 | |
|   void Select(CVariable *flag, CVariable *dst, CVariable *src0, CVariable *src1);
 | |
|   void GenericAlu(e_opcode opcode, CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2 = nullptr);
 | |
|   void Send(CVariable *dst, CVariable *src, uint exDesc, CVariable *messDescriptor, bool isSendc = false);
 | |
|   void Send(CVariable *dst, CVariable *src, uint ffid, CVariable *exDesc, CVariable *messDescriptor,
 | |
|             bool isSendc = false);
 | |
|   void Sends(CVariable *dst, CVariable *src0, CVariable *src1, uint ffid, CVariable *exDesc, CVariable *messDescriptor,
 | |
|              bool isSendc = false, bool hasEOT = false);
 | |
| 
 | |
|   void RenderTargetWrite(CVariable *var[], bool isUndefined[], bool lastRenderTarget, bool isNullRT, bool perSample,
 | |
|                          bool coarseMode, bool headerMaskFromCe0,
 | |
|                          CVariable *bindingTableIndex, CVariable *RTIndex, CVariable *source0Alpha, CVariable *oMask,
 | |
|                          CVariable *depth, CVariable *stencil, CVariable *CPSCounter, CVariable *sampleIndex,
 | |
|                          CVariable *r1Reg);
 | |
|   void Sample(EOPCODE subOpcode, uint writeMask, CVariable *offset, const ResourceDescriptor &bindingTableIndex,
 | |
|               const ResourceDescriptor &pairedesource, const SamplerDescriptor &SamplerIdx, uint numSources,
 | |
|               CVariable *dst, llvm::SmallVector<CVariable *, 4> &payload, bool zeroLOD, bool cpsEnable,
 | |
|               bool feedbackEnable, bool nonUniformState = false);
 | |
|   void Load(EOPCODE subOpcode, uint writeMask, CVariable *offset, const ResourceDescriptor &resource,
 | |
|             const ResourceDescriptor &pairedesource, uint numSources, CVariable *dst,
 | |
|             llvm::SmallVector<CVariable *, 4> &payload, bool zeroLOD, bool feedbackEnable);
 | |
| 
 | |
|   void Info(EOPCODE subOpcode, uint writeMask, const ResourceDescriptor &resource, CVariable *lod, CVariable *dst);
 | |
| 
 | |
|   void Gather4Inst(EOPCODE subOpcode, CVariable *offset, const ResourceDescriptor &resource,
 | |
|                    const ResourceDescriptor &pairedesource, const SamplerDescriptor &sampler, uint numSources,
 | |
|                    CVariable *dst, llvm::SmallVector<CVariable *, 4> &payload, uint channel, bool feedbackEnable);
 | |
| 
 | |
|   void OWLoad(CVariable *dst, const ResourceDescriptor &resource, CVariable *offset, bool owordAligned,
 | |
|               uint bytesToBeRead, uint dstOffset = 0);
 | |
|   void OWStore(CVariable *data, e_predefSurface surfaceType, CVariable *bufidx, CVariable *offset, uint bytesToBeRead,
 | |
|                uint srcOffset);
 | |
| 
 | |
|   void AddrAdd(CVariable *dst, CVariable *src0, CVariable *src1);
 | |
|   void Barrier(e_barrierKind BarrierKind);
 | |
|   void Fence(bool CommitEnable, bool L3_Flush_RW_Data, bool L3_Flush_Constant_Data, bool L3_Flush_Texture_Data,
 | |
|              bool L3_Flush_Instructions, bool Global_Mem_Fence, bool L1_Flush, bool SWFence);
 | |
|   void FlushSamplerCache();
 | |
|   void EOT();
 | |
|   void OWLoadA64(CVariable *dst, CVariable *offset, uint dstSize, uint dstOffset = 0);
 | |
|   void OWStoreA64(CVariable *dst, CVariable *offset, uint dstSize, uint srcOffset);
 | |
|   void MediaBlockMessage(ISA_Opcode subOpcode, CVariable *dst, e_predefSurface surfaceType, CVariable *buf,
 | |
|                          CVariable *xOffset, CVariable *yOffset, uint modifier, unsigned char blockWidth,
 | |
|                          unsigned char blockHeight, uint plane);
 | |
|   void GatherA64(CVariable *dst, CVariable *offset, unsigned elementSize, unsigned numElems);
 | |
|   VISA_VectorOpnd *GetVISALSCSurfaceOpnd(e_predefSurface surfaceType, CVariable *bti);
 | |
|   static LSC_DATA_SIZE LSC_GetElementSize(unsigned eSize, bool is2DBlockMsg = false);
 | |
|   static LSC_DATA_ELEMS LSC_GetElementNum(unsigned eNum);
 | |
|   static LSC_ADDR_TYPE getLSCAddrType(const ResourceDescriptor *resource);
 | |
|   static LSC_ADDR_TYPE getLSCAddrType(e_predefSurface surfaceType);
 | |
|   void LSC_LoadGather(LSC_OP subOp, CVariable *dst,
 | |
|                       CVariable *offset, LSC_DATA_SIZE elemSize, LSC_DATA_ELEMS numElems, unsigned blockOffset,
 | |
|                       ResourceDescriptor *resource, LSC_ADDR_SIZE addr_size, LSC_DATA_ORDER data_order, int immOffset,
 | |
|                       int immScale, LSC_CACHE_OPTS cacheOpts, LSC_DOC_ADDR_SPACE addrSpace);
 | |
|   void LSC_StoreScatter(LSC_OP subOp,
 | |
|                         CVariable *src, CVariable *offset, LSC_DATA_SIZE elemSize, LSC_DATA_ELEMS numElems,
 | |
|                         unsigned blockOffset, ResourceDescriptor *resource, LSC_ADDR_SIZE addr_size,
 | |
|                         LSC_DATA_ORDER data_order, int immOffset, int immScale, LSC_CACHE_OPTS cacheOpts,
 | |
|                         LSC_DOC_ADDR_SPACE addrSpace);
 | |
|   void LSC_LoadBlock1D(CVariable *dst, CVariable *offset, LSC_DATA_SIZE elemSize, LSC_DATA_ELEMS numElems,
 | |
|                        ResourceDescriptor *resource, LSC_ADDR_SIZE addrSize, int addrImmOffset,
 | |
|                        LSC_CACHE_OPTS cacheOpts);
 | |
|   void LSC_StoreBlock1D(CVariable *src, CVariable *offset, LSC_DATA_SIZE elemSize, LSC_DATA_ELEMS numElems,
 | |
|                         ResourceDescriptor *resource, LSC_ADDR_SIZE addrSize, int addrImmOffset,
 | |
|                         LSC_CACHE_OPTS cacheOpts);
 | |
|   void LSC_AtomicRaw(AtomicOp atomic_op, CVariable *dst,
 | |
|                      CVariable *offset, CVariable *src0, CVariable *src1, unsigned short bitwidth,
 | |
|                      ResourceDescriptor *resource, LSC_ADDR_SIZE addr_size, int immOff, int immScale,
 | |
|                      LSC_CACHE_OPTS cacheOpts);
 | |
|   void LSC_Fence(LSC_SFID sfid, LSC_SCOPE scope, LSC_FENCE_OP op);
 | |
|   void LSC_2DBlockMessage(LSC_OP subOp, ResourceDescriptor *resource, CVariable *dst, CVariable *bufId,
 | |
|                           CVariable *xOffset, CVariable *yOffset, unsigned blockWidth, unsigned blockHeight,
 | |
|                           unsigned elemSize, unsigned numBlocks, bool isTranspose, bool isVnni,
 | |
|                           CVariable *flatImageBaseoffset, CVariable *flatImageWidth, CVariable *flatImageHeight,
 | |
|                           CVariable *flatImagePitch,
 | |
|                           LSC_CACHE_OPTS cacheOpts = {LSC_CACHING_DEFAULT, LSC_CACHING_DEFAULT});
 | |
|   void LSC_2DBlockMessage(LSC_OP subOp, CVariable *Dst, CVariable *AddrPayload, CVariable *Src, uint32_t ImmX,
 | |
|                           uint32_t ImmY, uint32_t elemSize, uint32_t blockWidth, uint32_t blockHeight,
 | |
|                           uint32_t numBlocks, bool isTranspose, bool isVnni,
 | |
|                           LSC_CACHE_OPTS cacheOpts = {LSC_CACHING_DEFAULT, LSC_CACHING_DEFAULT});
 | |
|   void NamedBarrier(e_barrierKind BarrierKind, CVariable *src0, CVariable *src1, CVariable *src2, CVariable *src3);
 | |
|   void LSC_TypedReadWrite(LSC_OP subOp, ResourceDescriptor *resource, CVariable *pU, CVariable *pV, CVariable *pR,
 | |
|                           CVariable *pLODorSampleIdx, CVariable *pSrcDst, unsigned elemSize, unsigned numElems,
 | |
|                           LSC_ADDR_SIZE addr_size, int chMask,
 | |
|                           LSC_CACHE_OPTS cacheOpts = {LSC_CACHING_DEFAULT, LSC_CACHING_DEFAULT});
 | |
| 
 | |
| 
 | |
|   void LSC_TypedAtomic(AtomicOp subOp, ResourceDescriptor *resource, CVariable *pU, CVariable *pV, CVariable *pR,
 | |
|                        CVariable *pSrc0, CVariable *pSrc1, CVariable *pSrcDst, unsigned elemSize,
 | |
|                        LSC_ADDR_SIZE addr_size, LSC_CACHE_OPTS cacheOpts);
 | |
| 
 | |
| 
 | |
|   void LSC_Typed2dBlock(LSC_OP subOpcode, CVariable *dst, e_predefSurface surfaceType, CVariable *buf,
 | |
|                         CVariable *xOffset, CVariable *yOffset, int blockWidth, int blockHeight);
 | |
|   void LSC_UntypedAppendCounterAtomic(LSC_OP lscOp, ResourceDescriptor *resource, CVariable *dst, CVariable *src0);
 | |
|   void AppendBreakpoint();
 | |
|   void ScatterA64(CVariable *val, CVariable *offset, unsigned elementSize, unsigned numElems);
 | |
|   void ByteGather(CVariable *dst, const ResourceDescriptor &resource, CVariable *offset, unsigned elementSize,
 | |
|                   unsigned numElems);
 | |
|   void ByteScatter(CVariable *src, const ResourceDescriptor &resource, CVariable *offset, unsigned elementSize,
 | |
|                    unsigned numElems);
 | |
|   void Gather4Scaled(CVariable *dst, const ResourceDescriptor &resource, CVariable *offset, unsigned Mask = 0);
 | |
|   void Gather4ScaledNd(CVariable *dst, const ResourceDescriptor &resource, CVariable *offset, unsigned nd,
 | |
|                        unsigned Mask = 0);
 | |
|   void Scatter4Scaled(CVariable *src, const ResourceDescriptor &resource, CVariable *offset);
 | |
|   void Gather4A64(CVariable *dst, CVariable *offset);
 | |
|   void Scatter4A64(CVariable *src, CVariable *offset);
 | |
|   void BoolToInt(CVariable *dst, CVariable *src);
 | |
|   void Copy(CVariable *dst, CVariable *src);
 | |
|   void SubroutineCall(CVariable *flag, llvm::Function *F);
 | |
|   void SubroutineRet(CVariable *flag, llvm::Function *F);
 | |
|   void StackCall(CVariable *flag, llvm::Function *F, unsigned char argSize, unsigned char retSize);
 | |
|   void IndirectStackCall(CVariable *flag, CVariable *funcPtr, unsigned char argSize, unsigned char retSize);
 | |
|   void StackRet(CVariable *flag);
 | |
|   void Loc(unsigned int line);
 | |
|   void File(std::string &s);
 | |
|   void PredAdd(CVariable *flag, CVariable *dst, CVariable *src0, CVariable *src1);
 | |
|   void DebugLinePlaceholder();
 | |
| 
 | |
|   inline void Jump(uint label);
 | |
|   inline void Cast(CVariable *dst, CVariable *src);
 | |
|   inline void Add(CVariable *dst, CVariable *src0, CVariable *src1);
 | |
|   inline void Bfi(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2, CVariable *src3);
 | |
|   inline void Bfe(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2);
 | |
|   inline void Bfrev(CVariable *dst, CVariable *src0);
 | |
|   inline void CBit(CVariable *dst, CVariable *src0);
 | |
|   inline void Fbh(CVariable *dst, CVariable *src0);
 | |
|   inline void Fbl(CVariable *dst, CVariable *src0);
 | |
|   inline void Mul(CVariable *dst, CVariable *src0, CVariable *src1);
 | |
|   inline void Pow(CVariable *dst, CVariable *src0, CVariable *src1);
 | |
|   inline void Div(CVariable *dst, CVariable *src0, CVariable *src1);
 | |
|   inline void Shl(CVariable *dst, CVariable *src0, CVariable *src1);
 | |
|   inline void Shr(CVariable *dst, CVariable *src0, CVariable *src1);
 | |
|   inline void MulH(CVariable *dst, CVariable *src0, CVariable *src1);
 | |
|   inline void Cos(CVariable *dst, CVariable *src0);
 | |
|   inline void Sin(CVariable *dst, CVariable *src0);
 | |
|   inline void Log(CVariable *dst, CVariable *src0);
 | |
|   inline void Exp(CVariable *dst, CVariable *src0);
 | |
|   inline void Frc(CVariable *dst, CVariable *src0);
 | |
|   inline void Sqrt(CVariable *dst, CVariable *src0);
 | |
|   inline void Floor(CVariable *dst, CVariable *src0);
 | |
|   inline void Ceil(CVariable *dst, CVariable *src0);
 | |
|   inline void Ctlz(CVariable *dst, CVariable *src0);
 | |
|   inline void Truncate(CVariable *dst, CVariable *src0);
 | |
|   inline void RoundNE(CVariable *dst, CVariable *src0);
 | |
|   inline void Mod(CVariable *dst, CVariable *src0, CVariable *src1);
 | |
|   inline void Rsqrt(CVariable *dst, CVariable *src0);
 | |
|   inline void Inv(CVariable *dst, CVariable *src0);
 | |
|   inline void Not(CVariable *dst, CVariable *src0);
 | |
|   // src0 * src1 + src2
 | |
|   inline void Madw(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2);
 | |
|   inline void Mad(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2);
 | |
|   inline void Lrp(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2);
 | |
|   inline void Xor(CVariable *dst, CVariable *src0, CVariable *src1);
 | |
|   inline void Or(CVariable *dst, CVariable *src0, CVariable *src1);
 | |
|   inline void And(CVariable *dst, CVariable *src0, CVariable *src1);
 | |
|   inline void Pln(CVariable *dst, CVariable *src0, CVariable *src1);
 | |
|   inline void SendC(CVariable *dst, CVariable *src, uint exDesc, CVariable *messDescriptor);
 | |
|   inline void SendC(CVariable *dst, CVariable *src, uint ffid, CVariable *exDesc, CVariable *messDescriptor);
 | |
|   inline void LoadMS(EOPCODE subOpcode, uint writeMask, CVariable *offset, const ResourceDescriptor &resource,
 | |
|                      uint numSources, CVariable *dst, llvm::SmallVector<CVariable *, 4> &payload, bool feedbackEnable);
 | |
|   inline void SetP(CVariable *dst, CVariable *src);
 | |
|   inline void Gather(CVariable *dst, CVariable *bufidx, CVariable *offset, CVariable *gOffset, e_predefSurface surface,
 | |
|                      int elementSize);
 | |
|   inline void TypedRead4(const ResourceDescriptor &resource, CVariable *pU, CVariable *pV, CVariable *pR,
 | |
|                          CVariable *pLOD, CVariable *pDst, uint writeMask);
 | |
|   inline void TypedWrite4(const ResourceDescriptor &resource, CVariable *pU, CVariable *pV, CVariable *pR,
 | |
|                           CVariable *pLOD, CVariable *pSrc, uint writeMask);
 | |
|   inline void Scatter(CVariable *val, CVariable *bufidx, CVariable *offset, CVariable *gOffset, e_predefSurface surface,
 | |
|                       int elementSize);
 | |
|   inline void IShr(CVariable *dst, CVariable *src0, CVariable *src1);
 | |
|   inline void Min(CVariable *dst, CVariable *src0, CVariable *src1);
 | |
|   inline void Max(CVariable *dst, CVariable *src0, CVariable *src1);
 | |
|   inline void UAddC(CVariable *dst, CVariable *dstCarryBorrow, CVariable *src0, CVariable *src1);
 | |
|   inline void USubB(CVariable *dst, CVariable *dstCarryBorrow, CVariable *src0, CVariable *src1);
 | |
|   inline void IEEESqrt(CVariable *dst, CVariable *src0);
 | |
|   inline void IEEEDivide(CVariable *dst, CVariable *src0, CVariable *src1);
 | |
|   void AddPair(CVariable *Lo, CVariable *Hi, CVariable *L0, CVariable *H0, CVariable *L1, CVariable *H1 = nullptr);
 | |
|   void SubPair(CVariable *Lo, CVariable *Hi, CVariable *L0, CVariable *H0, CVariable *L1, CVariable *H1);
 | |
|   inline void dp4a(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2);
 | |
|   void Lifetime(VISAVarLifetime StartOrEnd, CVariable *dst);
 | |
|   void dpas(CVariable *dst, CVariable *input, CVariable *weight, PrecisionType weight_precision, CVariable *actication,
 | |
|             PrecisionType activation_precision, uint8_t systolicDepth, uint8_t repeatCount, bool IsDpasw);
 | |
|   void fcvt(CVariable *dst, CVariable *src);
 | |
|   void srnd(CVariable *D, CVariable *S0, CVariable *R);
 | |
|   void Bfn(uint8_t booleanFuncCtrl, CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2);
 | |
|   void QWGather(CVariable *dst, const ResourceDescriptor &resource, CVariable *offset, unsigned elementSize,
 | |
|                 unsigned numElems);
 | |
|   void QWScatter(CVariable *src, const ResourceDescriptor &resource, CVariable *offset, unsigned elementSize,
 | |
|                  unsigned numElems);
 | |
|   // VME
 | |
|   void SendVmeIme(CVariable *bindingTableIndex, unsigned char streamMode, unsigned char searchControlMode,
 | |
|                   CVariable *uniInputVar, CVariable *imeInputVar, CVariable *ref0Var, CVariable *ref1Var,
 | |
|                   CVariable *costCenterVar, CVariable *outputVar);
 | |
| 
 | |
|   void SendVmeFbr(CVariable *bindingTableIndex, CVariable *uniInputVar, CVariable *fbrInputVar, CVariable *FBRMbModeVar,
 | |
|                   CVariable *FBRSubMbShapeVar, CVariable *FBRSubPredModeVar, CVariable *outputVar);
 | |
| 
 | |
|   void SendVmeSic(CVariable *bindingTableIndex, CVariable *uniInputVar, CVariable *sicInputVar, CVariable *outputVar);
 | |
| 
 | |
|   // VA
 | |
|   void SendVideoAnalytic(llvm::GenIntrinsicInst *inst, CVariable *vaResult, CVariable *coords, CVariable *size,
 | |
|                          CVariable *srcImg, CVariable *sampler);
 | |
| 
 | |
|   void SetDstSubVar(uint subVar);
 | |
|   void SetDstSubReg(uint subReg);
 | |
|   void SetSrcSubVar(uint srcNum, uint subVar);
 | |
|   void SetSrcSubReg(uint srcNum, uint subReg);
 | |
|   void SetDstModifier(e_modifier mod);
 | |
|   void SetDstModifier(const DstModifier &modifier);
 | |
|   void SetSrcModifier(uint srcNum, e_modifier mod);
 | |
|   void SetPredicate(CVariable *flag);
 | |
|   void SetInversePredicate(bool inv);
 | |
|   void SetPredicateMode(e_predMode mode);
 | |
|   void SetSrcRegion(uint srcNum, uint vStride, uint width, uint hStride, e_instance instance = EINSTANCE_UNSPECIFIED);
 | |
|   void SetDstRegion(uint hStride);
 | |
|   inline void SetNoMask();
 | |
|   inline void SetMask(e_mask mask);
 | |
|   inline void SetSimdSize(SIMDMode size);
 | |
|   inline SIMDMode GetSimdSize();
 | |
|   inline void SetUniformSIMDSize(SIMDMode size);
 | |
|   inline void SetSubSpanDestination(bool subspan);
 | |
|   inline bool IsSubSpanDestination();
 | |
|   inline void SetSecondHalf(bool secondHalf);
 | |
|   inline bool IsSecondHalf();
 | |
|   inline void SetSecondNibble(bool secondNibble);
 | |
|   inline bool IsSecondNibble();
 | |
| 
 | |
|   inline void SetIsCodePatchCandidate(bool v);
 | |
|   inline bool IsCodePatchCandidate();
 | |
|   inline unsigned int GetPayloadEnd();
 | |
|   inline void SetPayloadEnd(unsigned int payloadEnd);
 | |
|   inline void SetHasPrevKernel(bool v);
 | |
|   inline bool HasPrevKernel();
 | |
|   inline void BeginForcedNoMaskRegion();
 | |
|   inline void EndForcedNoMaskRegion();
 | |
|   inline bool GetUniqueExclusiveLoad();
 | |
|   inline void SetUniqueExcusiveLoad(bool v);
 | |
| 
 | |
|   void Wait();
 | |
| 
 | |
|   VISAKernel *GetVISAKernel() const { return vKernel; }
 | |
|   VISABuilder *GetVISABuilder() const { return vbuilder; }
 | |
|   void Init();
 | |
|   void Push();
 | |
| 
 | |
|   void initCR(VISAKernel *vKernel);
 | |
|   void SetVectorMask(bool vMask);
 | |
| 
 | |
|   // Switches from actualRM to newRM
 | |
|   void SetRoundingMode_FP(ERoundingMode actualRM, ERoundingMode newRM);
 | |
|   void SetRoundingMode_FPCvtInt(ERoundingMode actualRM, ERoundingMode newRM);
 | |
| 
 | |
|   static uint GetCISADataTypeSize(VISA_Type type) { return CVariable::GetCISADataTypeSize(type); }
 | |
|   static e_alignment GetCISADataTypeAlignment(VISA_Type type) { return CVariable::GetCISADataTypeAlignment(type); }
 | |
| 
 | |
|   static VISASampler3DSubOpCode ConvertSubOpcode(EOPCODE subOpcode, bool zeroLOD);
 | |
| 
 | |
|   // Wrappers for (potentially) common queries on types
 | |
|   static bool IsIntegerType(VISA_Type type);
 | |
|   static bool IsFloatType(VISA_Type type);
 | |
| 
 | |
|   void SetVISAWaTable(WA_TABLE const &waTable);
 | |
| 
 | |
|   /// \brief Initialize per function states and starts vISA emission
 | |
|   /// as a vISA subroutine
 | |
|   void BeginSubroutine(llvm::Function *F);
 | |
|   /// \brief Initialize per function states and starts vISA emission
 | |
|   /// as a vISA stack-call function
 | |
|   void BeginStackFunction(llvm::Function *F);
 | |
|   /// \brief Initialize interpolation section for vISA emission
 | |
|   void BeginPayloadSection();
 | |
| 
 | |
|   void DestroyVISABuilder();
 | |
| 
 | |
|   void AddVISASymbol(std::string &symName, CVariable *cvar);
 | |
| 
 | |
|   std::string GetVariableName(CVariable *var);
 | |
|   std::string GetDumpFileName(std::string extension);
 | |
| 
 | |
|   bool IsPayloadSectionAsPrimary() { return vKernel == vPayloadSection; }
 | |
|   void SetPayloadSectionAsPrimary() {
 | |
|     vKernelTmp = vKernel;
 | |
|     vKernel = vPayloadSection;
 | |
|   }
 | |
|   void SetPayloadSectionAsSecondary() { vKernel = vKernelTmp; }
 | |
| 
 | |
|   std::string GetUniqueInlineAsmLabel();
 | |
| 
 | |
|   bool IsVisaCompiledSuccessfully() const { return m_vIsaCompileStatus == VISA_SUCCESS; }
 | |
|   bool IsVisaCompileStatusFailure() const { return m_vIsaCompileStatus == VISA_FAILURE; }
 | |
| 
 | |
| private:
 | |
|   // helper functions
 | |
|   VISA_VectorOpnd *GetSourceOperand(CVariable *var, const SModifier &mod);
 | |
|   VISA_VectorOpnd *GetSourceOperandNoModifier(CVariable *var);
 | |
|   VISA_VectorOpnd *GetDestinationOperand(CVariable *var, const SModifier &mod);
 | |
|   VISA_RawOpnd *GetRawSource(CVariable *var, uint offset = 0);
 | |
|   VISA_RawOpnd *GetPairedResourceOperand(const ResourceDescriptor &pairedResource);
 | |
|   VISA_RawOpnd *GetRawDestination(CVariable *var, unsigned offset = 0);
 | |
|   VISA_PredOpnd *GetFlagOperand(const SFlag &flag);
 | |
|   VISA_StateOpndHandle *GetVISASurfaceOpnd(e_predefSurface surfaceType, CVariable *var);
 | |
|   VISA_StateOpndHandle *GetVISASurfaceOpnd(const ResourceDescriptor &resource);
 | |
|   VISA_LabelOpnd *GetOrCreateLabel(uint label, VISA_Label_Kind kind = LABEL_BLOCK);
 | |
|   VISA_LabelOpnd *GetFuncLabel(llvm::Function *F);
 | |
|   void InitLabelMap(const llvm::Function *F);
 | |
|   CName CreateVisaLabelName(const llvm::StringRef &L = "");
 | |
|   std::string CreateShortLabel(unsigned labelIndex) const;
 | |
|   // Compiler labels must start with something a user won't use in inline
 | |
|   // assembly.
 | |
|   static const char *GetCompilerLabelPrefix() { return "_"; }
 | |
| 
 | |
|   VISAFunction *GetStackFunction(llvm::Function *F);
 | |
| 
 | |
|   VISA_VectorOpnd *GetUniformSource(CVariable *var);
 | |
|   VISA_StateOpndHandle *GetBTIOperand(uint bindingTableIndex);
 | |
|   VISA_StateOpndHandle *GetSamplerOperand(CVariable *sampleIdx);
 | |
|   VISA_StateOpndHandle *GetSamplerOperand(const SamplerDescriptor &sampler);
 | |
|   void GetRowAndColOffset(CVariable *var, unsigned int subVar, unsigned int subreg, unsigned char &rowOff,
 | |
|                           unsigned char &colOff);
 | |
| 
 | |
|   VISA_GenVar *GetVISAVariable(CVariable *var);
 | |
|   VISA_GenVar *GetVISAVariable(CVariable *var, e_instance instance);
 | |
|   VISA_EMask_Ctrl ConvertMaskToVisaType(e_mask mask, bool noMask);
 | |
| 
 | |
|   // Generic encoding functions
 | |
|   void MinMax(CISA_MIN_MAX_SUB_OPCODE subopcode, CVariable *dst, CVariable *src0, CVariable *src1);
 | |
|   void DataMov(ISA_Opcode opcode, CVariable *dst, CVariable *src);
 | |
|   void LogicOp(ISA_Opcode opcode, CVariable *dst, CVariable *src0, CVariable *src1 = nullptr, CVariable *src2 = nullptr,
 | |
|                CVariable *src3 = nullptr);
 | |
|   void Arithmetic(ISA_Opcode opcode, CVariable *dst, CVariable *src0 = nullptr, CVariable *src1 = nullptr,
 | |
|                   CVariable *src2 = nullptr);
 | |
|   // dst may be special `null` CVariable, if only carry/borrow bits matter
 | |
|   void CarryBorrowArith(ISA_Opcode opcode, CVariable *dst, CVariable *dstCarryBorrow, CVariable *src0, CVariable *src1);
 | |
|   void ScatterGather(ISA_Opcode opcode, CVariable *srcdst, CVariable *bufId, CVariable *offset, CVariable *gOffset,
 | |
|                      e_predefSurface surface, int elementSize);
 | |
|   void TypedReadWrite(ISA_Opcode opcode, const ResourceDescriptor &resource, CVariable *pU, CVariable *pV,
 | |
|                       CVariable *pR, CVariable *pLOD, CVariable *pSrcDst, uint writeMask);
 | |
| 
 | |
|   VISA_Exec_Size GetAluExecSize(CVariable *dst) const;
 | |
|   VISA_EMask_Ctrl GetAluEMask(CVariable *dst);
 | |
|   bool IsSat();
 | |
|   bool isSamplerIdxLT16(const SamplerDescriptor &sampler);
 | |
| 
 | |
|   // Variable splitting facilities (if crosses 2 GRF boundary).
 | |
|   bool NeedSplitting(CVariable *var, const SModifier &mod, unsigned &numParts, bool isSource = false) const;
 | |
|   SModifier SplitVariable(VISA_Exec_Size fromExecSize, VISA_Exec_Size toExecSize, unsigned thePart, CVariable *var,
 | |
|                           const SModifier &mod, bool isSource = false) const;
 | |
|   VISA_Exec_Size SplitExecSize(VISA_Exec_Size fromExecSize, unsigned numParts) const;
 | |
|   VISA_EMask_Ctrl SplitEMask(VISA_Exec_Size fromExecSize, VISA_Exec_Size toExecSize, unsigned thePart,
 | |
|                              VISA_EMask_Ctrl execMask) const;
 | |
| 
 | |
|   // Split SIMD16 message data payload(MDP) for scattered/untyped write
 | |
|   // messages into two SIMD8 MDPs : V0 and V1.
 | |
|   void SplitPayloadToLowerSIMD(CVariable *MDP, uint32_t MDPOfst, uint32_t NumBlks, CVariable *V0, CVariable *V1,
 | |
|                                uint32_t fromSize = 16);
 | |
|   // Merge two SIMD8 MDPs (V0 & V1) for scattered/untyped read messages into one
 | |
|   // SIMD16 message : MDP
 | |
|   void MergePayloadToHigherSIMD(CVariable *V0, CVariable *V1, uint32_t NumBlks, CVariable *MDP, uint32_t MDPOfst,
 | |
|                                 uint32_t toSize = 16);
 | |
| 
 | |
|   // save compile time by avoiding retry if the amount of spill is (very) small
 | |
|   bool AvoidRetryOnSmallSpill() const;
 | |
| 
 | |
|   // CreateSymbolTable
 | |
|   // Note that this function should be called only once even if there are
 | |
|   // multiple kernels in a program. Current IGC flow will create all symbols in
 | |
|   // the first kernel and all the other kernels won't contain symbols
 | |
|   typedef std::vector<std::pair<llvm::Value *, vISA::ZESymEntry>> ValueToSymbolList;
 | |
|   void CreateSymbolTable(ValueToSymbolList &symbolTableList);
 | |
|   // Create function symbols
 | |
|   void CreateFunctionSymbolTable(ValueToSymbolList &symbolTableList, SProgramOutput::ZEBinFuncSymbolTable &funcSyms);
 | |
|   // Create program symbols
 | |
|   void CreateProgramSymbolTable(ValueToSymbolList &symbolTableList,
 | |
|                                 SOpenCLProgramInfo::ZEBinProgramSymbolTable &programSyms);
 | |
|   // Create local symbols for kernels
 | |
|   void CreateLocalSymbol(const std::string &kernelName, vISA::GenSymType type, unsigned offset, unsigned size,
 | |
|                          SProgramOutput::ZEBinFuncSymbolTable &symbols);
 | |
| 
 | |
|   // input/output: relocations
 | |
|   void CreateRelocationTable(VISAKernel *pMainKernel, SProgramOutput::RelocListTy &relocations);
 | |
| 
 | |
|   // CreateFuncAttributeTable
 | |
|   void CreateFuncAttributeTable(VISAKernel *pMainKernel, GenXFunctionGroupAnalysis *pFga);
 | |
| 
 | |
|   // CreateGlobalHostAccessTable
 | |
|   typedef std::vector<vISA::HostAccessEntry> HostAccessList;
 | |
|   void CreateGlobalHostAccessTable(HostAccessList &hostAccessMap);
 | |
|   // input/output: global host access names
 | |
|   void CreateGlobalHostAccessTable(SOpenCLProgramInfo::ZEBinGlobalHostAccessTable &globalHostAccessTable);
 | |
| 
 | |
|   uint32_t getGRFSize() const;
 | |
| 
 | |
|   bool needsSplitting(VISA_Exec_Size ExecSize) const {
 | |
|     if (getGRFSize() == 64) {
 | |
|       return ExecSize == EXEC_SIZE_32;
 | |
|     }
 | |
|     return ExecSize == EXEC_SIZE_16;
 | |
|   }
 | |
| 
 | |
|   // Note that GEN can set both fpCvtInt_rtz and any of FP rounding modes
 | |
|   // at the same time. If fpCvtInt uses a rounding mode other than rtz,
 | |
|   // they both uses FP rounding bits.
 | |
|   //
 | |
|   // RM bits in CR0.0.
 | |
|   //    float RM bits: [5:4];
 | |
|   //    int RM (float -> int): Bit 12: 0 -> rtz; 1 -> using Float RM
 | |
|   enum RMEncoding {
 | |
|     // float rounding mode (fp operations, cvt to fp)
 | |
|     RoundToNearestEven = 0x00,
 | |
|     RoundToPositive = 0x10,
 | |
|     RoundToNegative = 0x20,
 | |
|     RoundToZero = 0x30,
 | |
|     // int rounding mode (fp cvt int only), use FP RM for all rounding modes
 | |
|     // but rtz.
 | |
|     RoundToNearestEven_int = 0x1000,
 | |
|     RoundToPositive_int = 0x1010,
 | |
|     RoundToNegative_int = 0x1020,
 | |
|     RoundToZero_int_unused = 0x1030,
 | |
|     RoundToZero_int = 0x0000, // use this for rtz, bit 12 = 0
 | |
| 
 | |
|     IntAndFPRoundingModeMask = 0x1030
 | |
|   };
 | |
|   void SetRoundingMode(RMEncoding actualRM, RMEncoding newRM);
 | |
|   // Get Encoding bit values for rounding mode
 | |
|   RMEncoding getEncoderRoundingMode_FP(ERoundingMode FP_RM);
 | |
|   RMEncoding getEncoderRoundingMode_FPCvtInt(ERoundingMode FCvtI_RM);
 | |
|   unsigned GetRawOpndSplitOffset(VISA_Exec_Size fromExecSize, VISA_Exec_Size toExecSize, unsigned thePart,
 | |
|                                  CVariable *var) const;
 | |
| 
 | |
|   std::tuple<CVariable *, uint32_t> splitRawOperand(CVariable *var, bool isFirstHalf, VISA_EMask_Ctrl execMask);
 | |
| 
 | |
|   uint32_t getNumChannels(CVariable *var) const;
 | |
| 
 | |
|   void SaveOption(vISAOptions option, bool val);
 | |
|   void SaveOption(vISAOptions option, uint32_t val);
 | |
|   void SaveOption(vISAOptions option, const char *val);
 | |
|   void SetBuilderOptions(VISABuilder *pbuilder);
 | |
| 
 | |
|   unsigned int GetSpillThreshold(SIMDMode simdmode);
 | |
| private:
 | |
|   // helper functions for compile flow
 | |
|   /// shaderOverrideVISAFirstPass - pre-process shader overide dir for visa
 | |
|   /// shader override. return if there are visa files to be overriden
 | |
|   bool shaderOverrideVISAFirstPass(std::vector<std::string> &visaOverrideFiles, std::string &kernelName);
 | |
| 
 | |
|   /// shaderOverrideVISASecondPassOrInlineAsm - handle visa inputs of shader
 | |
|   /// override or inline asm Return the VISAKernel built from overrided visa
 | |
|   /// or inline asm. Return nullptr if failed to create VISAKernel
 | |
|   VISAKernel *shaderOverrideVISASecondPassOrInlineAsm(bool visaAsmOverride, bool hasSymbolTable, bool emitVisaOnly,
 | |
|                                                       const std::vector<const char *> *additionalVISAAsmToLink,
 | |
|                                                       const std::vector<std::string> &visaOverrideFiles,
 | |
|                                                       const std::string kernelName);
 | |
| 
 | |
|   // setup m_retryManager according to jitinfo and other factors
 | |
|   void SetKernelRetryState(CodeGenContext *context, vISA::FINALIZER_INFO *jitInfo, GenXFunctionGroupAnalysis *&pFGA);
 | |
| 
 | |
|   /// create symbol tables and GlobalHostAccessTable
 | |
|   void createSymbolAndGlobalHostAccessTables(bool hasSymbolTable, VISAKernel &pMainKernel, unsigned int scratchOffset);
 | |
| 
 | |
|   /// create relocation tables according
 | |
|   void createRelocationTables(VISAKernel &pMainKernel);
 | |
| 
 | |
|   /// Estimate vISA stack size according to FunctionGroup information.
 | |
|   /// Conservatively increase the stack size when having recursive or indirect
 | |
|   /// calls if the given function is a Function Group Head. Function group
 | |
|   /// heads are kernels/entry functions those their stack size will be used to
 | |
|   /// reported back to the Runtime, so we set a conservative value. For other
 | |
|   /// non-head functions, we set the precise stack usage of the function.
 | |
|   ///
 | |
|   /// For group head functions, when having the stack call, the spillSize
 | |
|   /// returned by vISA is the sum of required stack size of all potential
 | |
|   /// callee and without considering recursive or indirect calls.
 | |
|   uint32_t getSpillMemSizeWithFG(const llvm::Function &curFunc, uint32_t curSize, GenXFunctionGroupAnalysis *fga,
 | |
|                                  uint32_t gtpinScratchUse);
 | |
| 
 | |
|   const vISA::KernelCostInfo *createKernelCostInfo(VISAKernel &pMainKenrel);
 | |
| 
 | |
| protected:
 | |
|   // encoder states
 | |
|   SEncoderState m_encoderState = {};
 | |
| 
 | |
|   llvm::DenseMap<SAlias, CVariable *, SAliasMapInfo> m_aliasesMap;
 | |
| 
 | |
|   // vISA needs its own Wa-table as some of the W/A are applicable
 | |
|   // only to certain APIs/shader types/reg key settings/etc.
 | |
|   WA_TABLE m_vISAWaTable = {};
 | |
| 
 | |
|   enum OpType { ET_BOOL, ET_INT32, ET_CSTR };
 | |
|   struct OptionValue {
 | |
|     OpType type;
 | |
|     bool vBool;
 | |
|     uint32_t vInt32;
 | |
|     const char *vCstr;
 | |
|   };
 | |
|   // List of vISA user options
 | |
|   std::vector<std::pair<vISAOptions, OptionValue>> m_visaUserOptions;
 | |
| 
 | |
|   // Typically IGC just use ones vKernel for every vISA::compile call,
 | |
|   // in those cases, vKernel and vMainKernel should be the same.
 | |
|   // Only when using stack-call, vKernel pointer changes every time
 | |
|   // IGC addes a vISA kernel or function object, but the vMainKernel
 | |
|   // always pointing to the first kernel added during InitEncoder.
 | |
|   VISAKernel *vKernel;
 | |
|   VISAKernel *vMainKernel;
 | |
|   VISABuilder *vbuilder;
 | |
|   VISABuilder *vAsmTextBuilder;
 | |
| 
 | |
|   // This is for CodePatch to split payload interpolation from a shader
 | |
|   VISAKernel *vPayloadSection;
 | |
|   VISAKernel *vKernelTmp;
 | |
|   bool m_hasPrevKernel = false;
 | |
|   unsigned int m_payloadEnd = 0;
 | |
|   unsigned int m_argumentStackSize = 0;
 | |
| 
 | |
|   bool m_isCodePatchCandidate = false;
 | |
| 
 | |
|   bool m_hasUniqueExclusiveLoad = false;
 | |
| 
 | |
|   int m_nestLevelForcedNoMaskRegion = 0;
 | |
| 
 | |
|   bool m_enableVISAdump = false;
 | |
|   bool m_hasInlineAsm = false;
 | |
| 
 | |
|   std::vector<VISA_LabelOpnd *> labelMap;
 | |
|   std::vector<CName> labelNameMap; // parallel to labelMap
 | |
| 
 | |
|   /// Per kernel label counter
 | |
|   unsigned labelCounter = 0;
 | |
|   /// Per kernel label counter for each inline asm block
 | |
|   unsigned labelInlineAsmCounter = 0;
 | |
|   /// Each kernel might emit several functions;
 | |
|   /// we pre-increment this for each new function we process (InitLabelMap)
 | |
|   /// The first function will see 0, ...
 | |
|   unsigned labelFunctionIndex = (unsigned)-1;
 | |
|   ///
 | |
|   /// The name of the current function; set if we are emitting labels
 | |
|   CName currFunctionName;
 | |
| 
 | |
|   /// Keep a map between a function and its label, per kernel state.
 | |
|   llvm::MapVector<llvm::Function *, VISA_LabelOpnd *> funcLabelMap;
 | |
|   /// Keep a map between a stack-called function and the corresponding vISA
 | |
|   /// function
 | |
|   llvm::MapVector<llvm::Function *, VISAFunction *> stackFuncMap;
 | |
| 
 | |
|   // dummy variables
 | |
|   VISA_SurfaceVar *dummySurface;
 | |
|   VISA_SamplerVar *samplervar;
 | |
| 
 | |
|   CShader *m_program;
 | |
|   int m_vIsaCompileStatus = VISA_FAILURE;
 | |
| 
 | |
|   // Keep a map between a function and its per-function attributes needed for
 | |
|   // function pointer support
 | |
|   struct FuncAttrib {
 | |
|     bool isKernel = false;
 | |
|     bool hasBarrier = false;
 | |
|     unsigned argumentStackSize = 0;
 | |
|     unsigned allocaStackSize = 0;
 | |
|   };
 | |
|   llvm::MapVector<llvm::Function *, FuncAttrib> funcAttributeMap;
 | |
| 
 | |
| public:
 | |
|   // Used by EmitVISAPass to set function attributes
 | |
|   void InitFuncAttribute(llvm::Function *F, bool isKernel = false) { funcAttributeMap[F].isKernel = isKernel; }
 | |
|   void SetFunctionHasBarrier(llvm::Function *F) {
 | |
|     if (funcAttributeMap.find(F) != funcAttributeMap.end())
 | |
|       funcAttributeMap[F].hasBarrier = true;
 | |
|   }
 | |
|   void SetFunctionMaxArgumentStackSize(llvm::Function *F, unsigned size) {
 | |
|     if (funcAttributeMap.find(F) != funcAttributeMap.end())
 | |
|       m_argumentStackSize = funcAttributeMap[F].argumentStackSize = MAX(funcAttributeMap[F].argumentStackSize, size);
 | |
|   }
 | |
|   void SetFunctionAllocaStackSize(llvm::Function *F, unsigned size) {
 | |
|     if (funcAttributeMap.find(F) != funcAttributeMap.end())
 | |
|       funcAttributeMap[F].allocaStackSize = size;
 | |
|   }
 | |
| };
 | |
| 
 | |
| inline void CEncoder::Jump(uint label) { Jump(NULL, label); }
 | |
| 
 | |
| inline void CEncoder::Bfi(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2, CVariable *src3) {
 | |
|   LogicOp(ISA_BFI, dst, src0, src1, src2, src3);
 | |
| }
 | |
| 
 | |
| inline void CEncoder::Bfe(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2) {
 | |
|   LogicOp(ISA_BFE, dst, src0, src1, src2);
 | |
| }
 | |
| 
 | |
| inline void CEncoder::Bfrev(CVariable *dst, CVariable *src0) { LogicOp(ISA_BFREV, dst, src0); }
 | |
| 
 | |
| inline void CEncoder::CBit(CVariable *dst, CVariable *src) { LogicOp(ISA_CBIT, dst, src); }
 | |
| 
 | |
| inline void CEncoder::Fbh(CVariable *dst, CVariable *src) { LogicOp(ISA_FBH, dst, src); }
 | |
| 
 | |
| inline void CEncoder::Fbl(CVariable *dst, CVariable *src) { LogicOp(ISA_FBL, dst, src); }
 | |
| 
 | |
| inline void CEncoder::Mul(CVariable *dst, CVariable *src0, CVariable *src1) { Arithmetic(ISA_MUL, dst, src0, src1); }
 | |
| 
 | |
| inline void CEncoder::Pow(CVariable *dst, CVariable *src0, CVariable *src1) { Arithmetic(ISA_POW, dst, src0, src1); }
 | |
| 
 | |
| inline void CEncoder::Div(CVariable *dst, CVariable *src0, CVariable *src1) { Arithmetic(ISA_DIV, dst, src0, src1); }
 | |
| 
 | |
| inline void CEncoder::Add(CVariable *dst, CVariable *src0, CVariable *src1) { Arithmetic(ISA_ADD, dst, src0, src1); }
 | |
| 
 | |
| inline void CEncoder::Shl(CVariable *dst, CVariable *src0, CVariable *src1) { LogicOp(ISA_SHL, dst, src0, src1); }
 | |
| 
 | |
| inline void CEncoder::IShr(CVariable *dst, CVariable *src0, CVariable *src1) { LogicOp(ISA_ASR, dst, src0, src1); }
 | |
| 
 | |
| inline void CEncoder::Shr(CVariable *dst, CVariable *src0, CVariable *src1) { LogicOp(ISA_SHR, dst, src0, src1); }
 | |
| 
 | |
| inline void CEncoder::MulH(CVariable *dst, CVariable *src0, CVariable *src1) { Arithmetic(ISA_MULH, dst, src0, src1); }
 | |
| 
 | |
| inline void CEncoder::Cos(CVariable *dst, CVariable *src0) { Arithmetic(ISA_COS, dst, src0); }
 | |
| 
 | |
| inline void CEncoder::Sin(CVariable *dst, CVariable *src0) { Arithmetic(ISA_SIN, dst, src0); }
 | |
| 
 | |
| inline void CEncoder::Log(CVariable *dst, CVariable *src0) { Arithmetic(ISA_LOG, dst, src0); }
 | |
| 
 | |
| inline void CEncoder::Exp(CVariable *dst, CVariable *src0) { Arithmetic(ISA_EXP, dst, src0); }
 | |
| 
 | |
| inline void CEncoder::Sqrt(CVariable *dst, CVariable *src0) { Arithmetic(ISA_SQRT, dst, src0); }
 | |
| 
 | |
| inline void CEncoder::Floor(CVariable *dst, CVariable *src0) { Arithmetic(ISA_RNDD, dst, src0); }
 | |
| 
 | |
| inline void CEncoder::Ceil(CVariable *dst, CVariable *src0) { Arithmetic(ISA_RNDU, dst, src0); }
 | |
| 
 | |
| inline void CEncoder::Ctlz(CVariable *dst, CVariable *src0) { Arithmetic(ISA_LZD, dst, src0); }
 | |
| 
 | |
| inline void CEncoder::Truncate(CVariable *dst, CVariable *src0) { Arithmetic(ISA_RNDZ, dst, src0); }
 | |
| 
 | |
| inline void CEncoder::RoundNE(CVariable *dst, CVariable *src0) { Arithmetic(ISA_RNDE, dst, src0); }
 | |
| 
 | |
| inline void CEncoder::Mod(CVariable *dst, CVariable *src0, CVariable *src1) { Arithmetic(ISA_MOD, dst, src0, src1); }
 | |
| 
 | |
| inline void CEncoder::Rsqrt(CVariable *dst, CVariable *src0) { Arithmetic(ISA_RSQRT, dst, src0); }
 | |
| 
 | |
| inline void CEncoder::Inv(CVariable *dst, CVariable *src0) { Arithmetic(ISA_INV, dst, src0); }
 | |
| 
 | |
| inline void CEncoder::Not(CVariable *dst, CVariable *src0) { Arithmetic(ISA_NOT, dst, src0); }
 | |
| 
 | |
| inline void CEncoder::Frc(CVariable *dst, CVariable *src0) { Arithmetic(ISA_FRC, dst, src0); }
 | |
| 
 | |
| inline void CEncoder::Pln(CVariable *dst, CVariable *src0, CVariable *src1) { Arithmetic(ISA_PLANE, dst, src0, src1); }
 | |
| 
 | |
| inline void CEncoder::Cast(CVariable *dst, CVariable *src) { DataMov(ISA_MOV, dst, src); }
 | |
| 
 | |
| // src0 * src1 + src2
 | |
| inline void CEncoder::Madw(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2) {
 | |
|   Arithmetic(ISA_MADW, dst, src0, src1, src2);
 | |
| }
 | |
| 
 | |
| // src0 * src1 + src2
 | |
| inline void CEncoder::Mad(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2) {
 | |
|   Arithmetic(ISA_MAD, dst, src0, src1, src2);
 | |
| }
 | |
| 
 | |
| inline void CEncoder::Lrp(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2) {
 | |
|   Arithmetic(ISA_LRP, dst, src0, src1, src2);
 | |
| }
 | |
| 
 | |
| inline void CEncoder::Xor(CVariable *dst, CVariable *src0, CVariable *src1) { LogicOp(ISA_XOR, dst, src0, src1); }
 | |
| 
 | |
| inline void CEncoder::Or(CVariable *dst, CVariable *src0, CVariable *src1) { LogicOp(ISA_OR, dst, src0, src1); }
 | |
| 
 | |
| inline void CEncoder::And(CVariable *dst, CVariable *src0, CVariable *src1) { LogicOp(ISA_AND, dst, src0, src1); }
 | |
| 
 | |
| inline void CEncoder::SetP(CVariable *dst, CVariable *src0) {
 | |
|   // We always need no mask when doing a set predicate
 | |
|   m_encoderState.m_noMask = true;
 | |
|   DataMov(ISA_SETP, dst, src0);
 | |
| }
 | |
| 
 | |
| inline void CEncoder::Min(CVariable *dst, CVariable *src0, CVariable *src1) { MinMax(CISA_DM_FMIN, dst, src0, src1); }
 | |
| 
 | |
| inline void CEncoder::Max(CVariable *dst, CVariable *src0, CVariable *src1) { MinMax(CISA_DM_FMAX, dst, src0, src1); }
 | |
| 
 | |
| inline void CEncoder::UAddC(CVariable *dst, CVariable *dstCarryBorrow, CVariable *src0, CVariable *src1) {
 | |
|   CarryBorrowArith(ISA_ADDC, dst, dstCarryBorrow, src0, src1);
 | |
| }
 | |
| 
 | |
| inline void CEncoder::USubB(CVariable *dst, CVariable *dstCarryBorrow, CVariable *src0, CVariable *src1) {
 | |
|   CarryBorrowArith(ISA_SUBB, dst, dstCarryBorrow, src0, src1);
 | |
| }
 | |
| 
 | |
| inline void CEncoder::LoadMS(EOPCODE subOpcode, uint writeMask, CVariable *offset, const ResourceDescriptor &resource,
 | |
|                              uint numSources, CVariable *dst, llvm::SmallVector<CVariable *, 4> &payload,
 | |
|                              bool feedbackEnable) {
 | |
|   ResourceDescriptor pairedResource{};
 | |
|   Load(subOpcode, writeMask, offset, resource, pairedResource, numSources, dst, payload, false, feedbackEnable);
 | |
| }
 | |
| 
 | |
| inline void CEncoder::Gather(CVariable *dst, CVariable *bufId, CVariable *offset, CVariable *gOffset,
 | |
|                              e_predefSurface surface, int elementSize) {
 | |
|   ScatterGather(ISA_GATHER, dst, bufId, offset, gOffset, surface, elementSize);
 | |
| }
 | |
| 
 | |
| inline void CEncoder::TypedRead4(const ResourceDescriptor &resource, CVariable *pU, CVariable *pV, CVariable *pR,
 | |
|                                  CVariable *pLOD, CVariable *pDst, uint writeMask) {
 | |
|   TypedReadWrite(ISA_GATHER4_TYPED, resource, pU, pV, pR, pLOD, pDst, writeMask);
 | |
| }
 | |
| 
 | |
| inline void CEncoder::TypedWrite4(const ResourceDescriptor &resource, CVariable *pU, CVariable *pV, CVariable *pR,
 | |
|                                   CVariable *pLOD, CVariable *pSrc, uint writeMask) {
 | |
|   TypedReadWrite(ISA_SCATTER4_TYPED, resource, pU, pV, pR, pLOD, pSrc, writeMask);
 | |
| }
 | |
| 
 | |
| inline void CEncoder::Scatter(CVariable *val, CVariable *bufidx, CVariable *offset, CVariable *gOffset,
 | |
|                               e_predefSurface surface, int elementSize) {
 | |
|   ScatterGather(ISA_SCATTER, val, bufidx, offset, gOffset, surface, elementSize);
 | |
| }
 | |
| 
 | |
| inline void CEncoder::SendC(CVariable *dst, CVariable *src, uint exDesc, CVariable *messDescriptor) {
 | |
|   Send(dst, src, exDesc, messDescriptor, true);
 | |
| }
 | |
| 
 | |
| inline void CEncoder::SendC(CVariable *dst, CVariable *src, uint ffid, CVariable *exDesc, CVariable *messDescriptor) {
 | |
|   Send(dst, src, ffid, exDesc, messDescriptor, true);
 | |
| }
 | |
| 
 | |
| inline void CEncoder::IEEESqrt(CVariable *dst, CVariable *src0) { Arithmetic(ISA_SQRTM, dst, src0); }
 | |
| 
 | |
| inline void CEncoder::IEEEDivide(CVariable *dst, CVariable *src0, CVariable *src1) {
 | |
|   Arithmetic(ISA_DIVM, dst, src0, src1);
 | |
| }
 | |
| 
 | |
| inline void CEncoder::dp4a(CVariable *dst, CVariable *src0, CVariable *src1, CVariable *src2) {
 | |
|   Arithmetic(ISA_DP4A, dst, src0, src1, src2);
 | |
| }
 | |
| 
 | |
| inline void CEncoder::SetIsCodePatchCandidate(bool v) { m_isCodePatchCandidate = v; }
 | |
| 
 | |
| inline bool CEncoder::IsCodePatchCandidate() { return m_isCodePatchCandidate; }
 | |
| 
 | |
| inline void CEncoder::SetPayloadEnd(unsigned int payloadEnd) { m_payloadEnd = payloadEnd; }
 | |
| 
 | |
| inline unsigned int CEncoder::GetPayloadEnd() { return m_payloadEnd; }
 | |
| 
 | |
| inline void CEncoder::SetHasPrevKernel(bool v) { m_hasPrevKernel = v; }
 | |
| 
 | |
| inline bool CEncoder::HasPrevKernel() { return m_hasPrevKernel; }
 | |
| 
 | |
| inline bool CEncoder::GetUniqueExclusiveLoad() { return m_hasUniqueExclusiveLoad; }
 | |
| 
 | |
| inline void CEncoder::SetUniqueExcusiveLoad(bool v) { m_hasUniqueExclusiveLoad = v; }
 | |
| 
 | |
| inline void CEncoder::BeginForcedNoMaskRegion() {
 | |
|   ++m_nestLevelForcedNoMaskRegion;
 | |
|   // Start submitting insts with NoMask control
 | |
|   m_encoderState.m_noMask = true;
 | |
| }
 | |
| 
 | |
| inline void CEncoder::EndForcedNoMaskRegion() {
 | |
|   --m_nestLevelForcedNoMaskRegion;
 | |
|   IGC_ASSERT_MESSAGE(m_nestLevelForcedNoMaskRegion >= 0, "Invalid nesting of Unmasked regions");
 | |
|   // Out of unmasked region, return to submitting insts
 | |
|   // with Mask control
 | |
|   if (m_nestLevelForcedNoMaskRegion == 0)
 | |
|     m_encoderState.m_noMask = false;
 | |
| }
 | |
| 
 | |
| inline void CEncoder::SetNoMask() { m_encoderState.m_noMask = true; }
 | |
| 
 | |
| inline void CEncoder::SetMask(e_mask mask) { m_encoderState.m_mask = mask; }
 | |
| 
 | |
| inline void CEncoder::SetSimdSize(SIMDMode size) { m_encoderState.m_simdSize = size; }
 | |
| 
 | |
| inline SIMDMode CEncoder::GetSimdSize() { return m_encoderState.m_simdSize; }
 | |
| 
 | |
| inline void CEncoder::SetUniformSIMDSize(SIMDMode size) { m_encoderState.m_uniformSIMDSize = size; }
 | |
| 
 | |
| inline void CEncoder::SetSubSpanDestination(bool subspan) { m_encoderState.m_SubSpanDestination = subspan; }
 | |
| 
 | |
| inline void CEncoder::SetSecondHalf(bool secondHalf) { m_encoderState.m_secondHalf = secondHalf; }
 | |
| 
 | |
| inline bool CEncoder::IsSecondHalf() { return m_encoderState.m_secondHalf; }
 | |
| 
 | |
| inline void CEncoder::SetSecondNibble(bool secondNibble) { m_encoderState.m_secondNibble = secondNibble; }
 | |
| 
 | |
| inline bool CEncoder::IsSecondNibble() { return m_encoderState.m_secondNibble; }
 | |
| 
 | |
| inline bool CEncoder::IsSubSpanDestination() { return m_encoderState.m_SubSpanDestination; }
 | |
| 
 | |
| VISA_Modifier ConvertModifierToVisaType(e_modifier modifier);
 | |
| VISA_Cond_Mod ConvertCondModToVisaType(e_predicate condMod);
 | |
| VISA_Oword_Num ConvertSizeToVisaType(uint size);
 | |
| VISAChannelMask ConvertChannelMaskToVisaType(uint mask);
 | |
| VISASourceSingleChannel ConvertSingleSourceChannel(uint srcChannel);
 | |
| 
 | |
| GenPrecision ConvertPrecisionToVisaType(PrecisionType P);
 | |
| } // namespace IGC
 |