/*
 * Copyright (C) 2018-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */
#pragma once
#include "shared/source/command_stream/command_stream_receiver_hw.h"
#include "shared/source/command_stream/csr_properties_flags.h"
#include "shared/source/command_stream/thread_arbitration_policy.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/device/device.h"
#include "shared/source/helpers/address_patch.h"
#include "shared/source/helpers/preamble.h"
#include "shared/source/helpers/timestamp_packet.h"
#include "shared/source/kernel/implicit_args.h"
#include "shared/source/kernel/kernel_execution_type.h"
#include "shared/source/program/kernel_info.h"
#include "shared/source/unified_memory/unified_memory.h"
#include "shared/source/utilities/stackvec.h"

#include "opencl/extensions/public/cl_ext_private.h"
#include "opencl/source/api/cl_types.h"
#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/helpers/base_object.h"
#include "opencl/source/helpers/properties_helper.h"
#include "opencl/source/kernel/kernel_objects_for_aux_translation.h"
#include "opencl/source/program/program.h"

#include <array>
#include <cstdint>
#include <map>
#include <memory>
#include <unordered_map>
#include <vector>
namespace NEO {
2017-12-21 00:45:38 +01:00
struct CompletionStamp ;
2018-08-07 15:09:16 +02:00
class Buffer ;
2022-01-19 15:18:21 +00:00
class CommandQueue ;
2019-10-27 19:48:26 +01:00
class CommandStreamReceiver ;
2017-12-21 00:45:38 +01:00
class GraphicsAllocation ;
2018-03-27 14:30:05 +02:00
class ImageTransformer ;
2017-12-21 00:45:38 +01:00
class Surface ;
class PrintfHandler ;
2021-03-09 10:30:21 +00:00
class MultiDeviceKernel ;
2017-12-21 00:45:38 +01:00
2021-03-09 10:30:21 +00:00
class Kernel : public ReferenceTrackedObject < Kernel > {
2017-12-21 00:45:38 +01:00
public :
2021-10-05 12:15:54 +00:00
static const uint32_t kernelBinaryAlignment = 64 ;
2017-12-21 00:45:38 +01:00
enum kernelArgType {
NONE_OBJ ,
IMAGE_OBJ ,
BUFFER_OBJ ,
PIPE_OBJ ,
SVM_OBJ ,
SVM_ALLOC_OBJ ,
SAMPLER_OBJ ,
ACCELERATOR_OBJ ,
DEVICE_QUEUE_OBJ ,
SLM_OBJ
} ;
struct SimpleKernelArgInfo {
2022-07-01 18:03:54 +00:00
cl_mem_flags svmFlags ;
2018-08-10 04:42:52 -07:00
void * object ;
2017-12-21 00:45:38 +01:00
const void * value ;
size_t size ;
2022-07-01 18:03:54 +00:00
GraphicsAllocation * svmAllocation ;
kernelArgType type ;
2022-02-25 14:28:18 +00:00
uint32_t allocId ;
2022-03-02 11:08:18 +00:00
uint32_t allocIdMemoryManagerCounter ;
2022-07-01 18:03:54 +00:00
bool isPatched = false ;
bool isStatelessUncacheable = false ;
2022-04-22 12:25:17 +00:00
bool isSetToNullptr = false ;
2017-12-21 00:45:38 +01:00
} ;
2020-12-04 09:42:34 +00:00
enum class TunningStatus {
STANDARD_TUNNING_IN_PROGRESS ,
SUBDEVICE_TUNNING_IN_PROGRESS ,
TUNNING_DONE
} ;
2021-01-11 15:11:14 +00:00
enum class TunningType {
DISABLED ,
SIMPLE ,
FULL
} ;
2017-12-21 00:45:38 +01:00
typedef int32_t ( Kernel : : * KernelArgHandler ) ( uint32_t argIndex ,
size_t argSize ,
const void * argVal ) ;
template < typename kernel_t = Kernel , typename program_t = Program >
2021-03-22 15:26:03 +00:00
static kernel_t * create ( program_t * program , const KernelInfo & kernelInfo , ClDevice & clDevice , cl_int * errcodeRet ) {
2017-12-21 00:45:38 +01:00
cl_int retVal ;
kernel_t * pKernel = nullptr ;
2021-03-22 15:26:03 +00:00
pKernel = new kernel_t ( program , kernelInfo , clDevice ) ;
2020-01-11 18:25:26 +01:00
retVal = pKernel - > initialize ( ) ;
2017-12-21 00:45:38 +01:00
if ( retVal ! = CL_SUCCESS ) {
delete pKernel ;
pKernel = nullptr ;
}
2022-10-22 08:46:42 +02:00
auto localMemSize = static_cast < uint32_t > ( clDevice . getDevice ( ) . getDeviceInfo ( ) . localMemSize ) ;
auto slmInlineSize = kernelInfo . kernelDescriptor . kernelAttributes . slmInlineSize ;
if ( slmInlineSize > 0 & & localMemSize < slmInlineSize ) {
PRINT_DEBUG_STRING ( NEO : : DebugManager . flags . PrintDebugMessages . get ( ) , stderr , " Size of SLM (%u) larger than available (%u) \n " , slmInlineSize , localMemSize ) ;
retVal = CL_OUT_OF_RESOURCES ;
2022-09-08 12:01:51 +00:00
}
2017-12-21 00:45:38 +01:00
if ( errcodeRet ) {
* errcodeRet = retVal ;
}
2022-05-16 14:06:56 +00:00
if ( fileLoggerInstance ( ) . enabled ( ) ) {
2019-08-29 15:10:51 +02:00
std : : string source ;
program - > getSource ( source ) ;
2022-05-16 14:06:56 +00:00
fileLoggerInstance ( ) . dumpKernel ( kernelInfo . kernelDescriptor . kernelMetadata . kernelName , source ) ;
2017-12-21 00:45:38 +01:00
}
return pKernel ;
}
Kernel & operator = ( const Kernel & ) = delete ;
Kernel ( const Kernel & ) = delete ;
2022-05-09 17:40:30 +00:00
~ Kernel ( ) override ;
2017-12-21 00:45:38 +01:00
static bool isMemObj ( kernelArgType kernelArg ) {
return kernelArg = = BUFFER_OBJ | | kernelArg = = IMAGE_OBJ | | kernelArg = = PIPE_OBJ ;
}
2018-08-03 08:14:43 +02:00
bool isAuxTranslationRequired ( ) const { return auxTranslationRequired ; }
2020-12-15 17:13:28 +00:00
void setAuxTranslationRequired ( bool onOff ) { auxTranslationRequired = onOff ; }
void updateAuxTranslationRequired ( ) ;
2018-08-03 08:14:43 +02:00
2021-06-11 11:24:27 +00:00
ArrayRef < uint8_t > getCrossThreadDataRef ( ) {
return ArrayRef < uint8_t > ( reinterpret_cast < uint8_t * > ( crossThreadData ) , crossThreadDataSize ) ;
}
2021-03-22 11:06:23 +00:00
char * getCrossThreadData ( ) const {
return crossThreadData ;
2017-12-21 00:45:38 +01:00
}
2021-03-22 11:06:23 +00:00
uint32_t getCrossThreadDataSize ( ) const {
return crossThreadDataSize ;
2017-12-21 00:45:38 +01:00
}
cl_int initialize ( ) ;
2018-08-07 09:22:55 +02:00
MOCKABLE_VIRTUAL cl_int cloneKernel ( Kernel * pSourceKernel ) ;
2017-12-21 00:45:38 +01:00
2018-03-27 14:30:05 +02:00
MOCKABLE_VIRTUAL bool canTransformImages ( ) const ;
2017-12-21 00:45:38 +01:00
MOCKABLE_VIRTUAL bool isPatched ( ) const ;
// API entry points
2021-03-17 12:44:20 +01:00
cl_int setArgument ( uint32_t argIndex , size_t argSize , const void * argVal ) { return setArg ( argIndex , argSize , argVal ) ; }
2021-01-26 11:44:10 +00:00
cl_int setArgSvm ( uint32_t argIndex , size_t svmAllocSize , void * svmPtr , GraphicsAllocation * svmAlloc , cl_mem_flags svmFlags ) ;
2022-02-25 14:28:18 +00:00
MOCKABLE_VIRTUAL cl_int setArgSvmAlloc ( uint32_t argIndex , void * svmPtr , GraphicsAllocation * svmAlloc , uint32_t allocId ) ;
2017-12-21 00:45:38 +01:00
2019-06-14 07:10:45 +02:00
void setSvmKernelExecInfo ( GraphicsAllocation * argValue ) ;
void clearSvmKernelExecInfo ( ) ;
2017-12-21 00:45:38 +01:00
cl_int getInfo ( cl_kernel_info paramName , size_t paramValueSize ,
void * paramValue , size_t * paramValueSizeRet ) const ;
cl_int getArgInfo ( cl_uint argIndx , cl_kernel_arg_info paramName ,
size_t paramValueSize , void * paramValue , size_t * paramValueSizeRet ) const ;
2021-03-23 17:11:41 +00:00
cl_int getWorkGroupInfo ( cl_kernel_work_group_info paramName ,
2017-12-21 00:45:38 +01:00
size_t paramValueSize , void * paramValue , size_t * paramValueSizeRet ) const ;
2021-03-23 17:11:41 +00:00
cl_int getSubGroupInfo ( cl_kernel_sub_group_info paramName ,
2017-12-21 00:45:38 +01:00
size_t inputValueSize , const void * inputValue ,
size_t paramValueSize , void * paramValue ,
size_t * paramValueSizeRet ) const ;
2021-03-22 15:26:03 +00:00
const void * getKernelHeap ( ) const ;
void * getSurfaceStateHeap ( ) const ;
const void * getDynamicStateHeap ( ) const ;
2017-12-21 00:45:38 +01:00
2021-03-22 15:26:03 +00:00
size_t getKernelHeapSize ( ) const ;
size_t getSurfaceStateHeapSize ( ) const ;
size_t getDynamicStateHeapSize ( ) const ;
2021-03-22 11:06:23 +00:00
size_t getNumberOfBindingTableStates ( ) const ;
size_t getBindingTableOffset ( ) const {
return localBindingTableOffset ;
2018-02-08 16:00:20 +01:00
}
2021-03-22 11:06:23 +00:00
void resizeSurfaceStateHeap ( void * pNewSsh , size_t newSshSize , size_t newBindingTableCount , size_t newBindingTableOffset ) ;
2017-12-21 00:45:38 +01:00
2021-03-23 17:11:41 +00:00
void substituteKernelHeap ( void * newKernelHeap , size_t newKernelHeapSize ) ;
2021-03-22 15:26:03 +00:00
bool isKernelHeapSubstituted ( ) const ;
uint64_t getKernelId ( ) const ;
void setKernelId ( uint64_t newKernelId ) ;
2018-06-04 09:09:04 +02:00
uint32_t getStartOffset ( ) const ;
void setStartOffset ( uint32_t offset ) ;
2018-01-24 13:26:46 +01:00
2017-12-21 00:45:38 +01:00
const std : : vector < SimpleKernelArgInfo > & getKernelArguments ( ) const {
return kernelArguments ;
}
size_t getKernelArgsNumber ( ) const {
2020-12-18 14:47:42 +00:00
return kernelArguments . size ( ) ;
2017-12-21 00:45:38 +01:00
}
2021-04-08 11:05:45 +02:00
bool usesBindfulAddressingForBuffers ( ) const {
return KernelDescriptor : : BindfulAndStateless = = kernelInfo . kernelDescriptor . kernelAttributes . bufferAddressingMode ;
2017-12-21 00:45:38 +01:00
}
2021-06-11 11:24:27 +00:00
inline const KernelDescriptor & getDescriptor ( ) const {
return kernelInfo . kernelDescriptor ;
}
inline const KernelInfo & getKernelInfo ( ) const {
2021-03-22 15:26:03 +00:00
return kernelInfo ;
2017-12-21 00:45:38 +01:00
}
Context & getContext ( ) const {
2020-11-18 17:06:55 +01:00
return program - > getContext ( ) ;
2017-12-21 00:45:38 +01:00
}
Program * getProgram ( ) const { return program ; }
2021-03-22 15:26:03 +00:00
uint32_t getScratchSize ( ) {
return kernelInfo . kernelDescriptor . kernelAttributes . perThreadScratchSize [ 0 ] ;
2017-12-21 00:45:38 +01:00
}
2021-03-22 15:26:03 +00:00
uint32_t getPrivateScratchSize ( ) {
return kernelInfo . kernelDescriptor . kernelAttributes . perThreadScratchSize [ 1 ] ;
2019-06-27 14:06:19 +02:00
}
2021-06-21 15:24:14 +00:00
bool usesSyncBuffer ( ) const ;
2021-03-23 17:11:41 +00:00
void patchSyncBuffer ( GraphicsAllocation * gfxAllocation , size_t bufferOffset ) ;
2021-06-10 11:25:10 +00:00
void * patchBindlessSurfaceState ( NEO : : GraphicsAllocation * alloc , uint32_t bindless ) ;
2017-12-21 00:45:38 +01:00
// Helpers
cl_int setArg ( uint32_t argIndex , uint32_t argValue ) ;
2019-11-12 22:53:18 +01:00
cl_int setArg ( uint32_t argIndex , uint64_t argValue ) ;
2017-12-21 00:45:38 +01:00
cl_int setArg ( uint32_t argIndex , cl_mem argValue ) ;
2018-03-12 16:32:08 +01:00
cl_int setArg ( uint32_t argIndex , cl_mem argValue , uint32_t mipLevel ) ;
2021-03-17 12:44:20 +01:00
cl_int setArg ( uint32_t argIndex , size_t argSize , const void * argVal ) ;
2017-12-21 00:45:38 +01:00
// Handlers
void setKernelArgHandler ( uint32_t argIndex , KernelArgHandler handler ) ;
void unsetArg ( uint32_t argIndex ) ;
cl_int setArgImmediate ( uint32_t argIndex ,
size_t argSize ,
const void * argVal ) ;
cl_int setArgBuffer ( uint32_t argIndex ,
size_t argSize ,
const void * argVal ) ;
cl_int setArgPipe ( uint32_t argIndex ,
size_t argSize ,
const void * argVal ) ;
cl_int setArgImage ( uint32_t argIndex ,
size_t argSize ,
const void * argVal ) ;
2018-03-12 16:32:08 +01:00
cl_int setArgImageWithMipLevel ( uint32_t argIndex ,
size_t argSize ,
const void * argVal , uint32_t mipLevel ) ;
2017-12-21 00:45:38 +01:00
cl_int setArgLocal ( uint32_t argIndex ,
size_t argSize ,
const void * argVal ) ;
cl_int setArgSampler ( uint32_t argIndex ,
size_t argSize ,
const void * argVal ) ;
cl_int setArgAccelerator ( uint32_t argIndex ,
size_t argSize ,
const void * argVal ) ;
void storeKernelArg ( uint32_t argIndex ,
kernelArgType argType ,
2018-08-10 04:42:52 -07:00
void * argObject ,
2017-12-21 00:45:38 +01:00
const void * argValue ,
size_t argSize ,
2021-01-26 11:44:10 +00:00
GraphicsAllocation * argSvmAlloc = nullptr ,
2017-12-21 00:45:38 +01:00
cl_mem_flags argSvmFlags = 0 ) ;
2022-03-02 14:55:18 +00:00
void storeKernelArgAllocIdMemoryManagerCounter ( uint32_t argIndex , uint32_t allocIdMemoryManagerCounter ) ;
2017-12-21 00:45:38 +01:00
const void * getKernelArg ( uint32_t argIndex ) const ;
const SimpleKernelArgInfo & getKernelArgInfo ( uint32_t argIndex ) const ;
2018-11-14 08:40:37 +01:00
bool getAllowNonUniform ( ) const { return program - > getAllowNonUniform ( ) ; }
2021-04-08 11:05:45 +02:00
bool isVmeKernel ( ) const { return kernelInfo . kernelDescriptor . kernelAttributes . flags . usesVme ; }
2022-08-31 13:26:29 +00:00
bool requiresSystolicPipelineSelectMode ( ) const { return systolicPipelineSelectMode ; }
2017-12-21 00:45:38 +01:00
2021-04-21 13:23:42 +02:00
void performKernelTuning ( CommandStreamReceiver & commandStreamReceiver , const Vec3 < size_t > & lws , const Vec3 < size_t > & gws , const Vec3 < size_t > & offsets , TimestampPacketContainer * timestampContainer ) ;
2020-12-04 09:42:34 +00:00
MOCKABLE_VIRTUAL bool isSingleSubdevicePreferred ( ) const ;
2022-10-06 09:50:38 +00:00
void setInlineSamplers ( ) ;
2020-11-18 13:56:18 +00:00
2022-07-24 04:21:16 +00:00
// residency for kernel surfaces
2018-02-16 09:15:36 +01:00
MOCKABLE_VIRTUAL void makeResident ( CommandStreamReceiver & commandStreamReceiver ) ;
2021-03-22 11:06:23 +00:00
MOCKABLE_VIRTUAL void getResidency ( std : : vector < Surface * > & dst ) ;
2017-12-21 00:45:38 +01:00
bool requiresCoherency ( ) ;
void resetSharedObjectsPatchAddresses ( ) ;
2018-11-14 08:40:37 +01:00
bool isUsingSharedObjArgs ( ) const { return usingSharedObjArgs ; }
2019-08-30 09:55:44 +02:00
bool hasUncacheableStatelessArgs ( ) const { return statelessUncacheableArgsCount > 0 ; }
2017-12-21 00:45:38 +01:00
2021-03-22 15:26:03 +00:00
bool hasPrintfOutput ( ) const ;
2017-12-21 00:45:38 +01:00
cl_int checkCorrectImageAccessQualifier ( cl_uint argIndex ,
size_t argSize ,
const void * argValue ) const ;
static uint32_t dummyPatchLocation ;
2019-03-21 12:51:20 +01:00
uint32_t allBufferArgsStateful = CL_TRUE ;
2020-11-12 14:25:05 +01:00
bool isBuiltIn = false ;
2017-12-21 00:45:38 +01:00
2020-01-24 15:06:55 +01:00
KernelExecutionType getExecutionType ( ) const {
return executionType ;
}
2019-11-12 13:59:37 +01:00
2021-03-22 15:26:03 +00:00
bool is32Bit ( ) const {
2021-04-08 11:05:45 +02:00
return kernelInfo . kernelDescriptor . kernelAttributes . gpuPointerSize = = 4 ;
2018-04-03 16:06:37 +02:00
}
2021-03-22 15:26:03 +00:00
size_t getPerThreadSystemThreadSurfaceSize ( ) const {
return kernelInfo . kernelDescriptor . kernelAttributes . perThreadSystemThreadSurfaceSize ;
2018-03-19 10:11:30 +01:00
}
2018-03-14 11:07:51 +01:00
std : : vector < PatchInfoData > & getPatchInfoDataList ( ) { return patchInfoDataList ; } ;
2021-06-14 16:23:08 +00:00
bool usesImages ( ) const {
return usingImages ;
}
2018-08-07 09:49:47 +02:00
bool usesOnlyImages ( ) const {
return usingImagesOnly ;
}
2018-03-14 11:07:51 +01:00
2022-04-05 16:47:19 +00:00
std : : unique_ptr < KernelObjsForAuxTranslation > fillWithKernelObjsForAuxTranslation ( ) ;
2018-08-07 15:09:16 +02:00
2019-02-21 16:59:10 +01:00
MOCKABLE_VIRTUAL bool requiresCacheFlushCommand ( const CommandQueue & commandQueue ) const ;
2018-12-06 15:33:02 +01:00
2019-01-31 14:47:55 +01:00
using CacheFlushAllocationsVec = StackVec < GraphicsAllocation * , 32 > ;
2021-03-22 15:26:03 +00:00
void getAllocationsForCacheFlush ( CacheFlushAllocationsVec & out ) const ;
2019-01-31 14:47:55 +01:00
2019-08-13 11:07:47 +02:00
void setAuxTranslationDirection ( AuxTranslationDirection auxTranslationDirection ) {
this - > auxTranslationDirection = auxTranslationDirection ;
2019-02-05 21:41:51 +01:00
}
2019-07-04 12:17:42 +02:00
void setUnifiedMemorySyncRequirement ( bool isUnifiedMemorySyncRequired ) {
this - > isUnifiedMemorySyncRequired = isUnifiedMemorySyncRequired ;
}
2019-06-13 15:49:35 +02:00
void setUnifiedMemoryProperty ( cl_kernel_exec_info infoType , bool infoValue ) ;
2019-06-14 12:48:40 +02:00
void setUnifiedMemoryExecInfo ( GraphicsAllocation * argValue ) ;
void clearUnifiedMemoryExecInfo ( ) ;
2019-02-05 21:41:51 +01:00
2019-08-23 10:35:57 +02:00
bool areStatelessWritesUsed ( ) { return containsStatelessWrites ; }
2019-12-17 17:04:57 +01:00
int setKernelThreadArbitrationPolicy ( uint32_t propertyValue ) ;
2020-01-21 19:02:36 +01:00
cl_int setKernelExecutionType ( cl_execution_info_kernel_type_intel executionType ) ;
2020-01-22 16:14:14 +01:00
void getSuggestedLocalWorkSize ( const cl_uint workDim , const size_t * globalWorkSize , const size_t * globalWorkOffset ,
2021-03-23 17:11:41 +00:00
size_t * localWorkSize ) ;
2020-10-06 16:21:46 +02:00
uint32_t getMaxWorkGroupCount ( const cl_uint workDim , const size_t * localWorkSize , const CommandQueue * commandQueue ) const ;
2019-11-07 18:49:46 +01:00
2022-06-27 17:20:50 +00:00
uint64_t getKernelStartAddress ( const bool localIdsGenerationByRuntime , const bool kernelUsesLocalIds , const bool isCssUsed , const bool returnFullAddress ) const ;
2020-01-23 15:52:49 +01:00
2020-06-19 17:24:48 +02:00
bool isKernelDebugEnabled ( ) const { return debugEnabled ; }
2020-09-02 11:38:54 +02:00
void setAdditionalKernelExecInfo ( uint32_t additionalKernelExecInfo ) ;
uint32_t getAdditionalKernelExecInfo ( ) const ;
2021-03-22 15:26:03 +00:00
MOCKABLE_VIRTUAL bool requiresWaDisableRccRhwoOptimization ( ) const ;
2020-01-29 14:15:10 +01:00
2022-07-24 04:21:16 +00:00
// dispatch traits
2021-03-22 11:06:23 +00:00
void setGlobalWorkOffsetValues ( uint32_t globalWorkOffsetX , uint32_t globalWorkOffsetY , uint32_t globalWorkOffsetZ ) ;
void setGlobalWorkSizeValues ( uint32_t globalWorkSizeX , uint32_t globalWorkSizeY , uint32_t globalWorkSizeZ ) ;
void setLocalWorkSizeValues ( uint32_t localWorkSizeX , uint32_t localWorkSizeY , uint32_t localWorkSizeZ ) ;
void setLocalWorkSize2Values ( uint32_t localWorkSizeX , uint32_t localWorkSizeY , uint32_t localWorkSizeZ ) ;
void setEnqueuedLocalWorkSizeValues ( uint32_t localWorkSizeX , uint32_t localWorkSizeY , uint32_t localWorkSizeZ ) ;
void setNumWorkGroupsValues ( uint32_t numWorkGroupsX , uint32_t numWorkGroupsY , uint32_t numWorkGroupsZ ) ;
void setWorkDim ( uint32_t workDim ) ;
2021-06-11 11:24:27 +00:00
const uint32_t * getDispatchTrait ( const CrossThreadDataOffset offset ) const {
return isValidOffset ( offset ) ? reinterpret_cast < uint32_t * > ( getCrossThreadData ( ) + offset )
: & Kernel : : dummyPatchLocation ;
}
const uint32_t * getWorkDim ( ) const { return getDispatchTrait ( getDescriptor ( ) . payloadMappings . dispatchTraits . workDim ) ; }
std : : array < const uint32_t * , 3 > getDispatchTraitArray ( const CrossThreadDataOffset dispatchTrait [ 3 ] ) const { return { getDispatchTrait ( dispatchTrait [ 0 ] ) , getDispatchTrait ( dispatchTrait [ 1 ] ) , getDispatchTrait ( dispatchTrait [ 2 ] ) } ; }
std : : array < const uint32_t * , 3 > getGlobalWorkOffsetValues ( ) const { return getDispatchTraitArray ( getDescriptor ( ) . payloadMappings . dispatchTraits . globalWorkOffset ) ; }
std : : array < const uint32_t * , 3 > getLocalWorkSizeValues ( ) const { return getDispatchTraitArray ( getDescriptor ( ) . payloadMappings . dispatchTraits . localWorkSize ) ; }
std : : array < const uint32_t * , 3 > getLocalWorkSize2Values ( ) const { return getDispatchTraitArray ( getDescriptor ( ) . payloadMappings . dispatchTraits . localWorkSize2 ) ; }
std : : array < const uint32_t * , 3 > getEnqueuedLocalWorkSizeValues ( ) const { return getDispatchTraitArray ( getDescriptor ( ) . payloadMappings . dispatchTraits . enqueuedLocalWorkSize ) ; }
std : : array < const uint32_t * , 3 > getNumWorkGroupsValues ( ) const { return getDispatchTraitArray ( getDescriptor ( ) . payloadMappings . dispatchTraits . numWorkGroups ) ; }
bool isLocalWorkSize2Patchable ( ) ;
2021-03-22 11:06:23 +00:00
uint32_t getMaxKernelWorkGroupSize ( ) const ;
uint32_t getSlmTotalSize ( ) const ;
2021-03-09 10:30:21 +00:00
bool getHasIndirectAccess ( ) const {
2021-02-23 02:33:22 -08:00
return this - > kernelHasIndirectAccess ;
}
2020-12-10 13:22:10 +00:00
2021-03-09 10:30:21 +00:00
MultiDeviceKernel * getMultiDeviceKernel ( ) const { return pMultiDeviceKernel ; }
void setMultiDeviceKernel ( MultiDeviceKernel * pMultiDeviceKernelToSet ) { pMultiDeviceKernel = pMultiDeviceKernelToSet ; }
2021-03-29 17:06:29 +00:00
bool areMultipleSubDevicesInContext ( ) const ;
2021-07-01 16:00:22 +00:00
bool requiresMemoryMigration ( ) const { return migratableArgsMap . size ( ) > 0 ; }
const std : : map < uint32_t , MemObj * > & getMemObjectsToMigrate ( ) const { return migratableArgsMap ; }
2021-09-08 13:20:44 +00:00
ImplicitArgs * getImplicitArgs ( ) const { return pImplicitArgs . get ( ) ; }
2022-02-22 12:24:30 +00:00
const HardwareInfo & getHardwareInfo ( ) const ;
2022-07-01 18:03:54 +00:00
bool isAnyKernelArgumentUsingSystemMemory ( ) const {
return anyKernelArgumentUsingSystemMemory ;
}
2021-03-03 12:25:26 +00:00
2022-07-15 19:47:17 +00:00
static bool graphicsAllocationTypeUseSystemMemory ( AllocationType type ) ;
void setDestinationAllocationInSystemMemory ( bool value ) {
isDestinationAllocationInSystemMemory = value ;
}
bool getDestinationAllocationInSystemMemory ( ) const {
return isDestinationAllocationInSystemMemory ;
}
2017-12-21 00:45:38 +01:00
protected :
2022-07-01 18:03:54 +00:00
struct KernelConfig {
Vec3 < size_t > gws ;
Vec3 < size_t > lws ;
Vec3 < size_t > offsets ;
bool operator = = ( const KernelConfig & other ) const { return this - > gws = = other . gws & & this - > lws = = other . lws & & this - > offsets = = other . offsets ; }
} ;
struct KernelConfigHash {
size_t operator ( ) ( KernelConfig const & config ) const {
auto hash = std : : hash < size_t > { } ;
size_t gwsHashX = hash ( config . gws . x ) ;
size_t gwsHashY = hash ( config . gws . y ) ;
size_t gwsHashZ = hash ( config . gws . z ) ;
size_t gwsHash = hashCombine ( gwsHashX , gwsHashY , gwsHashZ ) ;
size_t lwsHashX = hash ( config . lws . x ) ;
size_t lwsHashY = hash ( config . lws . y ) ;
size_t lwsHashZ = hash ( config . lws . z ) ;
size_t lwsHash = hashCombine ( lwsHashX , lwsHashY , lwsHashZ ) ;
size_t offsetsHashX = hash ( config . offsets . x ) ;
size_t offsetsHashY = hash ( config . offsets . y ) ;
size_t offsetsHashZ = hash ( config . offsets . z ) ;
size_t offsetsHash = hashCombine ( offsetsHashX , offsetsHashY , offsetsHashZ ) ;
return hashCombine ( gwsHash , lwsHash , offsetsHash ) ;
}
size_t hashCombine ( size_t hash1 , size_t hash2 , size_t hash3 ) const {
return ( hash1 ^ ( hash2 < < 1u ) ) ^ ( hash3 < < 2u ) ;
}
} ;
struct KernelSubmissionData {
std : : unique_ptr < TimestampPacketContainer > kernelStandardTimestamps ;
std : : unique_ptr < TimestampPacketContainer > kernelSubdeviceTimestamps ;
TunningStatus status ;
bool singleSubdevicePreferred = false ;
} ;
Kernel ( Program * programArg , const KernelInfo & kernelInfo , ClDevice & clDevice ) ;
void makeArgsResident ( CommandStreamReceiver & commandStreamReceiver ) ;
2017-12-21 00:45:38 +01:00
2021-04-08 11:05:45 +02:00
void * patchBufferOffset ( const ArgDescPointer & argAsPtr , void * svmPtr , GraphicsAllocation * svmAlloc ) ;
2017-12-21 00:45:38 +01:00
2021-03-23 17:11:41 +00:00
void patchWithImplicitSurface ( void * ptrToPatchInCrossThreadData , GraphicsAllocation & allocation , const ArgDescPointer & arg ) ;
2017-12-21 00:45:38 +01:00
void provideInitializationHints ( ) ;
2021-06-17 12:06:57 +00:00
void markArgPatchedAndResolveArgs ( uint32_t argIndex ) ;
2018-03-27 14:30:05 +02:00
void resolveArgs ( ) ;
2021-03-22 15:26:03 +00:00
void reconfigureKernel ( ) ;
2021-09-16 23:23:37 +00:00
bool hasDirectStatelessAccessToSharedBuffer ( ) const ;
2020-12-15 17:13:28 +00:00
bool hasDirectStatelessAccessToHostMemory ( ) const ;
2021-01-08 13:09:32 +00:00
bool hasIndirectStatelessAccessToHostMemory ( ) const ;
2018-09-21 14:06:35 +02:00
2021-01-26 11:44:10 +00:00
void addAllocationToCacheFlushVector ( uint32_t argIndex , GraphicsAllocation * argAllocation ) ;
2019-01-31 14:47:55 +01:00
bool allocationForCacheFlush ( GraphicsAllocation * argAllocation ) const ;
2020-11-17 11:08:06 +01:00
const ClDevice & getDevice ( ) const {
2021-03-19 11:22:17 +00:00
return clDevice ;
2020-11-17 11:08:06 +01:00
}
2021-09-07 11:10:18 +00:00
cl_int patchPrivateSurface ( ) ;
2020-11-24 16:07:54 +00:00
2022-07-01 18:03:54 +00:00
bool hasTunningFinished ( KernelSubmissionData & submissionData ) ;
bool hasRunFinished ( TimestampPacketContainer * timestampContainer ) ;
UnifiedMemoryControls unifiedMemoryControls { } ;
std : : map < uint32_t , MemObj * > migratableArgsMap { } ;
std : : unordered_map < KernelConfig , KernelSubmissionData , KernelConfigHash > kernelSubmissionMap ;
2017-12-21 00:45:38 +01:00
std : : vector < SimpleKernelArgInfo > kernelArguments ;
std : : vector < KernelArgHandler > kernelArgHandlers ;
std : : vector < GraphicsAllocation * > kernelSvmGfxAllocations ;
2019-06-14 12:48:40 +02:00
std : : vector < GraphicsAllocation * > kernelUnifiedMemoryGfxAllocations ;
2022-07-01 18:03:54 +00:00
std : : vector < PatchInfoData > patchInfoDataList ;
std : : vector < GraphicsAllocation * > kernelArgRequiresCacheFlush ;
std : : vector < size_t > slmSizes ;
2017-12-21 00:45:38 +01:00
2022-07-01 18:03:54 +00:00
std : : unique_ptr < ImageTransformer > imageTransformer ;
std : : unique_ptr < char [ ] > pSshLocal ;
std : : unique_ptr < ImplicitArgs > pImplicitArgs = nullptr ;
2019-02-05 21:41:51 +01:00
2022-07-01 18:03:54 +00:00
uint64_t privateSurfaceSize = 0u ;
2018-03-14 11:07:51 +01:00
2022-07-01 18:03:54 +00:00
size_t numberOfBindingTableStates = 0u ;
size_t localBindingTableOffset = 0u ;
2018-11-14 08:40:37 +01:00
2022-07-01 18:03:54 +00:00
const ExecutionEnvironment & executionEnvironment ;
Program * program ;
ClDevice & clDevice ;
const KernelInfo & kernelInfo ;
GraphicsAllocation * privateSurface = nullptr ;
MultiDeviceKernel * pMultiDeviceKernel = nullptr ;
2020-11-18 15:02:22 +01:00
2021-03-22 11:06:23 +00:00
uint32_t * maxWorkGroupSizeForCrossThreadData = & Kernel : : dummyPatchLocation ;
uint32_t * dataParameterSimdSize = & Kernel : : dummyPatchLocation ;
uint32_t * parentEventOffset = & Kernel : : dummyPatchLocation ;
uint32_t * preferredWkgMultipleOffset = & Kernel : : dummyPatchLocation ;
2022-07-01 18:03:54 +00:00
char * crossThreadData = nullptr ;
2020-12-10 13:22:10 +00:00
2022-07-01 18:03:54 +00:00
AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection : : None ;
KernelExecutionType executionType = KernelExecutionType : : Default ;
2020-12-10 13:22:10 +00:00
2022-07-01 18:03:54 +00:00
uint32_t patchedArgumentsNum = 0 ;
uint32_t startOffset = 0 ;
uint32_t statelessUncacheableArgsCount = 0 ;
uint32_t additionalKernelExecInfo = AdditionalKernelExecInfo : : DisableOverdispatch ;
uint32_t maxKernelWorkGroupSize = 0 ;
uint32_t slmTotalSize = 0u ;
2021-03-22 11:06:23 +00:00
uint32_t sshLocalSize = 0u ;
uint32_t crossThreadDataSize = 0u ;
2020-11-18 18:39:32 +00:00
2022-07-01 18:03:54 +00:00
bool containsStatelessWrites = true ;
bool usingSharedObjArgs = false ;
bool usingImages = false ;
bool usingImagesOnly = false ;
bool auxTranslationRequired = false ;
2022-08-31 13:26:29 +00:00
bool systolicPipelineSelectMode = false ;
2022-07-01 18:03:54 +00:00
bool svmAllocationsRequireCacheFlush = false ;
bool isUnifiedMemorySyncRequired = true ;
bool debugEnabled = false ;
2021-08-03 18:10:53 +00:00
bool singleSubdevicePreferredInCurrentEnqueue = false ;
2021-02-23 02:33:22 -08:00
bool kernelHasIndirectAccess = true ;
2022-07-01 18:03:54 +00:00
bool anyKernelArgumentUsingSystemMemory = false ;
2022-07-15 19:47:17 +00:00
bool isDestinationAllocationInSystemMemory = false ;
2017-12-21 00:45:38 +01:00
} ;
2020-12-22 00:03:25 +00:00
2019-03-26 11:59:46 +01:00
} // namespace NEO