2017-12-21 07:45:38 +08:00
/*
 * Copyright (c) 2017 - 2018, Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
# include "runtime/helpers/aligned_memory.h"
# include "runtime/helpers/debug_helpers.h"
# include "runtime/helpers/hash.h"
# include "runtime/helpers/ptr_math.h"
# include "runtime/helpers/string.h"
# include "runtime/memory_manager/memory_manager.h"
# include "patch_list.h"
# include "patch_shared.h"
2018-05-02 20:27:55 +08:00
# include "program_debug_data.h"
2017-12-21 07:45:38 +08:00
# include "program.h"
# include "runtime/kernel/kernel.h"
# include <algorithm>
using namespace iOpenCL ;
namespace OCLRT {
extern bool familyEnabled [ ] ;
// Looks up a kernel by name.
//
// Returns the first KernelInfo whose name exactly matches kernelName,
// or nullptr when kernelName is null or no kernel with that name exists.
// The returned pointer is owned by the Program; callers must not free it.
const KernelInfo *Program::getKernelInfo(
    const char *kernelName) const {
    if (kernelName == nullptr) {
        return nullptr;
    }
    for (auto pCandidate : kernelInfoArray) {
        if (strcmp(pCandidate->name.c_str(), kernelName) == 0) {
            return pCandidate;
        }
    }
    return nullptr;
}
size_t Program : : getNumKernels ( ) const {
return kernelInfoArray . size ( ) ;
}
// Returns the KernelInfo at the given position in the decode order.
//
// ordinal must be < getNumKernels(); out-of-range access is caught only
// in debug builds (DEBUG_BREAK_IF) — release builds index unchecked.
const KernelInfo *Program::getKernelInfo(size_t ordinal) const {
    DEBUG_BREAK_IF(ordinal >= kernelInfoArray.size());
    return kernelInfoArray[ordinal];
}
std : : string Program : : getKernelNamesString ( ) const {
std : : string semiColonDelimitedKernelNameStr ;
for ( uint32_t i = 0 ; i < kernelInfoArray . size ( ) ; i + + ) {
semiColonDelimitedKernelNameStr + = kernelInfoArray [ i ] - > name ;
if ( ( i + 1 ) ! = kernelInfoArray . size ( ) ) {
semiColonDelimitedKernelNameStr + = " ; " ;
}
}
return semiColonDelimitedKernelNameStr ;
}
size_t Program : : processKernel (
const void * pKernelBlob ,
cl_int & retVal ) {
size_t sizeProcessed = 0 ;
do {
auto pKernelInfo = KernelInfo : : create ( ) ;
if ( ! pKernelInfo ) {
retVal = CL_OUT_OF_HOST_MEMORY ;
break ;
}
auto pCurKernelPtr = pKernelBlob ;
pKernelInfo - > heapInfo . pBlob = pKernelBlob ;
pKernelInfo - > heapInfo . pKernelHeader = reinterpret_cast < const SKernelBinaryHeaderCommon * > ( pCurKernelPtr ) ;
pCurKernelPtr = ptrOffset ( pCurKernelPtr , sizeof ( SKernelBinaryHeaderCommon ) ) ;
std : : string readName { reinterpret_cast < const char * > ( pCurKernelPtr ) , pKernelInfo - > heapInfo . pKernelHeader - > KernelNameSize } ;
pKernelInfo - > name = readName . c_str ( ) ;
pCurKernelPtr = ptrOffset ( pCurKernelPtr , pKernelInfo - > heapInfo . pKernelHeader - > KernelNameSize ) ;
pKernelInfo - > heapInfo . pKernelHeap = pCurKernelPtr ;
pCurKernelPtr = ptrOffset ( pCurKernelPtr , pKernelInfo - > heapInfo . pKernelHeader - > KernelHeapSize ) ;
pKernelInfo - > heapInfo . pGsh = pCurKernelPtr ;
pCurKernelPtr = ptrOffset ( pCurKernelPtr , pKernelInfo - > heapInfo . pKernelHeader - > GeneralStateHeapSize ) ;
pKernelInfo - > heapInfo . pDsh = pCurKernelPtr ;
pCurKernelPtr = ptrOffset ( pCurKernelPtr , pKernelInfo - > heapInfo . pKernelHeader - > DynamicStateHeapSize ) ;
pKernelInfo - > heapInfo . pSsh = const_cast < void * > ( pCurKernelPtr ) ;
pCurKernelPtr = ptrOffset ( pCurKernelPtr , pKernelInfo - > heapInfo . pKernelHeader - > SurfaceStateHeapSize ) ;
pKernelInfo - > heapInfo . pPatchList = pCurKernelPtr ;
retVal = parsePatchList ( * pKernelInfo ) ;
if ( retVal ! = CL_SUCCESS ) {
delete pKernelInfo ;
sizeProcessed = ptrDiff ( pCurKernelPtr , pKernelBlob ) ;
break ;
}
auto pKernelHeader = pKernelInfo - > heapInfo . pKernelHeader ;
auto pKernel = ptrOffset ( pKernelBlob , sizeof ( SKernelBinaryHeaderCommon ) ) ;
if ( genBinary )
pKernelInfo - > gpuPointerSize = reinterpret_cast < const SProgramBinaryHeader * > ( genBinary ) - > GPUPointerSizeInBytes ;
uint32_t kernelSize =
pKernelHeader - > DynamicStateHeapSize +
pKernelHeader - > GeneralStateHeapSize +
pKernelHeader - > KernelHeapSize +
pKernelHeader - > KernelNameSize +
pKernelHeader - > PatchListSize +
pKernelHeader - > SurfaceStateHeapSize ;
pKernelInfo - > heapInfo . blobSize = kernelSize + sizeof ( SKernelBinaryHeaderCommon ) ;
uint32_t kernelCheckSum = pKernelInfo - > heapInfo . pKernelHeader - > CheckSum ;
uint64_t hashValue = Hash : : hash ( reinterpret_cast < const char * > ( pKernel ) , kernelSize ) ;
uint32_t calcCheckSum = hashValue & 0xFFFFFFFF ;
pKernelInfo - > isValid = ( calcCheckSum = = kernelCheckSum ) ;
retVal = CL_SUCCESS ;
sizeProcessed = sizeof ( SKernelBinaryHeaderCommon ) + kernelSize ;
kernelInfoArray . push_back ( pKernelInfo ) ;
if ( pKernelInfo - > hasDeviceEnqueue ( ) ) {
parentKernelInfoArray . push_back ( pKernelInfo ) ;
}
if ( pKernelInfo - > requiresSubgroupIndependentForwardProgress ( ) ) {
subgroupKernelInfoArray . push_back ( pKernelInfo ) ;
}
} while ( false ) ;
return sizeProcessed ;
}
cl_int Program : : parsePatchList ( KernelInfo & kernelInfo ) {
cl_int retVal = CL_SUCCESS ;
auto pPatchList = kernelInfo . heapInfo . pPatchList ;
auto patchListSize = kernelInfo . heapInfo . pKernelHeader - > PatchListSize ;
auto pCurPatchListPtr = pPatchList ;
uint32_t PrivateMemoryStatelessSizeOffset = 0xFFffFFff ;
uint32_t LocalMemoryStatelessWindowSizeOffset = 0xFFffFFff ;
uint32_t LocalMemoryStatelessWindowStartAddressOffset = 0xFFffFFff ;
//Speed up containers by giving some pre-allocated storage
kernelInfo . kernelArgInfo . reserve ( 10 ) ;
kernelInfo . patchInfo . kernelArgumentInfo . reserve ( 10 ) ;
kernelInfo . patchInfo . dataParameterBuffers . reserve ( 20 ) ;
DBG_LOG ( LogPatchTokens , " \n PATCH_TOKENs for kernel " , kernelInfo . name ) ;
while ( ptrDiff ( pCurPatchListPtr , pPatchList ) < patchListSize ) {
uint32_t index = 0 ;
uint32_t argNum = 0 ;
auto pPatch = reinterpret_cast < const SPatchItemHeader * > ( pCurPatchListPtr ) ;
const SPatchDataParameterBuffer * pDataParameterBuffer = nullptr ;
switch ( pPatch - > Token ) {
case PATCH_TOKEN_SAMPLER_STATE_ARRAY :
kernelInfo . patchInfo . samplerStateArray =
reinterpret_cast < const SPatchSamplerStateArray * > ( pPatch ) ;
DBG_LOG ( LogPatchTokens ,
" \n .SAMPLER_STATE_ARRAY " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .Offset " , kernelInfo . patchInfo . samplerStateArray - > Offset ,
" \n .Count " , kernelInfo . patchInfo . samplerStateArray - > Count ,
" \n .BorderColorOffset " , kernelInfo . patchInfo . samplerStateArray - > BorderColorOffset ) ;
break ;
case PATCH_TOKEN_BINDING_TABLE_STATE :
kernelInfo . patchInfo . bindingTableState =
reinterpret_cast < const SPatchBindingTableState * > ( pPatch ) ;
kernelInfo . usesSsh = ( kernelInfo . patchInfo . bindingTableState - > Count > 0 ) ;
DBG_LOG ( LogPatchTokens ,
" \n .BINDING_TABLE_STATE " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .Offset " , kernelInfo . patchInfo . bindingTableState - > Offset ,
" \n .Count " , kernelInfo . patchInfo . bindingTableState - > Count ,
" \n .SurfaceStateOffset " , kernelInfo . patchInfo . bindingTableState - > SurfaceStateOffset ) ;
break ;
case PATCH_TOKEN_ALLOCATE_LOCAL_SURFACE :
kernelInfo . patchInfo . localsurface =
reinterpret_cast < const SPatchAllocateLocalSurface * > ( pPatch ) ;
kernelInfo . workloadInfo . slmStaticSize = kernelInfo . patchInfo . localsurface - > TotalInlineLocalMemorySize ;
DBG_LOG ( LogPatchTokens ,
" \n .ALLOCATE_LOCAL_SURFACE " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .TotalInlineLocalMemorySize " , kernelInfo . patchInfo . localsurface - > TotalInlineLocalMemorySize ) ;
break ;
case PATCH_TOKEN_MEDIA_VFE_STATE :
kernelInfo . patchInfo . mediavfestate =
reinterpret_cast < const SPatchMediaVFEState * > ( pPatch ) ;
DBG_LOG ( LogPatchTokens ,
" \n .MEDIA_VFE_STATE " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .ScratchSpaceOffset " , kernelInfo . patchInfo . mediavfestate - > ScratchSpaceOffset ,
" \n .PerThreadScratchSpace " , kernelInfo . patchInfo . mediavfestate - > PerThreadScratchSpace ) ;
break ;
case PATCH_TOKEN_DATA_PARAMETER_BUFFER :
DBG_LOG ( LogPatchTokens ,
" \n .DATA_PARAMETER_BUFFER " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ) ;
pDataParameterBuffer = reinterpret_cast < const SPatchDataParameterBuffer * > ( pPatch ) ;
kernelInfo . patchInfo . dataParameterBuffers . push_back (
pDataParameterBuffer ) ;
argNum = pDataParameterBuffer - > ArgumentNumber ;
switch ( pDataParameterBuffer - > Type ) {
case DATA_PARAMETER_KERNEL_ARGUMENT :
kernelInfo . storeKernelArgument ( pDataParameterBuffer ) ;
DBG_LOG ( LogPatchTokens , " \n .Type " , " KERNEL_ARGUMENT " ) ;
break ;
case DATA_PARAMETER_LOCAL_WORK_SIZE : {
DBG_LOG ( LogPatchTokens , " \n .Type " , " LOCAL_WORK_SIZE " ) ;
index = pDataParameterBuffer - > SourceOffset / sizeof ( uint32_t ) ;
if ( kernelInfo . workloadInfo . localWorkSizeOffsets [ 2 ] = = WorkloadInfo : : undefinedOffset ) {
kernelInfo . workloadInfo . localWorkSizeOffsets [ index ] =
pDataParameterBuffer - > Offset ;
} else {
kernelInfo . workloadInfo . localWorkSizeOffsets2 [ index ] =
pDataParameterBuffer - > Offset ;
}
break ;
}
case DATA_PARAMETER_GLOBAL_WORK_OFFSET :
DBG_LOG ( LogPatchTokens , " \n .Type " , " GLOBAL_WORK_OFFSET " ) ;
index = pDataParameterBuffer - > SourceOffset / sizeof ( uint32_t ) ;
kernelInfo . workloadInfo . globalWorkOffsetOffsets [ index ] =
pDataParameterBuffer - > Offset ;
break ;
case DATA_PARAMETER_ENQUEUED_LOCAL_WORK_SIZE :
DBG_LOG ( LogPatchTokens , " \n .Type " , " ENQUEUED_LOCAL_WORK_SIZE " ) ;
index = pDataParameterBuffer - > SourceOffset / sizeof ( uint32_t ) ;
kernelInfo . workloadInfo . enqueuedLocalWorkSizeOffsets [ index ] =
pDataParameterBuffer - > Offset ;
break ;
case DATA_PARAMETER_GLOBAL_WORK_SIZE :
DBG_LOG ( LogPatchTokens , " \n .Type " , " GLOBAL_WORK_SIZE " ) ;
index = pDataParameterBuffer - > SourceOffset / sizeof ( uint32_t ) ;
kernelInfo . workloadInfo . globalWorkSizeOffsets [ index ] =
pDataParameterBuffer - > Offset ;
break ;
case DATA_PARAMETER_NUM_WORK_GROUPS :
DBG_LOG ( LogPatchTokens , " \n .Type " , " NUM_WORK_GROUPS " ) ;
index = pDataParameterBuffer - > SourceOffset / sizeof ( uint32_t ) ;
kernelInfo . workloadInfo . numWorkGroupsOffset [ index ] =
pDataParameterBuffer - > Offset ;
break ;
case DATA_PARAMETER_MAX_WORKGROUP_SIZE :
DBG_LOG ( LogPatchTokens , " \n .Type " , " MAX_WORKGROUP_SIZE " ) ;
kernelInfo . workloadInfo . maxWorkGroupSizeOffset = pDataParameterBuffer - > Offset ;
break ;
case DATA_PARAMETER_WORK_DIMENSIONS :
DBG_LOG ( LogPatchTokens , " \n .Type " , " WORK_DIMENSIONS " ) ;
kernelInfo . workloadInfo . workDimOffset = pDataParameterBuffer - > Offset ;
break ;
case DATA_PARAMETER_SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES : {
DBG_LOG ( LogPatchTokens , " \n .Type " , " SUM_OF_LOCAL_MEMORY_OBJECT_ARGUMENT_SIZES " ) ;
kernelInfo . resizeKernelArgInfoAndRegisterParameter ( argNum ) ;
KernelArgPatchInfo kernelArgPatchInfo ;
kernelArgPatchInfo . size = pDataParameterBuffer - > DataSize ;
kernelArgPatchInfo . crossthreadOffset = pDataParameterBuffer - > Offset ;
kernelInfo . kernelArgInfo [ argNum ] . slmAlignment = pDataParameterBuffer - > SourceOffset ;
kernelInfo . kernelArgInfo [ argNum ] . kernelArgPatchInfoVector . push_back ( kernelArgPatchInfo ) ;
} break ;
case DATA_PARAMETER_IMAGE_WIDTH :
DBG_LOG ( LogPatchTokens , " \n .Type " , " IMAGE_WIDTH " ) ;
kernelInfo . resizeKernelArgInfoAndRegisterParameter ( argNum ) ;
kernelInfo . kernelArgInfo [ argNum ] . offsetImgWidth = pDataParameterBuffer - > Offset ;
break ;
case DATA_PARAMETER_IMAGE_HEIGHT :
DBG_LOG ( LogPatchTokens , " \n .Type " , " IMAGE_HEIGHT " ) ;
kernelInfo . resizeKernelArgInfoAndRegisterParameter ( argNum ) ;
kernelInfo . kernelArgInfo [ argNum ] . offsetImgHeight = pDataParameterBuffer - > Offset ;
break ;
case DATA_PARAMETER_IMAGE_DEPTH :
DBG_LOG ( LogPatchTokens , " \n .Type " , " IMAGE_DEPTH " ) ;
kernelInfo . resizeKernelArgInfoAndRegisterParameter ( argNum ) ;
kernelInfo . kernelArgInfo [ argNum ] . offsetImgDepth = pDataParameterBuffer - > Offset ;
break ;
case DATA_PARAMETER_SAMPLER_COORDINATE_SNAP_WA_REQUIRED :
DBG_LOG ( LogPatchTokens , " \n .Type " , " SAMPLER_COORDINATE_SNAP_WA_REQUIRED " ) ;
kernelInfo . resizeKernelArgInfoAndRegisterParameter ( argNum ) ;
kernelInfo . kernelArgInfo [ argNum ] . offsetSamplerSnapWa = pDataParameterBuffer - > Offset ;
break ;
case DATA_PARAMETER_SAMPLER_ADDRESS_MODE :
DBG_LOG ( LogPatchTokens , " \n .Type " , " SAMPLER_ADDRESS_MODE " ) ;
kernelInfo . resizeKernelArgInfoAndRegisterParameter ( argNum ) ;
kernelInfo . kernelArgInfo [ argNum ] . offsetSamplerAddressingMode = pDataParameterBuffer - > Offset ;
break ;
case DATA_PARAMETER_SAMPLER_NORMALIZED_COORDS :
DBG_LOG ( LogPatchTokens , " \n .Type " , " SAMPLER_ADDRESS_MODE " ) ;
kernelInfo . resizeKernelArgInfoAndRegisterParameter ( argNum ) ;
kernelInfo . kernelArgInfo [ argNum ] . offsetSamplerNormalizedCoords = pDataParameterBuffer - > Offset ;
break ;
case DATA_PARAMETER_IMAGE_CHANNEL_DATA_TYPE :
DBG_LOG ( LogPatchTokens , " \n .Type " , " SAMPLER_ADDRESS_MODE " ) ;
kernelInfo . resizeKernelArgInfoAndRegisterParameter ( argNum ) ;
kernelInfo . kernelArgInfo [ argNum ] . offsetChannelDataType = pDataParameterBuffer - > Offset ;
break ;
case DATA_PARAMETER_IMAGE_CHANNEL_ORDER :
DBG_LOG ( LogPatchTokens , " \n .Type " , " IMAGE_CHANNEL_ORDER " ) ;
kernelInfo . resizeKernelArgInfoAndRegisterParameter ( argNum ) ;
kernelInfo . kernelArgInfo [ argNum ] . offsetChannelOrder = pDataParameterBuffer - > Offset ;
break ;
case DATA_PARAMETER_IMAGE_ARRAY_SIZE :
DBG_LOG ( LogPatchTokens , " \n .Type " , " IMAGE_ARRAY_SIZE " ) ;
kernelInfo . resizeKernelArgInfoAndRegisterParameter ( argNum ) ;
kernelInfo . kernelArgInfo [ argNum ] . offsetArraySize = pDataParameterBuffer - > Offset ;
break ;
case DATA_PARAMETER_OBJECT_ID :
DBG_LOG ( LogPatchTokens , " \n .Type " , " OBJECT_ID " ) ;
kernelInfo . resizeKernelArgInfoAndRegisterParameter ( argNum ) ;
kernelInfo . kernelArgInfo [ argNum ] . offsetObjectId = pDataParameterBuffer - > Offset ;
break ;
case DATA_PARAMETER_SIMD_SIZE :
DBG_LOG ( LogPatchTokens , " \n .Type " , " SIMD_SIZE " ) ;
kernelInfo . workloadInfo . simdSizeOffset = pDataParameterBuffer - > Offset ;
break ;
case DATA_PARAMETER_PARENT_EVENT :
DBG_LOG ( LogPatchTokens , " \n .Type " , " PARENT_EVENT " ) ;
kernelInfo . workloadInfo . parentEventOffset = pDataParameterBuffer - > Offset ;
break ;
case DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE :
DBG_LOG ( LogPatchTokens , " \n .Type " , " CHILD_BLOCK_SIMD_SIZE " ) ;
kernelInfo . childrenKernelsIdOffset . push_back ( { argNum , pDataParameterBuffer - > Offset } ) ;
break ;
case DATA_PARAMETER_PRIVATE_MEMORY_STATELESS_SIZE :
DBG_LOG ( LogPatchTokens , " \n .Type " , " PRIVATE_MEMORY_STATELESS_SIZE " ) ;
PrivateMemoryStatelessSizeOffset = pDataParameterBuffer - > Offset ;
break ;
case DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_SIZE :
DBG_LOG ( LogPatchTokens , " \n .Type " , " LOCAL_MEMORY_STATELESS_WINDOW_SIZE " ) ;
LocalMemoryStatelessWindowSizeOffset = pDataParameterBuffer - > Offset ;
break ;
case DATA_PARAMETER_LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS :
DBG_LOG ( LogPatchTokens , " \n .Type " , " LOCAL_MEMORY_STATELESS_WINDOW_START_ADDRESS " ) ;
LocalMemoryStatelessWindowStartAddressOffset = pDataParameterBuffer - > Offset ;
pDevice - > prepareSLMWindow ( ) ;
break ;
case DATA_PARAMETER_PREFERRED_WORKGROUP_MULTIPLE :
DBG_LOG ( LogPatchTokens , " \n .Type " , " PREFERRED_WORKGROUP_MULTIPLE " ) ;
kernelInfo . workloadInfo . prefferedWkgMultipleOffset = pDataParameterBuffer - > Offset ;
break ;
case DATA_PARAMETER_BUFFER_OFFSET :
DBG_LOG ( LogPatchTokens , " \n .Type " , " DATA_PARAMETER_BUFFER_OFFSET " ) ;
kernelInfo . resizeKernelArgInfoAndRegisterParameter ( argNum ) ;
kernelInfo . kernelArgInfo [ argNum ] . offsetBufferOffset = pDataParameterBuffer - > Offset ;
break ;
case DATA_PARAMETER_NUM_HARDWARE_THREADS :
case DATA_PARAMETER_PRINTF_SURFACE_SIZE :
DBG_LOG ( LogPatchTokens , " \n .Type " , " Unhandled " , pDataParameterBuffer - > Type ) ;
printDebugString ( DebugManager . flags . PrintDebugMessages . get ( ) , stderr ,
" Program::parsePatchList.Unhandled Data parameter: %d \n " , pDataParameterBuffer - > Type ) ;
break ;
case DATA_PARAMETER_VME_MB_BLOCK_TYPE :
DBG_LOG ( LogPatchTokens , " \n .Type " , " VME_MB_BLOCK_TYPE " ) ;
kernelInfo . resizeKernelArgInfoAndRegisterParameter ( argNum ) ;
kernelInfo . kernelArgInfo [ argNum ] . offsetVmeMbBlockType = pDataParameterBuffer - > Offset ;
DEBUG_BREAK_IF ( pDataParameterBuffer - > DataSize ! = sizeof ( uint32_t ) ) ;
break ;
case DATA_PARAMETER_VME_SUBPIXEL_MODE :
DBG_LOG ( LogPatchTokens , " \n .Type " , " VME_SUBPIXEL_MODE " ) ;
kernelInfo . resizeKernelArgInfoAndRegisterParameter ( argNum ) ;
kernelInfo . kernelArgInfo [ argNum ] . offsetVmeSubpixelMode = pDataParameterBuffer - > Offset ;
DEBUG_BREAK_IF ( pDataParameterBuffer - > DataSize ! = sizeof ( uint32_t ) ) ;
break ;
case DATA_PARAMETER_VME_SAD_ADJUST_MODE :
DBG_LOG ( LogPatchTokens , " \n .Type " , " VME_SAD_ADJUST_MODE " ) ;
kernelInfo . resizeKernelArgInfoAndRegisterParameter ( argNum ) ;
kernelInfo . kernelArgInfo [ argNum ] . offsetVmeSadAdjustMode = pDataParameterBuffer - > Offset ;
DEBUG_BREAK_IF ( pDataParameterBuffer - > DataSize ! = sizeof ( uint32_t ) ) ;
break ;
case DATA_PARAMETER_VME_SEARCH_PATH_TYPE :
DBG_LOG ( LogPatchTokens , " \n .Type " , " VME_SEARCH_PATH_TYPE " ) ;
kernelInfo . resizeKernelArgInfoAndRegisterParameter ( argNum ) ;
kernelInfo . kernelArgInfo [ argNum ] . offsetVmeSearchPathType = pDataParameterBuffer - > Offset ;
DEBUG_BREAK_IF ( pDataParameterBuffer - > DataSize ! = sizeof ( uint32_t ) ) ;
break ;
case DATA_PARAMETER_IMAGE_NUM_SAMPLES :
DBG_LOG ( LogPatchTokens , " \n .Type " , " IMAGE_NUM_SAMPLES " ) ;
kernelInfo . resizeKernelArgInfoAndRegisterParameter ( argNum ) ;
kernelInfo . kernelArgInfo [ argNum ] . offsetNumSamples = pDataParameterBuffer - > Offset ;
DEBUG_BREAK_IF ( pDataParameterBuffer - > DataSize ! = sizeof ( uint32_t ) ) ;
break ;
case DATA_PARAMETER_IMAGE_NUM_MIP_LEVELS :
2018-04-05 23:14:12 +08:00
DBG_LOG ( LogPatchTokens , " \n .Type " , " IMAGE_NUM_MIP_LEVELS " ) ;
kernelInfo . resizeKernelArgInfoAndRegisterParameter ( argNum ) ;
kernelInfo . kernelArgInfo [ argNum ] . offsetNumMipLevels = pDataParameterBuffer - > Offset ;
DEBUG_BREAK_IF ( pDataParameterBuffer - > DataSize ! = sizeof ( uint32_t ) ) ;
break ;
2017-12-21 07:45:38 +08:00
case DATA_PARAMETER_IMAGE_SRGB_CHANNEL_ORDER :
case DATA_PARAMETER_STAGE_IN_GRID_ORIGIN :
case DATA_PARAMETER_STAGE_IN_GRID_SIZE :
break ;
case DATA_PARAMETER_LOCAL_ID :
case DATA_PARAMETER_EXECUTION_MASK :
case DATA_PARAMETER_VME_IMAGE_TYPE :
case DATA_PARAMETER_VME_MB_SKIP_BLOCK_TYPE :
break ;
default :
DBG_LOG ( LogPatchTokens , " \n .Type " , " Unhandled " , pDataParameterBuffer - > Type ) ;
DEBUG_BREAK_IF ( true ) ;
}
DBG_LOG ( LogPatchTokens ,
" \n .ArgumentNumber " , pDataParameterBuffer - > ArgumentNumber ,
" \n .Offset " , pDataParameterBuffer - > Offset ,
" \n .DataSize " , pDataParameterBuffer - > DataSize ,
" \n .SourceOffset " , pDataParameterBuffer - > SourceOffset ) ;
break ;
case PATCH_TOKEN_MEDIA_INTERFACE_DESCRIPTOR_LOAD :
kernelInfo . patchInfo . interfaceDescriptorDataLoad =
reinterpret_cast < const SPatchMediaInterfaceDescriptorLoad * > ( pPatch ) ;
DBG_LOG ( LogPatchTokens ,
" \n .MEDIA_INTERFACE_DESCRIPTOR_LOAD " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .InterfaceDescriptorDataOffset " , kernelInfo . patchInfo . interfaceDescriptorDataLoad - > InterfaceDescriptorDataOffset ) ;
break ;
case PATCH_TOKEN_INTERFACE_DESCRIPTOR_DATA :
kernelInfo . patchInfo . interfaceDescriptorData =
reinterpret_cast < const SPatchInterfaceDescriptorData * > ( pPatch ) ;
DBG_LOG ( LogPatchTokens ,
" \n .INTERFACE_DESCRIPTOR_DATA " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .Offset " , kernelInfo . patchInfo . interfaceDescriptorData - > Offset ,
" \n .SamplerStateOffset " , kernelInfo . patchInfo . interfaceDescriptorData - > SamplerStateOffset ,
" \n .KernelOffset " , kernelInfo . patchInfo . interfaceDescriptorData - > KernelOffset ,
" \n .BindingTableOffset " , kernelInfo . patchInfo . interfaceDescriptorData - > BindingTableOffset ) ;
break ;
case PATCH_TOKEN_THREAD_PAYLOAD :
kernelInfo . patchInfo . threadPayload =
reinterpret_cast < const SPatchThreadPayload * > ( pPatch ) ;
DBG_LOG ( LogPatchTokens ,
" \n .THREAD_PAYLOAD " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .HeaderPresent " , kernelInfo . patchInfo . threadPayload - > HeaderPresent ,
" \n .LocalIDXPresent " , kernelInfo . patchInfo . threadPayload - > LocalIDXPresent ,
" \n .LocalIDYPresent " , kernelInfo . patchInfo . threadPayload - > LocalIDYPresent ,
" \n .LocalIDZPresent " , kernelInfo . patchInfo . threadPayload - > LocalIDZPresent ,
" \n .LocalIDFlattenedPresent " , kernelInfo . patchInfo . threadPayload - > LocalIDFlattenedPresent ,
" \n .IndirectPayloadStorage " , kernelInfo . patchInfo . threadPayload - > IndirectPayloadStorage ,
" \n .UnusedPerThreadConstantPresent " , kernelInfo . patchInfo . threadPayload - > UnusedPerThreadConstantPresent ,
" \n .GetLocalIDPresent " , kernelInfo . patchInfo . threadPayload - > GetLocalIDPresent ,
" \n .GetGroupIDPresent " , kernelInfo . patchInfo . threadPayload - > GetGroupIDPresent ,
" \n .GetGlobalOffsetPresent " , kernelInfo . patchInfo . threadPayload - > GetGlobalOffsetPresent ) ;
break ;
case PATCH_TOKEN_EXECUTION_ENVIRONMENT :
kernelInfo . patchInfo . executionEnvironment =
reinterpret_cast < const SPatchExecutionEnvironment * > ( pPatch ) ;
if ( kernelInfo . patchInfo . executionEnvironment - > RequiredWorkGroupSizeX ! = 0 ) {
kernelInfo . reqdWorkGroupSize [ 0 ] = kernelInfo . patchInfo . executionEnvironment - > RequiredWorkGroupSizeX ;
kernelInfo . reqdWorkGroupSize [ 1 ] = kernelInfo . patchInfo . executionEnvironment - > RequiredWorkGroupSizeY ;
kernelInfo . reqdWorkGroupSize [ 2 ] = kernelInfo . patchInfo . executionEnvironment - > RequiredWorkGroupSizeZ ;
DEBUG_BREAK_IF ( ! ( kernelInfo . patchInfo . executionEnvironment - > RequiredWorkGroupSizeY > 0 ) ) ;
DEBUG_BREAK_IF ( ! ( kernelInfo . patchInfo . executionEnvironment - > RequiredWorkGroupSizeZ > 0 ) ) ;
}
if ( kernelInfo . patchInfo . executionEnvironment - > CompiledForGreaterThan4GBBuffers = = false ) {
kernelInfo . requiresSshForBuffers = true ;
}
DBG_LOG ( LogPatchTokens ,
" \n .EXECUTION_ENVIRONMENT " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .RequiredWorkGroupSizeX " , kernelInfo . patchInfo . executionEnvironment - > RequiredWorkGroupSizeX ,
" \n .RequiredWorkGroupSizeY " , kernelInfo . patchInfo . executionEnvironment - > RequiredWorkGroupSizeY ,
" \n .RequiredWorkGroupSizeZ " , kernelInfo . patchInfo . executionEnvironment - > RequiredWorkGroupSizeZ ,
" \n .LargestCompiledSIMDSize " , kernelInfo . patchInfo . executionEnvironment - > LargestCompiledSIMDSize ,
" \n .CompiledSubGroupsNumber " , kernelInfo . patchInfo . executionEnvironment - > CompiledSubGroupsNumber ,
" \n .HasBarriers " , kernelInfo . patchInfo . executionEnvironment - > HasBarriers ,
" \n .DisableMidThreadPreemption " , kernelInfo . patchInfo . executionEnvironment - > DisableMidThreadPreemption ,
" \n .CompiledSIMD8 " , kernelInfo . patchInfo . executionEnvironment - > CompiledSIMD8 ,
" \n .CompiledSIMD16 " , kernelInfo . patchInfo . executionEnvironment - > CompiledSIMD16 ,
" \n .CompiledSIMD32 " , kernelInfo . patchInfo . executionEnvironment - > CompiledSIMD32 ,
" \n .HasDeviceEnqueue " , kernelInfo . patchInfo . executionEnvironment - > HasDeviceEnqueue ,
" \n .MayAccessUndeclaredResource " , kernelInfo . patchInfo . executionEnvironment - > MayAccessUndeclaredResource ,
" \n .UsesFencesForReadWriteImages " , kernelInfo . patchInfo . executionEnvironment - > UsesFencesForReadWriteImages ,
" \n .UsesStatelessSpillFill " , kernelInfo . patchInfo . executionEnvironment - > UsesStatelessSpillFill ,
" \n .IsCoherent " , kernelInfo . patchInfo . executionEnvironment - > IsCoherent ,
" \n .SubgroupIndependentForwardProgressRequired " , kernelInfo . patchInfo . executionEnvironment - > SubgroupIndependentForwardProgressRequired ) ;
break ;
case PATCH_TOKEN_DATA_PARAMETER_STREAM :
kernelInfo . patchInfo . dataParameterStream =
reinterpret_cast < const SPatchDataParameterStream * > ( pPatch ) ;
DBG_LOG ( LogPatchTokens ,
" \n .DATA_PARAMETER_STREAM " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .DataParameterStreamSize " , kernelInfo . patchInfo . dataParameterStream - > DataParameterStreamSize ) ;
break ;
case PATCH_TOKEN_KERNEL_ARGUMENT_INFO : {
auto pkernelArgInfo = reinterpret_cast < const SPatchKernelArgumentInfo * > ( pPatch ) ;
kernelInfo . storeArgInfo ( pkernelArgInfo ) ;
DBG_LOG ( LogPatchTokens ,
" \n .KERNEL_ARGUMENT_INFO " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .ArgumentNumber " , pkernelArgInfo - > ArgumentNumber ,
" \n .AddressQualifierSize " , pkernelArgInfo - > AddressQualifierSize ,
" \n .AccessQualifierSize " , pkernelArgInfo - > AccessQualifierSize ,
" \n .ArgumentNameSize " , pkernelArgInfo - > ArgumentNameSize ,
" \n .TypeNameSize " , pkernelArgInfo - > TypeNameSize ,
" \n .TypeQualifierSize " , pkernelArgInfo - > TypeQualifierSize ) ;
break ;
}
case PATCH_TOKEN_KERNEL_ATTRIBUTES_INFO :
kernelInfo . patchInfo . pKernelAttributesInfo =
reinterpret_cast < const SPatchKernelAttributesInfo * > ( pPatch ) ;
kernelInfo . storePatchToken ( kernelInfo . patchInfo . pKernelAttributesInfo ) ;
DBG_LOG ( LogPatchTokens ,
" \n .KERNEL_ATTRIBUTES_INFO " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .AttributesSize " , kernelInfo . patchInfo . pKernelAttributesInfo - > AttributesSize ) ;
break ;
case PATCH_TOKEN_SAMPLER_KERNEL_ARGUMENT : {
const SPatchSamplerKernelArgument * pSamplerKernelObjectKernelArg = nullptr ;
pSamplerKernelObjectKernelArg = reinterpret_cast < const SPatchSamplerKernelArgument * > ( pPatch ) ;
kernelInfo . storeKernelArgument ( pSamplerKernelObjectKernelArg ) ;
DBG_LOG ( LogPatchTokens ,
" \n .SAMPLER_KERNEL_ARGUMENT " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .ArgumentNumber " , pSamplerKernelObjectKernelArg - > ArgumentNumber ,
" \n .Type " , pSamplerKernelObjectKernelArg - > Type ,
" \n .Offset " , pSamplerKernelObjectKernelArg - > Offset ) ;
} ;
break ;
case PATCH_TOKEN_IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT : {
const SPatchImageMemoryObjectKernelArgument * pImageMemObjectKernelArg = nullptr ;
pImageMemObjectKernelArg =
reinterpret_cast < const SPatchImageMemoryObjectKernelArgument * > ( pPatch ) ;
kernelInfo . storeKernelArgument ( pImageMemObjectKernelArg ) ;
DBG_LOG ( LogPatchTokens ,
" \n .IMAGE_MEMORY_OBJECT_KERNEL_ARGUMENT " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .ArgumentNumber " , pImageMemObjectKernelArg - > ArgumentNumber ,
" \n .Type " , pImageMemObjectKernelArg - > Type ,
" \n .Offset " , pImageMemObjectKernelArg - > Offset ,
" \n .LocationIndex " , pImageMemObjectKernelArg - > LocationIndex ,
" \n .LocationIndex2 " , pImageMemObjectKernelArg - > LocationIndex2 ,
" \n .Transformable " , pImageMemObjectKernelArg - > Transformable ) ;
} ;
break ;
case PATCH_TOKEN_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT : {
const SPatchGlobalMemoryObjectKernelArgument * pGlobalMemObjectKernelArg = nullptr ;
pGlobalMemObjectKernelArg =
reinterpret_cast < const SPatchGlobalMemoryObjectKernelArgument * > ( pPatch ) ;
kernelInfo . storeKernelArgument ( pGlobalMemObjectKernelArg ) ;
DBG_LOG ( LogPatchTokens ,
" \n .GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .ArgumentNumber " , pGlobalMemObjectKernelArg - > ArgumentNumber ,
" \n .Offset " , pGlobalMemObjectKernelArg - > Offset ,
" \n .LocationIndex " , pGlobalMemObjectKernelArg - > LocationIndex ,
" \n .LocationIndex2 " , pGlobalMemObjectKernelArg - > LocationIndex2 ) ;
} ;
break ;
case PATCH_TOKEN_STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT : {
const SPatchStatelessGlobalMemoryObjectKernelArgument * pStatelessGlobalMemObjKernelArg = nullptr ;
pStatelessGlobalMemObjKernelArg =
reinterpret_cast < const SPatchStatelessGlobalMemoryObjectKernelArgument * > ( pPatch ) ;
kernelInfo . storeKernelArgument ( pStatelessGlobalMemObjKernelArg ) ;
DBG_LOG ( LogPatchTokens ,
" \n .STATELESS_GLOBAL_MEMORY_OBJECT_KERNEL_ARGUMENT " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .ArgumentNumber " , pStatelessGlobalMemObjKernelArg - > ArgumentNumber ,
" \n .SurfaceStateHeapOffset " , pStatelessGlobalMemObjKernelArg - > SurfaceStateHeapOffset ,
" \n .DataParamOffset " , pStatelessGlobalMemObjKernelArg - > DataParamOffset ,
" \n .DataParamSize " , pStatelessGlobalMemObjKernelArg - > DataParamSize ,
" \n .LocationIndex " , pStatelessGlobalMemObjKernelArg - > LocationIndex ,
" \n .LocationIndex2 " , pStatelessGlobalMemObjKernelArg - > LocationIndex2 ) ;
} ;
break ;
case PATCH_TOKEN_STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT : {
const SPatchStatelessConstantMemoryObjectKernelArgument * pPatchToken = reinterpret_cast < const SPatchStatelessConstantMemoryObjectKernelArgument * > ( pPatch ) ;
kernelInfo . storeKernelArgument ( pPatchToken ) ;
DBG_LOG ( LogPatchTokens ,
" \n .STATELESS_CONSTANT_MEMORY_OBJECT_KERNEL_ARGUMENT " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .ArgumentNumber " , pPatchToken - > ArgumentNumber ,
" \n .SurfaceStateHeapOffset " , pPatchToken - > SurfaceStateHeapOffset ,
" \n .DataParamOffset " , pPatchToken - > DataParamOffset ,
" \n .DataParamSize " , pPatchToken - > DataParamSize ) ;
} break ;
case PATCH_TOKEN_STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT : {
const SPatchStatelessDeviceQueueKernelArgument * pPatchToken = reinterpret_cast < const SPatchStatelessDeviceQueueKernelArgument * > ( pPatch ) ;
kernelInfo . storeKernelArgument ( pPatchToken ) ;
DBG_LOG ( LogPatchTokens ,
" \n .STATELESS_DEVICE_QUEUE_KERNEL_ARGUMENT " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .ArgumentNumber " , pPatchToken - > ArgumentNumber ,
" \n .SurfaceStateHeapOffset " , pPatchToken - > SurfaceStateHeapOffset ,
" \n .DataParamOffset " , pPatchToken - > DataParamOffset ,
" \n .DataParamSize " , pPatchToken - > DataParamSize ) ;
} break ;
case PATCH_TOKEN_ALLOCATE_STATELESS_PRIVATE_MEMORY : {
const SPatchAllocateStatelessPrivateSurface * pPatchToken = reinterpret_cast < const SPatchAllocateStatelessPrivateSurface * > ( pPatch ) ;
kernelInfo . storePatchToken ( pPatchToken ) ;
DBG_LOG ( LogPatchTokens ,
" \n .ALLOCATE_STATELESS_PRIVATE_MEMORY " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .SurfaceStateHeapOffset " , pPatchToken - > SurfaceStateHeapOffset ,
" \n .DataParamOffset " , pPatchToken - > DataParamOffset ,
" \n .DataParamSize " , pPatchToken - > DataParamSize ,
" \n .PerThreadPrivateMemorySize " , pPatchToken - > PerThreadPrivateMemorySize ) ;
} break ;
case PATCH_TOKEN_ALLOCATE_STATELESS_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION : {
const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization * pPatchToken = reinterpret_cast < const SPatchAllocateStatelessConstantMemorySurfaceWithInitialization * > ( pPatch ) ;
kernelInfo . storePatchToken ( pPatchToken ) ;
DBG_LOG ( LogPatchTokens ,
" \n .ALLOCATE_STATELESS_CONSTANT_MEMORY_SURFACE_WITH_INITIALIZATION " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .ConstantBufferIndex " , pPatchToken - > ConstantBufferIndex ,
" \n .SurfaceStateHeapOffset " , pPatchToken - > SurfaceStateHeapOffset ,
" \n .DataParamOffset " , pPatchToken - > DataParamOffset ,
" \n .DataParamSize " , pPatchToken - > DataParamSize ) ;
} break ;
case PATCH_TOKEN_ALLOCATE_STATELESS_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION : {
const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization * pPatchToken = reinterpret_cast < const SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization * > ( pPatch ) ;
kernelInfo . storePatchToken ( pPatchToken ) ;
DBG_LOG ( LogPatchTokens ,
" \n .ALLOCATE_STATELESS_GLOBAL_MEMORY_SURFACE_WITH_INITIALIZATION " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .GlobalBufferIndex " , pPatchToken - > GlobalBufferIndex ,
" \n .SurfaceStateHeapOffset " , pPatchToken - > SurfaceStateHeapOffset ,
" \n .DataParamOffset " , pPatchToken - > DataParamOffset ,
" \n .DataParamSize " , pPatchToken - > DataParamSize ) ;
} break ;
case PATCH_TOKEN_ALLOCATE_STATELESS_PRINTF_SURFACE : {
const SPatchAllocateStatelessPrintfSurface * pPatchToken = reinterpret_cast < const SPatchAllocateStatelessPrintfSurface * > ( pPatch ) ;
kernelInfo . storePatchToken ( pPatchToken ) ;
DBG_LOG ( LogPatchTokens ,
" \n .ALLOCATE_STATELESS_PRINTF_SURFACE " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .PrintfSurfaceIndex " , pPatchToken - > PrintfSurfaceIndex ,
" \n .SurfaceStateHeapOffset " , pPatchToken - > SurfaceStateHeapOffset ,
" \n .DataParamOffset " , pPatchToken - > DataParamOffset ,
" \n .DataParamSize " , pPatchToken - > DataParamSize ) ;
} break ;
case PATCH_TOKEN_ALLOCATE_STATELESS_EVENT_POOL_SURFACE : {
const SPatchAllocateStatelessEventPoolSurface * pPatchToken = reinterpret_cast < const SPatchAllocateStatelessEventPoolSurface * > ( pPatch ) ;
kernelInfo . storePatchToken ( pPatchToken ) ;
DBG_LOG ( LogPatchTokens ,
" \n .ALLOCATE_STATELESS_EVENT_POOL_SURFACE " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .EventPoolSurfaceIndex " , pPatchToken - > EventPoolSurfaceIndex ,
" \n .SurfaceStateHeapOffset " , pPatchToken - > SurfaceStateHeapOffset ,
" \n .DataParamOffset " , pPatchToken - > DataParamOffset ,
" \n .DataParamSize " , pPatchToken - > DataParamSize ) ;
} break ;
case PATCH_TOKEN_ALLOCATE_STATELESS_DEFAULT_DEVICE_QUEUE_SURFACE : {
const SPatchAllocateStatelessDefaultDeviceQueueSurface * pPatchToken = reinterpret_cast < const SPatchAllocateStatelessDefaultDeviceQueueSurface * > ( pPatch ) ;
kernelInfo . storePatchToken ( pPatchToken ) ;
DBG_LOG ( LogPatchTokens ,
" \n .ALLOCATE_STATELESS_DEFAULT_DEVICE_QUEUE_SURFACE " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .SurfaceStateHeapOffset " , pPatchToken - > SurfaceStateHeapOffset ,
" \n .DataParamOffset " , pPatchToken - > DataParamOffset ,
" \n .DataParamSize " , pPatchToken - > DataParamSize ) ;
} break ;
case PATCH_TOKEN_STRING : {
const SPatchString * pPatchToken = reinterpret_cast < const SPatchString * > ( pPatch ) ;
kernelInfo . storePatchToken ( pPatchToken ) ;
DBG_LOG ( LogPatchTokens ,
" \n .STRING " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .Index " , pPatchToken - > Index ,
" \n .StringSize " , pPatchToken - > StringSize ) ;
} break ;
case PATCH_TOKEN_INLINE_VME_SAMPLER_INFO :
kernelInfo . isVmeWorkload = true ;
DBG_LOG ( LogPatchTokens ,
" \n .INLINE_VME_SAMPLER_INFO " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ) ;
break ;
case PATCH_TOKEN_GTPIN_FREE_GRF_INFO : {
const SPatchGtpinFreeGRFInfo * pPatchToken = reinterpret_cast < const SPatchGtpinFreeGRFInfo * > ( pPatch ) ;
DBG_LOG ( LogPatchTokens ,
" \n .PATCH_TOKEN_GTPIN_FREE_GRF_INFO " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .BufferSize " , pPatchToken - > BufferSize ) ;
} break ;
case PATCH_TOKEN_STATE_SIP : {
const SPatchStateSIP * pPatchToken = reinterpret_cast < const SPatchStateSIP * > ( pPatch ) ;
kernelInfo . systemKernelOffset = pPatchToken - > SystemKernelOffset ;
DBG_LOG ( LogPatchTokens ,
" \n .PATCH_TOKEN_STATE_SIP " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .SystemKernelOffset " , pPatchToken - > SystemKernelOffset ) ;
} break ;
case PATCH_TOKEN_ALLOCATE_SIP_SURFACE : {
auto * pPatchToken = reinterpret_cast < const SPatchAllocateSystemThreadSurface * > ( pPatch ) ;
kernelInfo . storePatchToken ( pPatchToken ) ;
DBG_LOG ( LogPatchTokens ,
" \n .PATCH_TOKEN_ALLOCATE_SIP_SURFACE " , pPatch - > Token ,
" \n .Size " , pPatch - > Size ,
" \n .BTI " , pPatchToken - > BTI ,
" \n .Offset " , pPatchToken - > Offset ,
" \n .PerThreadSystemThreadSurfaceSize " , pPatchToken - > PerThreadSystemThreadSurfaceSize ) ;
} break ;
default :
printDebugString ( DebugManager . flags . PrintDebugMessages . get ( ) , stderr , " Program::parsePatchList. Unknown Patch Token: %d \n " , pPatch - > Token ) ;
if ( false = = isSafeToSkipUnhandledToken ( pPatch - > Token ) ) {
retVal = CL_INVALID_KERNEL ;
}
break ;
}
if ( retVal ! = CL_SUCCESS ) {
break ;
}
pCurPatchListPtr = ptrOffset ( pCurPatchListPtr , pPatch - > Size ) ;
}
if ( retVal = = CL_SUCCESS ) {
retVal = kernelInfo . resolveKernelInfo ( ) ;
}
if ( kernelInfo . patchInfo . dataParameterStream & & kernelInfo . patchInfo . dataParameterStream - > DataParameterStreamSize ) {
uint32_t crossThreadDataSize = kernelInfo . patchInfo . dataParameterStream - > DataParameterStreamSize ;
kernelInfo . crossThreadData = new char [ crossThreadDataSize ] ;
memset ( kernelInfo . crossThreadData , 0x00 , crossThreadDataSize ) ;
if ( LocalMemoryStatelessWindowStartAddressOffset ! = 0xFFffFFff ) {
* ( uintptr_t * ) & ( kernelInfo . crossThreadData [ LocalMemoryStatelessWindowStartAddressOffset ] ) = reinterpret_cast < uintptr_t > ( this - > pDevice - > getSLMWindowStartAddress ( ) ) ;
}
if ( LocalMemoryStatelessWindowSizeOffset ! = 0xFFffFFff ) {
* ( uint32_t * ) & ( kernelInfo . crossThreadData [ LocalMemoryStatelessWindowSizeOffset ] ) = ( uint32_t ) this - > pDevice - > getDeviceInfo ( ) . localMemSize ;
}
if ( kernelInfo . patchInfo . pAllocateStatelessPrivateSurface & & ( PrivateMemoryStatelessSizeOffset ! = 0xFFffFFff ) ) {
* ( uint32_t * ) & ( kernelInfo . crossThreadData [ PrivateMemoryStatelessSizeOffset ] ) = kernelInfo . patchInfo . pAllocateStatelessPrivateSurface - > PerThreadPrivateMemorySize * this - > getDevice ( 0 ) . getDeviceInfo ( ) . computeUnitsUsedForScratch * kernelInfo . getMaxSimdSize ( ) ;
}
if ( kernelInfo . workloadInfo . maxWorkGroupSizeOffset ! = WorkloadInfo : : undefinedOffset ) {
* ( uint32_t * ) & ( kernelInfo . crossThreadData [ kernelInfo . workloadInfo . maxWorkGroupSizeOffset ] ) = ( uint32_t ) this - > getDevice ( 0 ) . getDeviceInfo ( ) . maxWorkGroupSize ;
}
}
if ( kernelInfo . heapInfo . pKernelHeader - > KernelHeapSize & & this - > pDevice ) {
retVal = kernelInfo . createKernelAllocation ( this - > pDevice - > getMemoryManager ( ) ) ? CL_SUCCESS : CL_OUT_OF_HOST_MEMORY ;
}
DEBUG_BREAK_IF ( kernelInfo . heapInfo . pKernelHeader - > KernelHeapSize & & ! this - > pDevice ) ;
return retVal ;
}
// Walks the program-scope patch list embedded in the gen binary and materializes
// program-level state: the constant/global memory surfaces (with their inline
// initialization data) and self-referencing pointer relocations inside them.
//
// Returns CL_SUCCESS, CL_OUT_OF_HOST_MEMORY on surface allocation failure, or
// CL_INVALID_BINARY when an unknown, non-skippable token is encountered.
cl_int Program::parseProgramScopePatchList() {
    cl_int retVal = CL_SUCCESS;
    cl_uint surfaceSize = 0;

    auto pPatchList = programScopePatchList;
    auto patchListSize = programScopePatchListSize;
    auto pCurPatchListPtr = pPatchList;
    cl_uint headerSize = 0;

    while (ptrDiff(pCurPatchListPtr, pPatchList) < patchListSize) {
        auto pPatch = reinterpret_cast<const SPatchItemHeader *>(pCurPatchListPtr);
        switch (pPatch->Token) {
        case PATCH_TOKEN_ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO: {
            const auto &patch = *reinterpret_cast<const SPatchAllocateConstantMemorySurfaceProgramBinaryInfo *>(pPatch);
            // A rebuild may re-enter here; release any previously created surface first.
            if (constantSurface) {
                pDevice->getMemoryManager()->freeGraphicsMemory(constantSurface);
            }
            surfaceSize = patch.InlineDataSize;
            headerSize = sizeof(SPatchAllocateConstantMemorySurfaceProgramBinaryInfo);
            constantSurface = pDevice->getMemoryManager()->createGraphicsAllocationWithRequiredBitness(surfaceSize, nullptr);
            if (constantSurface == nullptr) {
                // Allocation failed; without this check memcpy_s below would
                // dereference a null surface.
                retVal = CL_OUT_OF_HOST_MEMORY;
                break;
            }
            // The inline initialization data immediately follows the token header.
            memcpy_s(constantSurface->getUnderlyingBuffer(), surfaceSize, reinterpret_cast<const cl_char *>(pPatch) + headerSize, surfaceSize);
            // Skip the inline data here; the loop footer additionally advances by pPatch->Size.
            pCurPatchListPtr = ptrOffset(pCurPatchListPtr, surfaceSize);
            DBG_LOG(LogPatchTokens,
                    "\n.ALLOCATE_CONSTANT_MEMORY_SURFACE_PROGRAM_BINARY_INFO", pPatch->Token,
                    "\n.Size", pPatch->Size,
                    "\n.ConstantBufferIndex", patch.ConstantBufferIndex,
                    "\n.InitializationDataSize", patch.InlineDataSize);
        } break;

        case PATCH_TOKEN_ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO: {
            const auto &patch = *reinterpret_cast<const SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo *>(pPatch);
            // A rebuild may re-enter here; release any previously created surface first.
            if (globalSurface) {
                pDevice->getMemoryManager()->freeGraphicsMemory(globalSurface);
            }
            surfaceSize = patch.InlineDataSize;
            headerSize = sizeof(SPatchAllocateGlobalMemorySurfaceProgramBinaryInfo);
            globalSurface = pDevice->getMemoryManager()->createGraphicsAllocationWithRequiredBitness(surfaceSize, nullptr);
            if (globalSurface == nullptr) {
                // Allocation failed; without this check memcpy_s below would
                // dereference a null surface.
                retVal = CL_OUT_OF_HOST_MEMORY;
                break;
            }
            globalVarTotalSize += static_cast<size_t>(surfaceSize);
            // The inline initialization data immediately follows the token header.
            memcpy_s(globalSurface->getUnderlyingBuffer(), surfaceSize, reinterpret_cast<const cl_char *>(pPatch) + headerSize, surfaceSize);
            // Skip the inline data here; the loop footer additionally advances by pPatch->Size.
            pCurPatchListPtr = ptrOffset(pCurPatchListPtr, surfaceSize);
            DBG_LOG(LogPatchTokens,
                    "\n.ALLOCATE_GLOBAL_MEMORY_SURFACE_PROGRAM_BINARY_INFO", pPatch->Token,
                    "\n.Size", pPatch->Size,
                    "\n.BufferType", patch.Type,
                    "\n.GlobalBufferIndex", patch.GlobalBufferIndex,
                    "\n.InitializationDataSize", patch.InlineDataSize);
        } break;

        case PATCH_TOKEN_GLOBAL_POINTER_PROGRAM_BINARY_INFO:
            if (globalSurface != nullptr) {
                const auto &patch = *reinterpret_cast<const SPatchGlobalPointerProgramBinaryInfo *>(pPatch);
                if ((patch.GlobalBufferIndex == 0) && (patch.BufferIndex == 0) && (patch.BufferType == PROGRAM_SCOPE_GLOBAL_BUFFER)) {
                    // Patch a pointer stored inside the global surface so it points at
                    // the surface's own (CPU or 32-bit GPU) base address.
                    void *pPtr = ptrOffset(globalSurface->getUnderlyingBuffer(), static_cast<size_t>(patch.GlobalPointerOffset));
                    if (globalSurface->is32BitAllocation) {
                        *reinterpret_cast<uint32_t *>(pPtr) += static_cast<uint32_t>(globalSurface->getGpuAddressToPatch());
                    } else {
                        *reinterpret_cast<uintptr_t *>(pPtr) += reinterpret_cast<uintptr_t>(globalSurface->getUnderlyingBuffer());
                    }
                } else {
                    printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "Program::parseProgramScopePatchList. Unhandled Data parameter: %d\n", pPatch->Token);
                }
                DBG_LOG(LogPatchTokens,
                        "\n.GLOBAL_POINTER_PROGRAM_BINARY_INFO", pPatch->Token,
                        "\n.Size", pPatch->Size,
                        "\n.GlobalBufferIndex", patch.GlobalBufferIndex,
                        "\n.GlobalPointerOffset", patch.GlobalPointerOffset,
                        "\n.BufferType", patch.BufferType,
                        "\n.BufferIndex", patch.BufferIndex);
            }
            break;

        case PATCH_TOKEN_CONSTANT_POINTER_PROGRAM_BINARY_INFO:
            if (constantSurface != nullptr) {
                const auto &patch = *reinterpret_cast<const SPatchConstantPointerProgramBinaryInfo *>(pPatch);
                if ((patch.ConstantBufferIndex == 0) && (patch.BufferIndex == 0) && (patch.BufferType == PROGRAM_SCOPE_CONSTANT_BUFFER)) {
                    // Patch a pointer stored inside the constant surface so it points at
                    // the surface's own (CPU or 32-bit GPU) base address.
                    void *pPtr = ptrOffset(constantSurface->getUnderlyingBuffer(), static_cast<size_t>(patch.ConstantPointerOffset));
                    if (constantSurface->is32BitAllocation) {
                        *reinterpret_cast<uint32_t *>(pPtr) += static_cast<uint32_t>(constantSurface->getGpuAddressToPatch());
                    } else {
                        *reinterpret_cast<uintptr_t *>(pPtr) += reinterpret_cast<uintptr_t>(constantSurface->getUnderlyingBuffer());
                    }
                } else {
                    printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "Program::parseProgramScopePatchList. Unhandled Data parameter: %d\n", pPatch->Token);
                }
                DBG_LOG(LogPatchTokens,
                        "\n.CONSTANT_POINTER_PROGRAM_BINARY_INFO", pPatch->Token,
                        "\n.Size", pPatch->Size,
                        "\n.ConstantBufferIndex", patch.ConstantBufferIndex,
                        "\n.ConstantPointerOffset", patch.ConstantPointerOffset,
                        "\n.BufferType", patch.BufferType,
                        "\n.BufferIndex", patch.BufferIndex);
            }
            break;

        default:
            // Unknown tokens are tolerated only when explicitly whitelisted.
            if (false == isSafeToSkipUnhandledToken(pPatch->Token)) {
                retVal = CL_INVALID_BINARY;
            }
            printDebugString(DebugManager.flags.PrintDebugMessages.get(), stderr, "Program::parseProgramScopePatchList. Unknown Patch Token: %d\n", pPatch->Token);
            DBG_LOG(LogPatchTokens,
                    "\n.Program Unknown Patch Token", pPatch->Token,
                    "\n.Size", pPatch->Size);
        }

        if (retVal != CL_SUCCESS) {
            break;
        }
        pCurPatchListPtr = ptrOffset(pCurPatchListPtr, pPatch->Size);
    }

    return retVal;
}
// Parses the device (gen) binary: validates the program header, registers the
// program-scope patch list, then decodes each kernel blob in sequence.
// Returns CL_SUCCESS or the first error encountered (CL_INVALID_BINARY for a
// missing/invalid binary, or whatever kernel/patch processing reports).
cl_int Program::processGenBinary() {
    // Any kernel info from a previous build is stale once we reparse.
    cleanCurrentKernelInfo();

    if ((genBinary == nullptr) || (genBinarySize == 0)) {
        return CL_INVALID_BINARY;
    }

    auto binaryHeader = reinterpret_cast<const SProgramBinaryHeader *>(genBinary);
    if (!validateGenBinaryHeader(binaryHeader)) {
        return CL_INVALID_BINARY;
    }

    // Layout: program header | program-scope patch list | kernel blobs.
    auto readPtr = ptrOffset(genBinary, sizeof(SProgramBinaryHeader));

    programScopePatchList = readPtr;
    programScopePatchListSize = binaryHeader->PatchListSize;

    cl_int retVal = CL_SUCCESS;
    if (programScopePatchListSize != 0u) {
        retVal = parseProgramScopePatchList();
    }

    readPtr = ptrOffset(readPtr, binaryHeader->PatchListSize);

    // Each kernel blob reports how many bytes it consumed; stop on first error.
    for (uint32_t kernelIdx = 0; kernelIdx < binaryHeader->NumberOfKernels && retVal == CL_SUCCESS; ++kernelIdx) {
        auto bytesConsumed = processKernel(readPtr, retVal);
        readPtr = ptrOffset(readPtr, bytesConsumed);
    }

    return retVal;
}
bool Program : : validateGenBinaryDevice ( GFXCORE_FAMILY device ) const {
bool isValid = familyEnabled [ device ] ;
return isValid ;
}
bool Program : : validateGenBinaryHeader ( const iOpenCL : : SProgramBinaryHeader * pGenBinaryHeader ) const {
return pGenBinaryHeader - > Magic = = MAGIC_CL & &
pGenBinaryHeader - > Version = = CURRENT_ICBE_VERSION & &
validateGenBinaryDevice ( static_cast < GFXCORE_FAMILY > ( pGenBinaryHeader - > Device ) ) ;
}
// Splits the monolithic debug-data blob produced by the compiler into
// per-kernel VISA and GenISA debug sections and attaches non-owning pointers
// to the matching KernelInfo entries. No-op when no debug data is present.
void Program::processDebugData() {
    if (debugData != nullptr) {
        SProgramDebugDataHeaderIGC *programDebugHeader = reinterpret_cast<SProgramDebugDataHeaderIGC *>(debugData);
        // The blob must describe exactly the kernels parsed from the gen binary.
        DEBUG_BREAK_IF(programDebugHeader->NumberOfKernels != kernelInfoArray.size());
        // The first kernel debug header immediately follows the program header.
        const SKernelDebugDataHeaderIGC *kernelDebugHeader = reinterpret_cast<SKernelDebugDataHeaderIGC *>(ptrOffset(programDebugHeader, sizeof(SProgramDebugDataHeaderIGC)));
        const char *kernelName = nullptr;
        const char *kernelDebugData = nullptr;
        for (uint32_t i = 0; i < programDebugHeader->NumberOfKernels; i++) {
            // Per-kernel layout: header | kernel name | VISA debug | GenISA debug.
            kernelName = reinterpret_cast<const char *>(ptrOffset(kernelDebugHeader, sizeof(SKernelDebugDataHeaderIGC)));
            auto kernelInfo = kernelInfoArray[i];
            // Records are expected in kernelInfoArray order; mismatch is fatal.
            UNRECOVERABLE_IF(kernelInfo->name.compare(0, kernelInfo->name.size(), kernelName) != 0);
            kernelDebugData = ptrOffset(kernelName, kernelDebugHeader->KernelNameSize);
            // Pointers reference into the program-owned debugData buffer; the
            // KernelInfo does not take ownership.
            kernelInfo->debugData.vIsa = kernelDebugData;
            kernelInfo->debugData.genIsa = ptrOffset(kernelDebugData, kernelDebugHeader->SizeVisaDbgInBytes);
            kernelInfo->debugData.vIsaSize = kernelDebugHeader->SizeVisaDbgInBytes;
            kernelInfo->debugData.genIsaSize = kernelDebugHeader->SizeGenIsaDbgInBytes;
            // Advance past both debug sections; the next kernel header follows.
            kernelDebugData = ptrOffset(kernelDebugData, kernelDebugHeader->SizeVisaDbgInBytes + kernelDebugHeader->SizeGenIsaDbgInBytes);
            kernelDebugHeader = reinterpret_cast<const SKernelDebugDataHeaderIGC *>(kernelDebugData);
        }
    }
}
} // namespace OCLRT