From fdcc07a121f3d0b13bddffdfd9038a071f264783 Mon Sep 17 00:00:00 2001 From: Jaime Arteaga Date: Thu, 5 Mar 2020 00:25:57 -0800 Subject: [PATCH] More cleanup of Level Zero core API Change-Id: Iad2118683efb4f5029503a8fec20d88b37d22e07 Signed-off: Jaime Arteaga --- level_zero/api/core/ze_core_loader.cpp | 306 ++++++++++++------ .../core/source/builtin_functions_lib.h | 18 +- .../source/builtin_functions_lib_impl.cpp | 56 ++-- level_zero/core/source/cmdlist_hw.inl | 175 +++++++--- level_zero/core/source/cmdlist_hw_immediate.h | 17 +- .../core/source/cmdlist_hw_immediate.inl | 86 ++--- level_zero/core/source/cmdqueue.cpp | 11 +- level_zero/core/source/cmdqueue.h | 10 +- level_zero/core/source/cmdqueue_hw.inl | 6 +- level_zero/core/source/cmdqueue_imp.h | 8 +- level_zero/core/source/device_imp.cpp | 1 + level_zero/core/source/driver_handle_imp.cpp | 1 + level_zero/core/source/image_hw.h | 85 ++++- level_zero/core/source/image_hw.inl | 12 +- level_zero/core/source/kernel_hw.h | 3 +- level_zero/core/source/kernel_imp.cpp | 1 + level_zero/core/source/memory.cpp | 7 +- .../core/source/memory_operations_helper.h | 1 + level_zero/core/source/module_imp.cpp | 8 +- level_zero/core/source/sampler_imp.cpp | 6 +- 20 files changed, 551 insertions(+), 267 deletions(-) diff --git a/level_zero/api/core/ze_core_loader.cpp b/level_zero/api/core/ze_core_loader.cpp index 6260cad9cd..875b2cf545 100644 --- a/level_zero/api/core/ze_core_loader.cpp +++ b/level_zero/api/core/ze_core_loader.cpp @@ -50,47 +50,61 @@ zeGetDriverProcAddrTable( driver_ddiTable.core_ddiTable.Driver = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnGet = (ze_pfnDriverGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGet_Tracing"); - if (nullptr == pDdiTable->pfnGet) + if (nullptr == pDdiTable->pfnGet) { pDdiTable->pfnGet = driver_ddiTable.core_ddiTable.Driver.pfnGet; + } pDdiTable->pfnGetApiVersion = (ze_pfnDriverGetApiVersion_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGetApiVersion_Tracing"); - if (nullptr == pDdiTable->pfnGetApiVersion) + if (nullptr == pDdiTable->pfnGetApiVersion) { pDdiTable->pfnGetApiVersion = driver_ddiTable.core_ddiTable.Driver.pfnGetApiVersion; + } pDdiTable->pfnGetProperties = (ze_pfnDriverGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGetProperties_Tracing"); - if (nullptr == pDdiTable->pfnGetProperties) + if (nullptr == pDdiTable->pfnGetProperties) { pDdiTable->pfnGetProperties = driver_ddiTable.core_ddiTable.Driver.pfnGetProperties; + } pDdiTable->pfnGetIPCProperties = (ze_pfnDriverGetIPCProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGetIPCProperties_Tracing"); - if (nullptr == pDdiTable->pfnGetIPCProperties) + if (nullptr == pDdiTable->pfnGetIPCProperties) { pDdiTable->pfnGetIPCProperties = driver_ddiTable.core_ddiTable.Driver.pfnGetIPCProperties; + } pDdiTable->pfnGetExtensionFunctionAddress = (ze_pfnDriverGetExtensionFunctionAddress_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGetExtensionFunctionAddress_Tracing"); - if (nullptr == pDdiTable->pfnGetExtensionFunctionAddress) + if (nullptr == pDdiTable->pfnGetExtensionFunctionAddress) { pDdiTable->pfnGetExtensionFunctionAddress = driver_ddiTable.core_ddiTable.Driver.pfnGetExtensionFunctionAddress; + } pDdiTable->pfnAllocSharedMem = (ze_pfnDriverAllocSharedMem_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverAllocSharedMem_Tracing"); - if (nullptr == pDdiTable->pfnAllocSharedMem) + if (nullptr == pDdiTable->pfnAllocSharedMem) { pDdiTable->pfnAllocSharedMem = driver_ddiTable.core_ddiTable.Driver.pfnAllocSharedMem; + } pDdiTable->pfnAllocDeviceMem = (ze_pfnDriverAllocDeviceMem_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverAllocDeviceMem_Tracing"); - if (nullptr == pDdiTable->pfnAllocDeviceMem) + if (nullptr == pDdiTable->pfnAllocDeviceMem) { pDdiTable->pfnAllocDeviceMem = driver_ddiTable.core_ddiTable.Driver.pfnAllocDeviceMem; + } pDdiTable->pfnAllocHostMem = (ze_pfnDriverAllocHostMem_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverAllocHostMem_Tracing"); - if (nullptr == pDdiTable->pfnAllocHostMem) + if (nullptr == pDdiTable->pfnAllocHostMem) { pDdiTable->pfnAllocHostMem = driver_ddiTable.core_ddiTable.Driver.pfnAllocHostMem; + } pDdiTable->pfnFreeMem = (ze_pfnDriverFreeMem_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverFreeMem_Tracing"); - if (nullptr == pDdiTable->pfnFreeMem) + if (nullptr == pDdiTable->pfnFreeMem) { pDdiTable->pfnFreeMem = driver_ddiTable.core_ddiTable.Driver.pfnFreeMem; + } pDdiTable->pfnGetMemAllocProperties = (ze_pfnDriverGetMemAllocProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGetMemAllocProperties_Tracing"); - if (nullptr == pDdiTable->pfnGetMemAllocProperties) + if (nullptr == pDdiTable->pfnGetMemAllocProperties) { pDdiTable->pfnGetMemAllocProperties = driver_ddiTable.core_ddiTable.Driver.pfnGetMemAllocProperties; + } pDdiTable->pfnGetMemAddressRange = (ze_pfnDriverGetMemAddressRange_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGetMemAddressRange_Tracing"); - if (nullptr == pDdiTable->pfnGetMemAddressRange) + if (nullptr == pDdiTable->pfnGetMemAddressRange) { pDdiTable->pfnGetMemAddressRange = driver_ddiTable.core_ddiTable.Driver.pfnGetMemAddressRange; + } pDdiTable->pfnGetMemIpcHandle = (ze_pfnDriverGetMemIpcHandle_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverGetMemIpcHandle_Tracing"); - if (nullptr == pDdiTable->pfnGetMemIpcHandle) + if (nullptr == pDdiTable->pfnGetMemIpcHandle) { pDdiTable->pfnGetMemIpcHandle = driver_ddiTable.core_ddiTable.Driver.pfnGetMemIpcHandle; + } pDdiTable->pfnOpenMemIpcHandle = (ze_pfnDriverOpenMemIpcHandle_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverOpenMemIpcHandle_Tracing"); - if (nullptr == pDdiTable->pfnOpenMemIpcHandle) + if (nullptr == pDdiTable->pfnOpenMemIpcHandle) { pDdiTable->pfnOpenMemIpcHandle = driver_ddiTable.core_ddiTable.Driver.pfnOpenMemIpcHandle; + } pDdiTable->pfnCloseMemIpcHandle = (ze_pfnDriverCloseMemIpcHandle_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDriverCloseMemIpcHandle_Tracing"); - if (nullptr == pDdiTable->pfnCloseMemIpcHandle) + if (nullptr == pDdiTable->pfnCloseMemIpcHandle) { pDdiTable->pfnCloseMemIpcHandle = driver_ddiTable.core_ddiTable.Driver.pfnCloseMemIpcHandle; + } } return result; } @@ -115,8 +129,9 @@ zeGetGlobalProcAddrTable( driver_ddiTable.core_ddiTable.Global = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnInit = (ze_pfnInit_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeInit_Tracing"); - if (nullptr == pDdiTable->pfnInit) + if (nullptr == pDdiTable->pfnInit) { pDdiTable->pfnInit = driver_ddiTable.core_ddiTable.Global.pfnInit; + } } return result; } @@ -160,65 +175,85 @@ zeGetDeviceProcAddrTable( driver_ddiTable.core_ddiTable.Device = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnGet = (ze_pfnDeviceGet_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGet_Tracing"); - if (nullptr == pDdiTable->pfnGet) + if (nullptr == pDdiTable->pfnGet) { pDdiTable->pfnGet = driver_ddiTable.core_ddiTable.Device.pfnGet; + } pDdiTable->pfnGetSubDevices = (ze_pfnDeviceGetSubDevices_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetSubDevices_Tracing"); - if (nullptr == pDdiTable->pfnGetSubDevices) + if (nullptr == pDdiTable->pfnGetSubDevices) { pDdiTable->pfnGetSubDevices = driver_ddiTable.core_ddiTable.Device.pfnGetSubDevices; + } pDdiTable->pfnGetProperties = (ze_pfnDeviceGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetProperties_Tracing"); - if (nullptr == pDdiTable->pfnGetProperties) + if (nullptr == pDdiTable->pfnGetProperties) { pDdiTable->pfnGetProperties = driver_ddiTable.core_ddiTable.Device.pfnGetProperties; + } pDdiTable->pfnSystemBarrier = (ze_pfnDeviceSystemBarrier_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceSystemBarrier_Tracing"); - if (nullptr == pDdiTable->pfnSystemBarrier) + if (nullptr == pDdiTable->pfnSystemBarrier) { pDdiTable->pfnSystemBarrier = driver_ddiTable.core_ddiTable.Device.pfnSystemBarrier; + } pDdiTable->pfnRegisterCLMemory = (ze_pfnDeviceRegisterCLMemory_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceRegisterCLMemory_Tracing"); - if (nullptr == pDdiTable->pfnRegisterCLMemory) + if (nullptr == pDdiTable->pfnRegisterCLMemory) { pDdiTable->pfnRegisterCLMemory = driver_ddiTable.core_ddiTable.Device.pfnRegisterCLMemory; + } pDdiTable->pfnRegisterCLProgram = (ze_pfnDeviceRegisterCLProgram_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceRegisterCLProgram_Tracing"); - if (nullptr == pDdiTable->pfnRegisterCLProgram) + if (nullptr == pDdiTable->pfnRegisterCLProgram) { pDdiTable->pfnRegisterCLProgram = driver_ddiTable.core_ddiTable.Device.pfnRegisterCLProgram; + } pDdiTable->pfnRegisterCLCommandQueue = (ze_pfnDeviceRegisterCLCommandQueue_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceRegisterCLCommandQueue_Tracing"); - if (nullptr == pDdiTable->pfnRegisterCLCommandQueue) + if (nullptr == pDdiTable->pfnRegisterCLCommandQueue) { pDdiTable->pfnRegisterCLCommandQueue = driver_ddiTable.core_ddiTable.Device.pfnRegisterCLCommandQueue; + } pDdiTable->pfnGetComputeProperties = (ze_pfnDeviceGetComputeProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetComputeProperties_Tracing"); - if (nullptr == pDdiTable->pfnGetComputeProperties) + if (nullptr == pDdiTable->pfnGetComputeProperties) { pDdiTable->pfnGetComputeProperties = driver_ddiTable.core_ddiTable.Device.pfnGetComputeProperties; + } pDdiTable->pfnGetKernelProperties = (ze_pfnDeviceGetKernelProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetKernelProperties_Tracing"); - if (nullptr == pDdiTable->pfnGetKernelProperties) + if (nullptr == pDdiTable->pfnGetKernelProperties) { pDdiTable->pfnGetKernelProperties = driver_ddiTable.core_ddiTable.Device.pfnGetKernelProperties; + } pDdiTable->pfnGetMemoryProperties = (ze_pfnDeviceGetMemoryProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetMemoryProperties_Tracing"); - if (nullptr == pDdiTable->pfnGetMemoryProperties) + if (nullptr == pDdiTable->pfnGetMemoryProperties) { pDdiTable->pfnGetMemoryProperties = driver_ddiTable.core_ddiTable.Device.pfnGetMemoryProperties; + } pDdiTable->pfnGetMemoryAccessProperties = (ze_pfnDeviceGetMemoryAccessProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetMemoryAccessProperties_Tracing"); - if (nullptr == pDdiTable->pfnGetMemoryAccessProperties) + if (nullptr == pDdiTable->pfnGetMemoryAccessProperties) { pDdiTable->pfnGetMemoryAccessProperties = driver_ddiTable.core_ddiTable.Device.pfnGetMemoryAccessProperties; + } pDdiTable->pfnGetCacheProperties = (ze_pfnDeviceGetCacheProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetCacheProperties_Tracing"); - if (nullptr == pDdiTable->pfnGetCacheProperties) + if (nullptr == pDdiTable->pfnGetCacheProperties) { pDdiTable->pfnGetCacheProperties = driver_ddiTable.core_ddiTable.Device.pfnGetCacheProperties; + } pDdiTable->pfnGetImageProperties = (ze_pfnDeviceGetImageProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetImageProperties_Tracing"); - if (nullptr == pDdiTable->pfnGetImageProperties) + if (nullptr == pDdiTable->pfnGetImageProperties) { pDdiTable->pfnGetImageProperties = driver_ddiTable.core_ddiTable.Device.pfnGetImageProperties; + } pDdiTable->pfnGetP2PProperties = (ze_pfnDeviceGetP2PProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceGetP2PProperties_Tracing"); - if (nullptr == pDdiTable->pfnGetP2PProperties) + if (nullptr == pDdiTable->pfnGetP2PProperties) { pDdiTable->pfnGetP2PProperties = driver_ddiTable.core_ddiTable.Device.pfnGetP2PProperties; + } pDdiTable->pfnCanAccessPeer = (ze_pfnDeviceCanAccessPeer_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceCanAccessPeer_Tracing"); - if (nullptr == pDdiTable->pfnCanAccessPeer) + if (nullptr == pDdiTable->pfnCanAccessPeer) { pDdiTable->pfnCanAccessPeer = driver_ddiTable.core_ddiTable.Device.pfnCanAccessPeer; + } pDdiTable->pfnSetLastLevelCacheConfig = (ze_pfnDeviceSetLastLevelCacheConfig_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceSetLastLevelCacheConfig_Tracing"); - if (nullptr == pDdiTable->pfnSetLastLevelCacheConfig) + if (nullptr == pDdiTable->pfnSetLastLevelCacheConfig) { pDdiTable->pfnSetLastLevelCacheConfig = driver_ddiTable.core_ddiTable.Device.pfnSetLastLevelCacheConfig; + } pDdiTable->pfnMakeMemoryResident = (ze_pfnDeviceMakeMemoryResident_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceMakeMemoryResident_Tracing"); - if (nullptr == pDdiTable->pfnMakeMemoryResident) + if (nullptr == pDdiTable->pfnMakeMemoryResident) { pDdiTable->pfnMakeMemoryResident = driver_ddiTable.core_ddiTable.Device.pfnMakeMemoryResident; + } pDdiTable->pfnEvictMemory = (ze_pfnDeviceEvictMemory_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceEvictMemory_Tracing"); - if (nullptr == pDdiTable->pfnEvictMemory) + if (nullptr == pDdiTable->pfnEvictMemory) { pDdiTable->pfnEvictMemory = driver_ddiTable.core_ddiTable.Device.pfnEvictMemory; + } pDdiTable->pfnMakeImageResident = (ze_pfnDeviceMakeImageResident_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceMakeImageResident_Tracing"); - if (nullptr == pDdiTable->pfnMakeImageResident) + if (nullptr == pDdiTable->pfnMakeImageResident) { pDdiTable->pfnMakeImageResident = driver_ddiTable.core_ddiTable.Device.pfnMakeImageResident; + } pDdiTable->pfnEvictImage = (ze_pfnDeviceEvictImage_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeDeviceEvictImage_Tracing"); - if (nullptr == pDdiTable->pfnEvictImage) + if (nullptr == pDdiTable->pfnEvictImage) { pDdiTable->pfnEvictImage = driver_ddiTable.core_ddiTable.Device.pfnEvictImage; + } } return result; } @@ -246,17 +281,21 @@ zeGetCommandQueueProcAddrTable( driver_ddiTable.core_ddiTable.CommandQueue = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnCreate = (ze_pfnCommandQueueCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandQueueCreate_Tracing"); - if (nullptr == pDdiTable->pfnCreate) + if (nullptr == pDdiTable->pfnCreate) { pDdiTable->pfnCreate = driver_ddiTable.core_ddiTable.CommandQueue.pfnCreate; + } pDdiTable->pfnDestroy = (ze_pfnCommandQueueDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandQueueDestroy_Tracing"); - if (nullptr == pDdiTable->pfnDestroy) + if (nullptr == pDdiTable->pfnDestroy) { pDdiTable->pfnDestroy = driver_ddiTable.core_ddiTable.CommandQueue.pfnDestroy; + } pDdiTable->pfnExecuteCommandLists = (ze_pfnCommandQueueExecuteCommandLists_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandQueueExecuteCommandLists_Tracing"); - if (nullptr == pDdiTable->pfnExecuteCommandLists) + if (nullptr == pDdiTable->pfnExecuteCommandLists) { pDdiTable->pfnExecuteCommandLists = driver_ddiTable.core_ddiTable.CommandQueue.pfnExecuteCommandLists; + } pDdiTable->pfnSynchronize = (ze_pfnCommandQueueSynchronize_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandQueueSynchronize_Tracing"); - if (nullptr == pDdiTable->pfnSynchronize) + if (nullptr == pDdiTable->pfnSynchronize) { pDdiTable->pfnSynchronize = driver_ddiTable.core_ddiTable.CommandQueue.pfnSynchronize; + } } return result; } @@ -303,74 +342,97 @@ zeGetCommandListProcAddrTable( driver_ddiTable.core_ddiTable.CommandList = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnAppendBarrier = (ze_pfnCommandListAppendBarrier_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendBarrier_Tracing"); - if (nullptr == pDdiTable->pfnAppendBarrier) + if (nullptr == pDdiTable->pfnAppendBarrier) { pDdiTable->pfnAppendBarrier = driver_ddiTable.core_ddiTable.CommandList.pfnAppendBarrier; + } pDdiTable->pfnAppendMemoryRangesBarrier = (ze_pfnCommandListAppendMemoryRangesBarrier_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendMemoryRangesBarrier_Tracing"); - if (nullptr == pDdiTable->pfnAppendMemoryRangesBarrier) + if (nullptr == pDdiTable->pfnAppendMemoryRangesBarrier) { pDdiTable->pfnAppendMemoryRangesBarrier = driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryRangesBarrier; + } pDdiTable->pfnCreate = (ze_pfnCommandListCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListCreate_Tracing"); - if (nullptr == pDdiTable->pfnCreate) + if (nullptr == pDdiTable->pfnCreate) { pDdiTable->pfnCreate = driver_ddiTable.core_ddiTable.CommandList.pfnCreate; + } pDdiTable->pfnCreateImmediate = (ze_pfnCommandListCreateImmediate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListCreateImmediate_Tracing"); - if (nullptr == pDdiTable->pfnDestroy) + if (nullptr == pDdiTable->pfnDestroy) { pDdiTable->pfnDestroy = driver_ddiTable.core_ddiTable.CommandList.pfnDestroy; + } pDdiTable->pfnDestroy = (ze_pfnCommandListDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListDestroy_Tracing"); - if (nullptr == pDdiTable->pfnDestroy) + if (nullptr == pDdiTable->pfnDestroy) { pDdiTable->pfnDestroy = driver_ddiTable.core_ddiTable.CommandList.pfnDestroy; + } pDdiTable->pfnClose = (ze_pfnCommandListClose_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListClose_Tracing"); - if (nullptr == pDdiTable->pfnClose) + if (nullptr == pDdiTable->pfnClose) { pDdiTable->pfnClose = driver_ddiTable.core_ddiTable.CommandList.pfnClose; + } pDdiTable->pfnReset = (ze_pfnCommandListReset_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListReset_Tracing"); - if (nullptr == pDdiTable->pfnReset) + if (nullptr == pDdiTable->pfnReset) { pDdiTable->pfnReset = driver_ddiTable.core_ddiTable.CommandList.pfnReset; + } pDdiTable->pfnAppendMemoryCopy = (ze_pfnCommandListAppendMemoryCopy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendMemoryCopy_Tracing"); - if (nullptr == pDdiTable->pfnAppendMemoryCopy) + if (nullptr == pDdiTable->pfnAppendMemoryCopy) { pDdiTable->pfnAppendMemoryCopy = driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryCopy; + } pDdiTable->pfnAppendMemoryCopyRegion = (ze_pfnCommandListAppendMemoryCopyRegion_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendMemoryCopyRegion_Tracing"); - if (nullptr == pDdiTable->pfnAppendMemoryCopyRegion) + if (nullptr == pDdiTable->pfnAppendMemoryCopyRegion) { pDdiTable->pfnAppendMemoryCopyRegion = driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryCopyRegion; + } pDdiTable->pfnAppendMemoryFill = (ze_pfnCommandListAppendMemoryFill_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendMemoryFill_Tracing"); - if (nullptr == pDdiTable->pfnAppendMemoryFill) + if (nullptr == pDdiTable->pfnAppendMemoryFill) { pDdiTable->pfnAppendMemoryFill = driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryFill; + } pDdiTable->pfnAppendImageCopy = (ze_pfnCommandListAppendImageCopy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendImageCopy_Tracing"); - if (nullptr == pDdiTable->pfnAppendImageCopy) + if (nullptr == pDdiTable->pfnAppendImageCopy) { pDdiTable->pfnAppendImageCopy = driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopy; + } pDdiTable->pfnAppendImageCopyRegion = (ze_pfnCommandListAppendImageCopyRegion_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendImageCopyRegion_Tracing"); - if (nullptr == pDdiTable->pfnAppendImageCopyRegion) + if (nullptr == pDdiTable->pfnAppendImageCopyRegion) { pDdiTable->pfnAppendImageCopyRegion = driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyRegion; + } pDdiTable->pfnAppendImageCopyToMemory = (ze_pfnCommandListAppendImageCopyToMemory_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendImageCopyToMemory_Tracing"); - if (nullptr == pDdiTable->pfnAppendImageCopyToMemory) + if (nullptr == pDdiTable->pfnAppendImageCopyToMemory) { pDdiTable->pfnAppendImageCopyToMemory = driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyToMemory; + } pDdiTable->pfnAppendImageCopyFromMemory = (ze_pfnCommandListAppendImageCopyFromMemory_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendImageCopyFromMemory_Tracing"); - if (nullptr == pDdiTable->pfnAppendImageCopyFromMemory) + if (nullptr == pDdiTable->pfnAppendImageCopyFromMemory) { pDdiTable->pfnAppendImageCopyFromMemory = driver_ddiTable.core_ddiTable.CommandList.pfnAppendImageCopyFromMemory; + } pDdiTable->pfnAppendMemoryPrefetch = (ze_pfnCommandListAppendMemoryPrefetch_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendMemoryPrefetch_Tracing"); - if (nullptr == pDdiTable->pfnAppendMemoryPrefetch) + if (nullptr == pDdiTable->pfnAppendMemoryPrefetch) { pDdiTable->pfnAppendMemoryPrefetch = driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemoryPrefetch; + } pDdiTable->pfnAppendMemAdvise = (ze_pfnCommandListAppendMemAdvise_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendMemAdvise_Tracing"); - if (nullptr == pDdiTable->pfnAppendMemAdvise) + if (nullptr == pDdiTable->pfnAppendMemAdvise) { pDdiTable->pfnAppendMemAdvise = driver_ddiTable.core_ddiTable.CommandList.pfnAppendMemAdvise; + } pDdiTable->pfnAppendSignalEvent = (ze_pfnCommandListAppendSignalEvent_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendSignalEvent_Tracing"); - if (nullptr == pDdiTable->pfnAppendSignalEvent) + if (nullptr == pDdiTable->pfnAppendSignalEvent) { pDdiTable->pfnAppendSignalEvent = driver_ddiTable.core_ddiTable.CommandList.pfnAppendSignalEvent; + } pDdiTable->pfnAppendWaitOnEvents = (ze_pfnCommandListAppendWaitOnEvents_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendWaitOnEvents_Tracing"); - if (nullptr == pDdiTable->pfnAppendWaitOnEvents) + if (nullptr == pDdiTable->pfnAppendWaitOnEvents) { pDdiTable->pfnAppendWaitOnEvents = driver_ddiTable.core_ddiTable.CommandList.pfnAppendWaitOnEvents; + } pDdiTable->pfnAppendEventReset = (ze_pfnCommandListAppendEventReset_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendEventReset_Tracing"); - if (nullptr == pDdiTable->pfnAppendEventReset) + if (nullptr == pDdiTable->pfnAppendEventReset) { pDdiTable->pfnAppendEventReset = driver_ddiTable.core_ddiTable.CommandList.pfnAppendEventReset; + } pDdiTable->pfnAppendLaunchKernel = (ze_pfnCommandListAppendLaunchKernel_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendLaunchKernel_Tracing"); - if (nullptr == pDdiTable->pfnAppendLaunchKernel) + if (nullptr == pDdiTable->pfnAppendLaunchKernel) { pDdiTable->pfnAppendLaunchKernel = driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchKernel; + } pDdiTable->pfnAppendLaunchCooperativeKernel = (ze_pfnCommandListAppendLaunchCooperativeKernel_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendLaunchCooperativeKernel_Tracing"); - if (nullptr == pDdiTable->pfnAppendLaunchCooperativeKernel) + if (nullptr == pDdiTable->pfnAppendLaunchCooperativeKernel) { pDdiTable->pfnAppendLaunchCooperativeKernel = driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchCooperativeKernel; + } pDdiTable->pfnAppendLaunchKernelIndirect = (ze_pfnCommandListAppendLaunchKernelIndirect_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendLaunchKernelIndirect_Tracing"); - if (nullptr == pDdiTable->pfnAppendLaunchKernelIndirect) + if (nullptr == pDdiTable->pfnAppendLaunchKernelIndirect) { pDdiTable->pfnAppendLaunchKernelIndirect = driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchKernelIndirect; + } pDdiTable->pfnAppendLaunchMultipleKernelsIndirect = (ze_pfnCommandListAppendLaunchMultipleKernelsIndirect_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeCommandListAppendLaunchMultipleKernelsIndirect_Tracing"); - if (nullptr == pDdiTable->pfnAppendLaunchMultipleKernelsIndirect) + if (nullptr == pDdiTable->pfnAppendLaunchMultipleKernelsIndirect) { pDdiTable->pfnAppendLaunchMultipleKernelsIndirect = driver_ddiTable.core_ddiTable.CommandList.pfnAppendLaunchMultipleKernelsIndirect; + } } return result; } @@ -399,20 +461,25 @@ zeGetFenceProcAddrTable( driver_ddiTable.core_ddiTable.Fence = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnCreate = (ze_pfnFenceCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeFenceCreate_Tracing"); - if (nullptr == pDdiTable->pfnCreate) + if (nullptr == pDdiTable->pfnCreate) { pDdiTable->pfnCreate = driver_ddiTable.core_ddiTable.Fence.pfnCreate; + } pDdiTable->pfnDestroy = (ze_pfnFenceDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeFenceDestroy_Tracing"); - if (nullptr == pDdiTable->pfnDestroy) + if (nullptr == pDdiTable->pfnDestroy) { pDdiTable->pfnDestroy = driver_ddiTable.core_ddiTable.Fence.pfnDestroy; + } pDdiTable->pfnHostSynchronize = (ze_pfnFenceHostSynchronize_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeFenceHostSynchronize_Tracing"); - if (nullptr == pDdiTable->pfnHostSynchronize) + if (nullptr == pDdiTable->pfnHostSynchronize) { pDdiTable->pfnHostSynchronize = driver_ddiTable.core_ddiTable.Fence.pfnHostSynchronize; + } pDdiTable->pfnQueryStatus = (ze_pfnFenceQueryStatus_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeFenceQueryStatus_Tracing"); - if (nullptr == pDdiTable->pfnQueryStatus) + if (nullptr == pDdiTable->pfnQueryStatus) { pDdiTable->pfnQueryStatus = driver_ddiTable.core_ddiTable.Fence.pfnQueryStatus; + } pDdiTable->pfnReset = (ze_pfnFenceReset_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeFenceReset_Tracing"); - if (nullptr == pDdiTable->pfnReset) + if (nullptr == pDdiTable->pfnReset) { pDdiTable->pfnReset = driver_ddiTable.core_ddiTable.Fence.pfnReset; + } } return result; } @@ -441,20 +508,25 @@ zeGetEventPoolProcAddrTable( driver_ddiTable.core_ddiTable.EventPool = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnCreate = (ze_pfnEventPoolCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventPoolCreate_Tracing"); - if (nullptr == pDdiTable->pfnCreate) + if (nullptr == pDdiTable->pfnCreate) { pDdiTable->pfnCreate = driver_ddiTable.core_ddiTable.EventPool.pfnCreate; + } pDdiTable->pfnDestroy = (ze_pfnEventPoolDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventPoolDestroy_Tracing"); - if (nullptr == pDdiTable->pfnDestroy) + if (nullptr == pDdiTable->pfnDestroy) { pDdiTable->pfnDestroy = driver_ddiTable.core_ddiTable.EventPool.pfnDestroy; + } pDdiTable->pfnGetIpcHandle = (ze_pfnEventPoolGetIpcHandle_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventPoolGetIpcHandle_Tracing"); - if (nullptr == pDdiTable->pfnGetIpcHandle) + if (nullptr == pDdiTable->pfnGetIpcHandle) { pDdiTable->pfnGetIpcHandle = driver_ddiTable.core_ddiTable.EventPool.pfnGetIpcHandle; + } pDdiTable->pfnOpenIpcHandle = (ze_pfnEventPoolOpenIpcHandle_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventPoolOpenIpcHandle_Tracing"); - if (nullptr == pDdiTable->pfnOpenIpcHandle) + if (nullptr == pDdiTable->pfnOpenIpcHandle) { pDdiTable->pfnOpenIpcHandle = driver_ddiTable.core_ddiTable.EventPool.pfnOpenIpcHandle; + } pDdiTable->pfnCloseIpcHandle = (ze_pfnEventPoolCloseIpcHandle_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventPoolCloseIpcHandle_Tracing"); - if (nullptr == pDdiTable->pfnCloseIpcHandle) + if (nullptr == pDdiTable->pfnCloseIpcHandle) { pDdiTable->pfnCloseIpcHandle = driver_ddiTable.core_ddiTable.EventPool.pfnCloseIpcHandle; + } } return result; } @@ -485,26 +557,33 @@ zeGetEventProcAddrTable( driver_ddiTable.core_ddiTable.Event = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnCreate = (ze_pfnEventCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventCreate_Tracing"); - if (nullptr == pDdiTable->pfnCreate) + if (nullptr == pDdiTable->pfnCreate) { pDdiTable->pfnCreate = driver_ddiTable.core_ddiTable.Event.pfnCreate; + } pDdiTable->pfnDestroy = (ze_pfnEventDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventDestroy_Tracing"); - if (nullptr == pDdiTable->pfnDestroy) + if (nullptr == pDdiTable->pfnDestroy) { pDdiTable->pfnDestroy = driver_ddiTable.core_ddiTable.Event.pfnDestroy; + } pDdiTable->pfnHostSignal = (ze_pfnEventHostSignal_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventHostSignal_Tracing"); - if (nullptr == pDdiTable->pfnHostSignal) + if (nullptr == pDdiTable->pfnHostSignal) { pDdiTable->pfnHostSignal = driver_ddiTable.core_ddiTable.Event.pfnHostSignal; + } pDdiTable->pfnHostSynchronize = (ze_pfnEventHostSynchronize_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventHostSynchronize_Tracing"); - if (nullptr == pDdiTable->pfnHostSynchronize) + if (nullptr == pDdiTable->pfnHostSynchronize) { pDdiTable->pfnHostSynchronize = driver_ddiTable.core_ddiTable.Event.pfnHostSynchronize; + } pDdiTable->pfnQueryStatus = (ze_pfnEventQueryStatus_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventQueryStatus_Tracing"); - if (nullptr == pDdiTable->pfnQueryStatus) + if (nullptr == pDdiTable->pfnQueryStatus) { pDdiTable->pfnQueryStatus = driver_ddiTable.core_ddiTable.Event.pfnQueryStatus; + } pDdiTable->pfnHostReset = (ze_pfnEventHostReset_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventHostReset_Tracing"); - if (nullptr == pDdiTable->pfnHostReset) + if (nullptr == pDdiTable->pfnHostReset) { pDdiTable->pfnHostReset = driver_ddiTable.core_ddiTable.Event.pfnHostReset; + } pDdiTable->pfnGetTimestamp = (ze_pfnEventGetTimestamp_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeEventGetTimestamp_Tracing"); - if (nullptr == pDdiTable->pfnGetTimestamp) + if (nullptr == pDdiTable->pfnGetTimestamp) { pDdiTable->pfnGetTimestamp = driver_ddiTable.core_ddiTable.Event.pfnGetTimestamp; + } } return result; } @@ -531,14 +610,17 @@ zeGetImageProcAddrTable( driver_ddiTable.core_ddiTable.Image = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnGetProperties = (ze_pfnImageGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeImageGetProperties_Tracing"); - if (nullptr == pDdiTable->pfnGetProperties) + if (nullptr == pDdiTable->pfnGetProperties) { pDdiTable->pfnGetProperties = driver_ddiTable.core_ddiTable.Image.pfnGetProperties; + } pDdiTable->pfnCreate = (ze_pfnImageCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeImageCreate_Tracing"); - if (nullptr == pDdiTable->pfnCreate) + if (nullptr == pDdiTable->pfnCreate) { pDdiTable->pfnCreate = driver_ddiTable.core_ddiTable.Image.pfnCreate; + } pDdiTable->pfnDestroy = (ze_pfnImageDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeImageDestroy_Tracing"); - if (nullptr == pDdiTable->pfnDestroy) + if (nullptr == pDdiTable->pfnDestroy) { pDdiTable->pfnDestroy = driver_ddiTable.core_ddiTable.Image.pfnDestroy; + } } return result; } @@ -568,23 +650,29 @@ zeGetModuleProcAddrTable( driver_ddiTable.core_ddiTable.Module = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnCreate = (ze_pfnModuleCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeModuleCreate_Tracing"); - if (nullptr == pDdiTable->pfnCreate) + if (nullptr == pDdiTable->pfnCreate) { pDdiTable->pfnCreate = driver_ddiTable.core_ddiTable.Module.pfnCreate; + } pDdiTable->pfnDestroy = (ze_pfnModuleDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeModuleDestroy_Tracing"); - if (nullptr == pDdiTable->pfnDestroy) + if (nullptr == pDdiTable->pfnDestroy) { pDdiTable->pfnDestroy = driver_ddiTable.core_ddiTable.Module.pfnDestroy; + } pDdiTable->pfnGetNativeBinary = (ze_pfnModuleGetNativeBinary_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeModuleGetNativeBinary_Tracing"); - if (nullptr == pDdiTable->pfnGetNativeBinary) + if (nullptr == pDdiTable->pfnGetNativeBinary) { pDdiTable->pfnGetNativeBinary = driver_ddiTable.core_ddiTable.Module.pfnGetNativeBinary; + } pDdiTable->pfnGetGlobalPointer = (ze_pfnModuleGetGlobalPointer_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeModuleGetGlobalPointer_Tracing"); - if (nullptr == pDdiTable->pfnGetGlobalPointer) + if (nullptr == pDdiTable->pfnGetGlobalPointer) { pDdiTable->pfnGetGlobalPointer = driver_ddiTable.core_ddiTable.Module.pfnGetGlobalPointer; + } pDdiTable->pfnGetFunctionPointer = (ze_pfnModuleGetFunctionPointer_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeModuleGetFunctionPointer_Tracing"); - if (nullptr == pDdiTable->pfnGetFunctionPointer) + if (nullptr == pDdiTable->pfnGetFunctionPointer) { pDdiTable->pfnGetFunctionPointer = driver_ddiTable.core_ddiTable.Module.pfnGetFunctionPointer; + } pDdiTable->pfnGetKernelNames = (ze_pfnModuleGetKernelNames_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeModuleGetKernelNames_Tracing"); - if (nullptr == pDdiTable->pfnGetKernelNames) + if (nullptr == pDdiTable->pfnGetKernelNames) { pDdiTable->pfnGetKernelNames = driver_ddiTable.core_ddiTable.Module.pfnGetKernelNames; + } } return result; } @@ -610,11 +698,13 @@ zeGetModuleBuildLogProcAddrTable( driver_ddiTable.core_ddiTable.ModuleBuildLog = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnDestroy = (ze_pfnModuleBuildLogDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeModuleBuildLogDestroy_Tracing"); - if (nullptr == pDdiTable->pfnDestroy) + if (nullptr == pDdiTable->pfnDestroy) { pDdiTable->pfnDestroy = driver_ddiTable.core_ddiTable.ModuleBuildLog.pfnDestroy; + } pDdiTable->pfnGetString = (ze_pfnModuleBuildLogGetString_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeModuleBuildLogGetString_Tracing"); - if (nullptr == pDdiTable->pfnGetString) + if (nullptr == pDdiTable->pfnGetString) { pDdiTable->pfnGetString = driver_ddiTable.core_ddiTable.ModuleBuildLog.pfnGetString; + } } return result; } @@ -648,35 +738,45 @@ zeGetKernelProcAddrTable( driver_ddiTable.core_ddiTable.Kernel = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnSetIntermediateCacheConfig = (ze_pfnKernelSetIntermediateCacheConfig_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelSetIntermediateCacheConfig_Tracing"); - if (nullptr == pDdiTable->pfnSetIntermediateCacheConfig) + if (nullptr == pDdiTable->pfnSetIntermediateCacheConfig) { pDdiTable->pfnSetIntermediateCacheConfig = driver_ddiTable.core_ddiTable.Kernel.pfnSetIntermediateCacheConfig; + } pDdiTable->pfnCreate = (ze_pfnKernelCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelCreate_Tracing"); - if (nullptr == pDdiTable->pfnCreate) + if (nullptr == pDdiTable->pfnCreate) { pDdiTable->pfnCreate = driver_ddiTable.core_ddiTable.Kernel.pfnCreate; + } pDdiTable->pfnDestroy = (ze_pfnKernelDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelDestroy_Tracing"); - if (nullptr == pDdiTable->pfnDestroy) + if (nullptr == pDdiTable->pfnDestroy) { pDdiTable->pfnDestroy = driver_ddiTable.core_ddiTable.Kernel.pfnDestroy; + } pDdiTable->pfnSetGroupSize = (ze_pfnKernelSetGroupSize_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelSetGroupSize_Tracing"); - if (nullptr == pDdiTable->pfnSetGroupSize) + if (nullptr == pDdiTable->pfnSetGroupSize) { pDdiTable->pfnSetGroupSize = driver_ddiTable.core_ddiTable.Kernel.pfnSetGroupSize; + } pDdiTable->pfnSuggestGroupSize = (ze_pfnKernelSuggestGroupSize_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelSuggestGroupSize_Tracing"); - if (nullptr == pDdiTable->pfnSuggestGroupSize) + if (nullptr == pDdiTable->pfnSuggestGroupSize) { pDdiTable->pfnSuggestGroupSize = driver_ddiTable.core_ddiTable.Kernel.pfnSuggestGroupSize; + } pDdiTable->pfnSuggestMaxCooperativeGroupCount = (ze_pfnKernelSuggestMaxCooperativeGroupCount_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelSuggestMaxCooperativeGroupCount_Tracing"); - if (nullptr == pDdiTable->pfnSuggestMaxCooperativeGroupCount) + if (nullptr == pDdiTable->pfnSuggestMaxCooperativeGroupCount) { pDdiTable->pfnSuggestMaxCooperativeGroupCount = driver_ddiTable.core_ddiTable.Kernel.pfnSuggestMaxCooperativeGroupCount; + } pDdiTable->pfnSetArgumentValue = (ze_pfnKernelSetArgumentValue_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelSetArgumentValue_Tracing"); - if (nullptr == pDdiTable->pfnSetArgumentValue) + if (nullptr == pDdiTable->pfnSetArgumentValue) { pDdiTable->pfnSetArgumentValue = driver_ddiTable.core_ddiTable.Kernel.pfnSetArgumentValue; + } pDdiTable->pfnSetAttribute = (ze_pfnKernelSetAttribute_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelSetAttribute_Tracing"); - if (nullptr == pDdiTable->pfnSetAttribute) + if (nullptr == pDdiTable->pfnSetAttribute) { pDdiTable->pfnSetAttribute = driver_ddiTable.core_ddiTable.Kernel.pfnSetAttribute; + } pDdiTable->pfnGetAttribute = (ze_pfnKernelGetAttribute_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelGetAttribute_Tracing"); - if (nullptr == pDdiTable->pfnGetAttribute) + if (nullptr == pDdiTable->pfnGetAttribute) { pDdiTable->pfnGetAttribute = driver_ddiTable.core_ddiTable.Kernel.pfnGetAttribute; + } pDdiTable->pfnGetProperties = (ze_pfnKernelGetProperties_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeKernelGetProperties_Tracing"); - if (nullptr == pDdiTable->pfnGetProperties) + if (nullptr == pDdiTable->pfnGetProperties) { pDdiTable->pfnGetProperties = driver_ddiTable.core_ddiTable.Kernel.pfnGetProperties; + } } return result; } @@ -702,11 +802,13 @@ zeGetSamplerProcAddrTable( driver_ddiTable.core_ddiTable.Sampler = *pDdiTable; if (driver_ddiTable.enableTracing) { pDdiTable->pfnCreate = (ze_pfnSamplerCreate_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeSamplerCreate_Tracing"); - if (nullptr == pDdiTable->pfnCreate) + if (nullptr == pDdiTable->pfnCreate) { pDdiTable->pfnCreate = driver_ddiTable.core_ddiTable.Sampler.pfnCreate; + } pDdiTable->pfnDestroy = (ze_pfnSamplerDestroy_t)GET_FUNCTION_PTR(driver_ddiTable.driverLibrary, "zeSamplerDestroy_Tracing"); - if (nullptr == pDdiTable->pfnDestroy) + if (nullptr == pDdiTable->pfnDestroy) { pDdiTable->pfnDestroy = driver_ddiTable.core_ddiTable.Sampler.pfnDestroy; + } } return result; } diff --git a/level_zero/core/source/builtin_functions_lib.h b/level_zero/core/source/builtin_functions_lib.h index 6e92c5ce0d..f3c5073915 100644 --- a/level_zero/core/source/builtin_functions_lib.h +++ b/level_zero/core/source/builtin_functions_lib.h @@ -19,23 +19,23 @@ struct Kernel; enum class Builtin : uint32_t { CopyBufferBytes = 0u, - CopyBufferToBufferSide, - CopyBufferToBufferMiddle, - CopyImageRegion, - FillBufferImmediate, - FillBufferSSHOffset, CopyBufferRectBytes2d, CopyBufferRectBytes3d, - CopyBufferToImage3dBytes, + CopyBufferToBufferMiddle, + CopyBufferToBufferSide, + CopyBufferToImage3d16Bytes, CopyBufferToImage3d2Bytes, CopyBufferToImage3d4Bytes, CopyBufferToImage3d8Bytes, - CopyBufferToImage3d16Bytes, - CopyImage3dToBufferBytes, + CopyBufferToImage3dBytes, + CopyImage3dToBuffer16Bytes, CopyImage3dToBuffer2Bytes, CopyImage3dToBuffer4Bytes, CopyImage3dToBuffer8Bytes, - CopyImage3dToBuffer16Bytes, + CopyImage3dToBufferBytes, + CopyImageRegion, + FillBufferImmediate, + FillBufferSSHOffset, COUNT }; diff --git a/level_zero/core/source/builtin_functions_lib_impl.cpp b/level_zero/core/source/builtin_functions_lib_impl.cpp index 25011bdfb8..06d56ca206 100644 --- a/level_zero/core/source/builtin_functions_lib_impl.cpp +++ b/level_zero/core/source/builtin_functions_lib_impl.cpp @@ -39,26 +39,6 @@ void BuiltinFunctionsLibImpl::initFunctions() { builtinName = "copyBufferToBufferBytesSingle"; builtin = NEO::EBuiltInOps::CopyBufferToBuffer; break; - case Builtin::CopyBufferToBufferSide: - builtinName = "CopyBufferToBufferSideRegion"; - builtin = NEO::EBuiltInOps::CopyBufferToBuffer; - break; - case Builtin::CopyBufferToBufferMiddle: - builtinName = "CopyBufferToBufferMiddleRegion"; - builtin = NEO::EBuiltInOps::CopyBufferToBuffer; - break; - case Builtin::CopyImageRegion: - builtinName = "CopyImageToImage3d"; - builtin = NEO::EBuiltInOps::CopyImageToImage3d; - break; - case Builtin::FillBufferImmediate: - builtinName = "FillBufferImmediate"; - builtin = NEO::EBuiltInOps::FillBuffer; - break; - case Builtin::FillBufferSSHOffset: - builtinName = "FillBufferSSHOffset"; - builtin = NEO::EBuiltInOps::FillBuffer; - break; case Builtin::CopyBufferRectBytes2d: builtinName = "CopyBufferRectBytes2d"; builtin = NEO::EBuiltInOps::CopyBufferRect; @@ -67,8 +47,16 @@ void BuiltinFunctionsLibImpl::initFunctions() { builtinName = "CopyBufferRectBytes3d"; builtin = NEO::EBuiltInOps::CopyBufferRect; break; - case Builtin::CopyBufferToImage3dBytes: - builtinName = "CopyBufferToImage3dBytes"; + case Builtin::CopyBufferToBufferMiddle: + builtinName = "CopyBufferToBufferMiddleRegion"; + builtin = NEO::EBuiltInOps::CopyBufferToBuffer; + break; + case Builtin::CopyBufferToBufferSide: + builtinName = "CopyBufferToBufferSideRegion"; + builtin = NEO::EBuiltInOps::CopyBufferToBuffer; + break; + case Builtin::CopyBufferToImage3d16Bytes: + builtinName = "CopyBufferToImage3d16Bytes"; builtin = NEO::EBuiltInOps::CopyBufferToImage3d; break; case Builtin::CopyBufferToImage3d2Bytes: @@ -83,12 +71,12 @@ void BuiltinFunctionsLibImpl::initFunctions() { builtinName = "CopyBufferToImage3d8Bytes"; builtin = NEO::EBuiltInOps::CopyBufferToImage3d; break; - case Builtin::CopyBufferToImage3d16Bytes: - builtinName = "CopyBufferToImage3d16Bytes"; + case Builtin::CopyBufferToImage3dBytes: + builtinName = "CopyBufferToImage3dBytes"; builtin = NEO::EBuiltInOps::CopyBufferToImage3d; break; - case Builtin::CopyImage3dToBufferBytes: - builtinName = "CopyImage3dToBufferBytes"; + case Builtin::CopyImage3dToBuffer16Bytes: + builtinName = "CopyImage3dToBuffer16Bytes"; builtin = NEO::EBuiltInOps::CopyImage3dToBuffer; break; case Builtin::CopyImage3dToBuffer2Bytes: @@ -103,10 +91,22 @@ void BuiltinFunctionsLibImpl::initFunctions() { builtinName = "CopyImage3dToBuffer8Bytes"; builtin = NEO::EBuiltInOps::CopyImage3dToBuffer; break; - case Builtin::CopyImage3dToBuffer16Bytes: - builtinName = "CopyImage3dToBuffer16Bytes"; + case Builtin::CopyImage3dToBufferBytes: + builtinName = "CopyImage3dToBufferBytes"; builtin = NEO::EBuiltInOps::CopyImage3dToBuffer; break; + case Builtin::CopyImageRegion: + builtinName = "CopyImageToImage3d"; + builtin = NEO::EBuiltInOps::CopyImageToImage3d; + break; + case Builtin::FillBufferImmediate: + builtinName = "FillBufferImmediate"; + builtin = NEO::EBuiltInOps::FillBuffer; + break; + case Builtin::FillBufferSSHOffset: + builtinName = "FillBufferSSHOffset"; + builtin = NEO::EBuiltInOps::FillBuffer; + break; default: continue; }; diff --git a/level_zero/core/source/cmdlist_hw.inl b/level_zero/core/source/cmdlist_hw.inl index 19ef860a94..31c8a0bca9 100644 --- a/level_zero/core/source/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist_hw.inl @@ -73,14 +73,18 @@ template void CommandListCoreFamily::programL3(bool isSLMused) {} template -ze_result_t CommandListCoreFamily::appendLaunchFunction( - ze_kernel_handle_t hFunction, const ze_group_count_t *pThreadGroupDimensions, - ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { +ze_result_t CommandListCoreFamily::appendLaunchFunction(ze_kernel_handle_t hFunction, + const ze_group_count_t *pThreadGroupDimensions, + ze_event_handle_t hEvent, + uint32_t numWaitEvents, + ze_event_handle_t *phWaitEvents) { + if (addEventsToCmdList(hEvent, numWaitEvents, phWaitEvents) == ZE_RESULT_ERROR_INVALID_ARGUMENT) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } - ze_result_t ret = appendLaunchFunctionWithParams(hFunction, pThreadGroupDimensions, hEvent, numWaitEvents, phWaitEvents, false, false); + ze_result_t ret = appendLaunchFunctionWithParams(hFunction, pThreadGroupDimensions, hEvent, + numWaitEvents, phWaitEvents, false, false); if (ret != ZE_RESULT_SUCCESS) { return ret; } @@ -94,18 +98,23 @@ ze_result_t CommandListCoreFamily::appendLaunchCooperativeKernel( ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { + return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; } template -ze_result_t CommandListCoreFamily::appendLaunchFunctionIndirect( - ze_kernel_handle_t hFunction, const ze_group_count_t *pDispatchArgumentsBuffer, - ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { +ze_result_t CommandListCoreFamily::appendLaunchFunctionIndirect(ze_kernel_handle_t hFunction, + const ze_group_count_t *pDispatchArgumentsBuffer, + ze_event_handle_t hEvent, + uint32_t numWaitEvents, + ze_event_handle_t *phWaitEvents) { + if (addEventsToCmdList(hEvent, numWaitEvents, phWaitEvents) == ZE_RESULT_ERROR_INVALID_ARGUMENT) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } - ze_result_t ret = appendLaunchFunctionWithParams(hFunction, pDispatchArgumentsBuffer, nullptr, 0, nullptr, true, false); + ze_result_t ret = appendLaunchFunctionWithParams(hFunction, pDispatchArgumentsBuffer, + nullptr, 0, nullptr, true, false); if (hEvent) { appendSignalEventPostWalker(hEvent); @@ -122,6 +131,7 @@ ze_result_t CommandListCoreFamily::appendLaunchMultipleFunctionsI ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { + if (addEventsToCmdList(hEvent, numWaitEvents, phWaitEvents) == ZE_RESULT_ERROR_INVALID_ARGUMENT) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } @@ -163,6 +173,7 @@ template ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { + if (addEventsToCmdList(hSignalEvent, numWaitEvents, phWaitEvents) == ZE_RESULT_ERROR_INVALID_ARGUMENT) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } @@ -177,9 +188,13 @@ ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_ } template -ze_result_t CommandListCoreFamily::appendMemoryRangesBarrier( - uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges, - ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { +ze_result_t CommandListCoreFamily::appendMemoryRangesBarrier(uint32_t numRanges, + const size_t *pRangeSizes, + const void **pRanges, + ze_event_handle_t hSignalEvent, + uint32_t numWaitEvents, + ze_event_handle_t *phWaitEvents) { + if (addEventsToCmdList(hSignalEvent, numWaitEvents, phWaitEvents) == ZE_RESULT_ERROR_INVALID_ARGUMENT) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } @@ -246,7 +261,8 @@ ze_result_t CommandListCoreFamily::appendImageCopyFromMemory(ze_i break; } - builtinKernel->setArgBufferWithAlloc(0u, reinterpret_cast(&allocationStruct.alignedAllocationPtr), allocationStruct.alloc); + builtinKernel->setArgBufferWithAlloc(0u, reinterpret_cast(&allocationStruct.alignedAllocationPtr), + allocationStruct.alloc); builtinKernel->setArgRedescribedImage(1u, hDstImage); builtinKernel->setArgumentValue(2u, sizeof(size_t), &allocationStruct.offset); @@ -258,7 +274,8 @@ ze_result_t CommandListCoreFamily::appendImageCopyFromMemory(ze_i builtinKernel->setArgumentValue(3u, sizeof(origin), &origin); auto srcRowPitch = pDstRegion->width * bytesPerPixel; - auto srcSlicePitch = (image->getImageInfo().imgDesc.imageType == NEO::ImageType::Image1DArray ? 1 : pDstRegion->height) * srcRowPitch; + auto srcSlicePitch = + (image->getImageInfo().imgDesc.imageType == NEO::ImageType::Image1DArray ? 1 : pDstRegion->height) * srcRowPitch; uint32_t pitch[] = { srcRowPitch, @@ -269,19 +286,24 @@ ze_result_t CommandListCoreFamily::appendImageCopyFromMemory(ze_i uint32_t groupSizeY = pDstRegion->height; uint32_t groupSizeZ = pDstRegion->depth; - if (builtinKernel->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { + if (builtinKernel->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, + &groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (pDstRegion->width % groupSizeX || pDstRegion->height % groupSizeY || pDstRegion->depth % groupSizeZ) { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } - ze_group_count_t functionArgs{pDstRegion->width / groupSizeX, pDstRegion->height / groupSizeY, pDstRegion->depth / groupSizeZ}; + ze_group_count_t functionArgs{pDstRegion->width / groupSizeX, pDstRegion->height / groupSizeY, + pDstRegion->depth / groupSizeZ}; return this->appendLaunchFunction(builtinKernel->toHandle(), &functionArgs, hEvent, numWaitEvents, phWaitEvents); @@ -336,7 +358,8 @@ ze_result_t CommandListCoreFamily::appendImageCopyToMemory(void * } builtinKernel->setArgRedescribedImage(0u, hSrcImage); - builtinKernel->setArgBufferWithAlloc(1u, reinterpret_cast(&allocationStruct.alignedAllocationPtr), allocationStruct.alloc); + builtinKernel->setArgBufferWithAlloc(1u, reinterpret_cast(&allocationStruct.alignedAllocationPtr), + allocationStruct.alloc); uint32_t origin[] = { static_cast(pSrcRegion->originX), @@ -348,7 +371,8 @@ ze_result_t CommandListCoreFamily::appendImageCopyToMemory(void * builtinKernel->setArgumentValue(3u, sizeof(size_t), &allocationStruct.offset); auto srcRowPitch = pSrcRegion->width * bytesPerPixel; - auto srcSlicePitch = (image->getImageInfo().imgDesc.imageType == NEO::ImageType::Image1DArray ? 1 : pSrcRegion->height) * srcRowPitch; + auto srcSlicePitch = + (image->getImageInfo().imgDesc.imageType == NEO::ImageType::Image1DArray ? 1 : pSrcRegion->height) * srcRowPitch; uint32_t pitch[] = { srcRowPitch, @@ -359,19 +383,24 @@ ze_result_t CommandListCoreFamily::appendImageCopyToMemory(void * uint32_t groupSizeY = pSrcRegion->height; uint32_t groupSizeZ = pSrcRegion->depth; - if (builtinKernel->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { + if (builtinKernel->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, + &groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (pSrcRegion->width % groupSizeX || pSrcRegion->height % groupSizeY || pSrcRegion->depth % groupSizeZ) { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } - ze_group_count_t functionArgs{pSrcRegion->width / groupSizeX, pSrcRegion->height / groupSizeY, pSrcRegion->depth / groupSizeZ}; + ze_group_count_t functionArgs{pSrcRegion->width / groupSizeX, pSrcRegion->height / groupSizeY, + pSrcRegion->depth / groupSizeZ}; auto ret = CommandListCoreFamily::appendLaunchFunction(builtinKernel->toHandle(), &functionArgs, hEvent, numWaitEvents, phWaitEvents); @@ -433,19 +462,24 @@ ze_result_t CommandListCoreFamily::appendImageCopyRegion(ze_image uint32_t groupSizeY = srcRegion.height; uint32_t groupSizeZ = srcRegion.depth; - if (function->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { + if (function->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, + &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (function->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (srcRegion.width % groupSizeX || srcRegion.height % groupSizeY || srcRegion.depth % groupSizeZ) { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } - ze_group_count_t functionArgs{srcRegion.width / groupSizeX, srcRegion.height / groupSizeY, srcRegion.depth / groupSizeZ}; + ze_group_count_t functionArgs{srcRegion.width / groupSizeX, srcRegion.height / groupSizeY, + srcRegion.depth / groupSizeZ}; function->setArgRedescribedImage(0, hSrcImage); function->setArgRedescribedImage(1, hDstImage); @@ -464,6 +498,7 @@ ze_result_t CommandListCoreFamily::appendImageCopy(ze_image_handl ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { + return this->appendImageCopyRegion(hDstImage, hSrcImage, nullptr, nullptr, hEvent, numWaitEvents, phWaitEvents); } @@ -472,20 +507,31 @@ template ze_result_t CommandListCoreFamily::appendMemAdvise(ze_device_handle_t hDevice, const void *ptr, size_t size, ze_memory_advice_t advice) { + auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(ptr); if (allocData) { if (allocData->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY) { return ZE_RESULT_SUCCESS; } else { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } } + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } template -ze_result_t CommandListCoreFamily::appendMemoryCopyKernelWithGA( - void *dstPtr, NEO::GraphicsAllocation *dstPtrAlloc, uint64_t dstOffset, void *srcPtr, NEO::GraphicsAllocation *srcPtrAlloc, uint64_t srcOffset, uint32_t size, uint32_t elementSize, Builtin builtin) { +ze_result_t CommandListCoreFamily::appendMemoryCopyKernelWithGA(void *dstPtr, + NEO::GraphicsAllocation *dstPtrAlloc, + uint64_t dstOffset, + void *srcPtr, + NEO::GraphicsAllocation *srcPtrAlloc, + uint64_t srcOffset, + uint32_t size, + uint32_t elementSize, + Builtin builtin) { + auto builtinFunction = device->getBuiltinFunctionsLib()->getFunction(builtin); uint32_t groupSizeX = builtinFunction->getImmutableData() @@ -495,6 +541,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyKernelWithGA( uint32_t groupSizeZ = 1u; if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ)) { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } @@ -510,13 +557,14 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyKernelWithGA( uint32_t groups = (size + ((groupSizeX * elementSize) - 1)) / (groupSizeX * elementSize); ze_group_count_t dispatchFuncArgs{groups, 1u, 1u}; - return CommandListCoreFamily::appendLaunchFunction(builtinFunction->toHandle(), &dispatchFuncArgs, nullptr, 0, - nullptr); + return CommandListCoreFamily::appendLaunchFunction(builtinFunction->toHandle(), &dispatchFuncArgs, + nullptr, 0, nullptr); } template -ze_result_t CommandListCoreFamily::appendPageFaultCopy( - NEO::GraphicsAllocation *dstptr, NEO::GraphicsAllocation *srcptr, size_t size, bool flushHost) { +ze_result_t CommandListCoreFamily::appendPageFaultCopy(NEO::GraphicsAllocation *dstptr, + NEO::GraphicsAllocation *srcptr, + size_t size, bool flushHost) { auto builtinFunction = device->getBuiltinFunctionsLib()->getPageFaultFunction(); @@ -527,6 +575,7 @@ ze_result_t CommandListCoreFamily::appendPageFaultCopy( uint32_t groupSizeZ = 1u; if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ)) { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } @@ -592,8 +641,10 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, if (ret == ZE_RESULT_SUCCESS && leftSize) { ret = appendMemoryCopyKernelWithGA(reinterpret_cast(&dstAllocationStruct.alignedAllocationPtr), - dstAllocationStruct.alloc, dstAllocationStruct.offset, reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), - srcAllocationStruct.alloc, srcAllocationStruct.offset, static_cast(leftSize), 1, + dstAllocationStruct.alloc, dstAllocationStruct.offset, + reinterpret_cast(&srcAllocationStruct.alignedAllocationPtr), + srcAllocationStruct.alloc, srcAllocationStruct.offset, + static_cast(leftSize), 1, Builtin::CopyBufferToBufferSide); } @@ -637,6 +688,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyRegion(void *d uint32_t srcPitch, uint32_t srcSlicePitch, ze_event_handle_t hSignalEvent) { + uintptr_t destinationPtr = reinterpret_cast(dstPtr); size_t dstOffset = 0; NEO::EncodeSurfaceState::getSshAlignedPointer(destinationPtr, dstOffset); @@ -729,19 +781,24 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyKernel3d(const uint32_t groupSizeY = srcRegion->height; uint32_t groupSizeZ = srcRegion->depth; - if (builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { + if (builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, + &groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (srcRegion->width % groupSizeX || srcRegion->height % groupSizeY || srcRegion->depth % groupSizeZ) { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } - ze_group_count_t dispatchFuncArgs{srcRegion->width / groupSizeX, srcRegion->height / groupSizeY, srcRegion->depth / groupSizeZ}; + ze_group_count_t dispatchFuncArgs{srcRegion->width / groupSizeX, srcRegion->height / groupSizeY, + srcRegion->depth / groupSizeZ}; uint srcOrigin[3] = {(srcRegion->originX + static_cast(srcOffset)), (srcRegion->originY), (srcRegion->originZ)}; uint dstOrigin[3] = {(dstRegion->originX + static_cast(dstOffset)), (dstRegion->originY), (srcRegion->originZ)}; @@ -779,15 +836,19 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyKernel2d(const uint32_t groupSizeY = srcRegion->height; uint32_t groupSizeZ = 1u; - if (builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { + if (builtinFunction->suggestGroupSize(groupSizeX, groupSizeY, groupSizeZ, &groupSizeX, + &groupSizeY, &groupSizeZ) != ZE_RESULT_SUCCESS) { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (builtinFunction->setGroupSize(groupSizeX, groupSizeY, groupSizeZ) != ZE_RESULT_SUCCESS) { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } if (srcRegion->width % groupSizeX || srcRegion->height % groupSizeY) { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } @@ -803,7 +864,9 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyKernel2d(const builtinFunction->setArgumentValue(4, sizeof(srcPitch), &srcPitch); builtinFunction->setArgumentValue(5, sizeof(dstPitch), &dstPitch); - return CommandListCoreFamily::appendLaunchFunction(builtinFunction->toHandle(), &dispatchFuncArgs, hSignalEvent, numWaitEvents, + return CommandListCoreFamily::appendLaunchFunction(builtinFunction->toHandle(), + &dispatchFuncArgs, hSignalEvent, + numWaitEvents, phWaitEvents); } @@ -815,16 +878,21 @@ ze_result_t CommandListCoreFamily::appendMemoryPrefetch(const voi if (allocData->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY) { return ZE_RESULT_SUCCESS; } else { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } } + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } template -ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, const void *pattern, - size_t patternSize, size_t size, +ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, + const void *pattern, + size_t patternSize, + size_t size, ze_event_handle_t hEvent) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; bool hostPointerNeedsFlush = false; @@ -855,6 +923,7 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, co groupSizeX = builtinFunction->getImmutableData()->getDescriptor().kernelAttributes.simdSize; if (builtinFunction->setGroupSize(groupSizeX, 1u, 1u)) { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } @@ -870,6 +939,7 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, co reinterpret_cast(srcPtr), srcOffset + patternSize, this); if (patternAlloc == nullptr) { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } @@ -877,6 +947,7 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, co groupSizeX = static_cast(patternSize); if (builtinFunction->setGroupSize(groupSizeX, 1u, 1u)) { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } @@ -900,6 +971,7 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, co uint32_t groupRemainderSizeX = static_cast(size) % groupSizeX; if (groupRemainderSizeX) { if (builtinFunction->setGroupSize(groupRemainderSizeX, 1u, 1u)) { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } ze_group_count_t dispatchFuncArgs{1u, 1u, 1u}; @@ -938,7 +1010,10 @@ void CommandListCoreFamily::appendSignalEventPostWalker(ze_event_ } template -inline uint64_t CommandListCoreFamily::getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region) { +inline uint64_t CommandListCoreFamily::getInputBufferSize(NEO::ImageType imageType, + uint64_t bytesPerPixel, + const ze_image_region_t *region) { + switch (imageType) { default: UNRECOVERABLE_IF(true); @@ -954,9 +1029,13 @@ inline uint64_t CommandListCoreFamily::getInputBufferSize(NEO::Im } template -inline AlignedAllocationData CommandListCoreFamily::getAlignedAllocation(Device *device, const void *buffer, uint64_t bufferSize) { +inline AlignedAllocationData CommandListCoreFamily::getAlignedAllocation(Device *device, + const void *buffer, + uint64_t bufferSize) { + NEO::SvmAllocationData *allocData = nullptr; - bool srcAllocFound = device->getDriverHandle()->findAllocationDataForRange(const_cast(buffer), bufferSize, &allocData); + bool srcAllocFound = device->getDriverHandle()->findAllocationDataForRange(const_cast(buffer), + bufferSize, &allocData); NEO::GraphicsAllocation *alloc = nullptr; uintptr_t sourcePtr = reinterpret_cast(const_cast(buffer)); @@ -985,7 +1064,10 @@ inline AlignedAllocationData CommandListCoreFamily::getAlignedAll } template -inline ze_result_t CommandListCoreFamily::addEventsToCmdList(ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { +inline ze_result_t CommandListCoreFamily::addEventsToCmdList(ze_event_handle_t hEvent, + uint32_t numWaitEvents, + ze_event_handle_t *phWaitEvents) { + if (numWaitEvents > 0) { if (phWaitEvents) { CommandListCoreFamily::appendWaitOnEvents(numWaitEvents, phWaitEvents); @@ -1018,6 +1100,7 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han template ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; @@ -1079,16 +1162,22 @@ ze_result_t CommandListCoreFamily::prepareIndirectParams(const ze auto alloc = allocData->gpuAllocation; commandContainer.addToResidencyContainer(alloc); - NEO::EncodeSetMMIO::encodeMEM(commandContainer, GPUGPU_DISPATCHDIMX, ptrOffset(alloc->getGpuAddress(), offsetof(ze_group_count_t, groupCountX))); - NEO::EncodeSetMMIO::encodeMEM(commandContainer, GPUGPU_DISPATCHDIMY, ptrOffset(alloc->getGpuAddress(), offsetof(ze_group_count_t, groupCountY))); - NEO::EncodeSetMMIO::encodeMEM(commandContainer, GPUGPU_DISPATCHDIMZ, ptrOffset(alloc->getGpuAddress(), offsetof(ze_group_count_t, groupCountZ))); + NEO::EncodeSetMMIO::encodeMEM(commandContainer, GPUGPU_DISPATCHDIMX, + ptrOffset(alloc->getGpuAddress(), offsetof(ze_group_count_t, groupCountX))); + NEO::EncodeSetMMIO::encodeMEM(commandContainer, GPUGPU_DISPATCHDIMY, + ptrOffset(alloc->getGpuAddress(), offsetof(ze_group_count_t, groupCountY))); + NEO::EncodeSetMMIO::encodeMEM(commandContainer, GPUGPU_DISPATCHDIMZ, + ptrOffset(alloc->getGpuAddress(), offsetof(ze_group_count_t, groupCountZ))); } return ZE_RESULT_SUCCESS; } template -ze_result_t CommandListCoreFamily::setGroupSizeIndirect(uint32_t offsets[3], void *crossThreadAddress, uint32_t lws[3]) { +ze_result_t CommandListCoreFamily::setGroupSizeIndirect(uint32_t offsets[3], + void *crossThreadAddress, + uint32_t lws[3]) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; NEO::EncodeIndirectParams::setGroupSizeIndirect(commandContainer, offsets, crossThreadAddress, lws); diff --git a/level_zero/core/source/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist_hw_immediate.h index 483017e4a1..25c2cd9fe6 100644 --- a/level_zero/core/source/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist_hw_immediate.h @@ -23,13 +23,15 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily(numIddsPerBlock) {} - ze_result_t appendLaunchFunction( - ze_kernel_handle_t hFunction, const ze_group_count_t *pThreadGroupDimensions, - ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; + ze_result_t appendLaunchFunction(ze_kernel_handle_t hFunction, + const ze_group_count_t *pThreadGroupDimensions, + ze_event_handle_t hEvent, uint32_t numWaitEvents, + ze_event_handle_t *phWaitEvents) override; - ze_result_t appendLaunchFunctionIndirect( - ze_kernel_handle_t hFunction, const ze_group_count_t *pDispatchArgumentsBuffer, - ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; + ze_result_t appendLaunchFunctionIndirect(ze_kernel_handle_t hFunction, + const ze_group_count_t *pDispatchArgumentsBuffer, + ze_event_handle_t hEvent, uint32_t numWaitEvents, + ze_event_handle_t *phWaitEvents) override; ze_result_t appendBarrier(ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, @@ -60,7 +62,8 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily::appendLaunchFunction( ze_kernel_handle_t hFunction, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { - auto ret = CommandListCoreFamily::appendLaunchFunction(hFunction, pThreadGroupDimensions, hEvent, numWaitEvents, phWaitEvents); + auto ret = CommandListCoreFamily::appendLaunchFunction(hFunction, pThreadGroupDimensions, + hEvent, numWaitEvents, phWaitEvents); if (ret == ZE_RESULT_SUCCESS) { executeCommandListImmediate(true); } @@ -26,7 +27,8 @@ ze_result_t CommandListCoreFamilyImmediate::appendLaunchFunctionI ze_kernel_handle_t hFunction, const ze_group_count_t *pDispatchArgumentsBuffer, ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { - auto ret = CommandListCoreFamily::appendLaunchFunctionIndirect(hFunction, pDispatchArgumentsBuffer, hEvent, numWaitEvents, phWaitEvents); + auto ret = CommandListCoreFamily::appendLaunchFunctionIndirect(hFunction, pDispatchArgumentsBuffer, + hEvent, numWaitEvents, phWaitEvents); if (ret == ZE_RESULT_SUCCESS) { executeCommandListImmediate(true); } @@ -34,9 +36,10 @@ ze_result_t CommandListCoreFamilyImmediate::appendLaunchFunctionI } template -ze_result_t CommandListCoreFamilyImmediate::appendBarrier(ze_event_handle_t hSignalEvent, - uint32_t numWaitEvents, - ze_event_handle_t *phWaitEvents) { +ze_result_t CommandListCoreFamilyImmediate::appendBarrier( + ze_event_handle_t hSignalEvent, + uint32_t numWaitEvents, + ze_event_handle_t *phWaitEvents) { auto ret = CommandListCoreFamily::appendBarrier(hSignalEvent, numWaitEvents, phWaitEvents); if (ret == ZE_RESULT_SUCCESS) { @@ -46,31 +49,36 @@ ze_result_t CommandListCoreFamilyImmediate::appendBarrier(ze_even } template -ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopy(void *dstptr, - const void *srcptr, - size_t size, - ze_event_handle_t hSignalEvent, - uint32_t numWaitEvents, - ze_event_handle_t *phWaitEvents) { +ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopy( + void *dstptr, + const void *srcptr, + size_t size, + ze_event_handle_t hSignalEvent, + uint32_t numWaitEvents, + ze_event_handle_t *phWaitEvents) { - auto ret = CommandListCoreFamily::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents); + auto ret = CommandListCoreFamily::appendMemoryCopy(dstptr, srcptr, size, hSignalEvent, + numWaitEvents, phWaitEvents); if (ret == ZE_RESULT_SUCCESS) { executeCommandListImmediate(true); } return ret; } template -ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopyRegion(void *dstPtr, - const ze_copy_region_t *dstRegion, - uint32_t dstPitch, - uint32_t dstSlicePitch, - const void *srcPtr, - const ze_copy_region_t *srcRegion, - uint32_t srcPitch, - uint32_t srcSlicePitch, - ze_event_handle_t hSignalEvent) { +ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopyRegion( + void *dstPtr, + const ze_copy_region_t *dstRegion, + uint32_t dstPitch, + uint32_t dstSlicePitch, + const void *srcPtr, + const ze_copy_region_t *srcRegion, + uint32_t srcPitch, + uint32_t srcSlicePitch, + ze_event_handle_t hSignalEvent) { - auto ret = CommandListCoreFamily::appendMemoryCopyRegion(dstPtr, dstRegion, dstPitch, dstSlicePitch, srcPtr, srcRegion, srcPitch, srcSlicePitch, hSignalEvent); + auto ret = CommandListCoreFamily::appendMemoryCopyRegion(dstPtr, dstRegion, dstPitch, dstSlicePitch, + srcPtr, srcRegion, srcPitch, srcSlicePitch, + hSignalEvent); if (ret == ZE_RESULT_SUCCESS) { executeCommandListImmediate(true); } @@ -124,13 +132,16 @@ ze_result_t CommandListCoreFamilyImmediate::appendWaitOnEvents(ui } template -ze_result_t CommandListCoreFamilyImmediate::appendImageCopyFromMemory(ze_image_handle_t hDstImage, - const void *srcPtr, - const ze_image_region_t *pDstRegion, - ze_event_handle_t hEvent, - uint32_t numWaitEvents, - ze_event_handle_t *phWaitEvents) { - auto ret = CommandListCoreFamily::appendImageCopyFromMemory(hDstImage, srcPtr, pDstRegion, hEvent, numWaitEvents, phWaitEvents); +ze_result_t CommandListCoreFamilyImmediate::appendImageCopyFromMemory( + ze_image_handle_t hDstImage, + const void *srcPtr, + const ze_image_region_t *pDstRegion, + ze_event_handle_t hEvent, + uint32_t numWaitEvents, + ze_event_handle_t *phWaitEvents) { + + auto ret = CommandListCoreFamily::appendImageCopyFromMemory(hDstImage, srcPtr, pDstRegion, hEvent, + numWaitEvents, phWaitEvents); if (ret == ZE_RESULT_SUCCESS) { executeCommandListImmediate(true); } @@ -138,13 +149,16 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyFromMe } template -ze_result_t CommandListCoreFamilyImmediate::appendImageCopyToMemory(void *dstPtr, - ze_image_handle_t hSrcImage, - const ze_image_region_t *pSrcRegion, - ze_event_handle_t hEvent, - uint32_t numWaitEvents, - ze_event_handle_t *phWaitEvents) { - auto ret = CommandListCoreFamily::appendImageCopyToMemory(dstPtr, hSrcImage, pSrcRegion, hEvent, numWaitEvents, phWaitEvents); +ze_result_t CommandListCoreFamilyImmediate::appendImageCopyToMemory( + void *dstPtr, + ze_image_handle_t hSrcImage, + const ze_image_region_t *pSrcRegion, + ze_event_handle_t hEvent, + uint32_t numWaitEvents, + ze_event_handle_t *phWaitEvents) { + + auto ret = CommandListCoreFamily::appendImageCopyToMemory(dstPtr, hSrcImage, pSrcRegion, hEvent, + numWaitEvents, phWaitEvents); if (ret == ZE_RESULT_SUCCESS) { executeCommandListImmediate(true); } diff --git a/level_zero/core/source/cmdqueue.cpp b/level_zero/core/source/cmdqueue.cpp index 011d43b921..3344942522 100644 --- a/level_zero/core/source/cmdqueue.cpp +++ b/level_zero/core/source/cmdqueue.cpp @@ -84,15 +84,14 @@ ze_result_t CommandQueueImp::synchronizeByPollingForTaskCount(uint32_t timeout) void CommandQueueImp::printFunctionsPrintfOutput() { size_t size = this->printfFunctionContainer.size(); - if (size) { - for (size_t i = 0; i < size; i++) { - this->printfFunctionContainer[i]->printPrintfOutput(); - } - this->printfFunctionContainer.clear(); + for (size_t i = 0; i < size; i++) { + this->printfFunctionContainer[i]->printPrintfOutput(); } + this->printfFunctionContainer.clear(); } -CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) { +CommandQueue *CommandQueue::create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr, + const ze_command_queue_desc_t *desc) { CommandQueueAllocatorFn allocator = nullptr; if (productFamily < IGFX_MAX_PRODUCT) { allocator = commandQueueFactory[productFamily]; diff --git a/level_zero/core/source/cmdqueue.h b/level_zero/core/source/cmdqueue.h index 5e0e3302df..7c49176fbb 100644 --- a/level_zero/core/source/cmdqueue.h +++ b/level_zero/core/source/cmdqueue.h @@ -41,15 +41,16 @@ struct CommandQueue : _ze_command_queue_handle_t { ze_fence_handle_t hFence) = 0; virtual ze_result_t synchronize(uint32_t timeout) = 0; - static CommandQueue *create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc); + static CommandQueue *create(uint32_t productFamily, Device *device, NEO::CommandStreamReceiver *csr, + const ze_command_queue_desc_t *desc); static CommandQueue *fromHandle(ze_command_queue_handle_t handle) { return static_cast(handle); } - inline ze_command_queue_handle_t toHandle() { return this; } + ze_command_queue_handle_t toHandle() { return this; } - inline void setCommandQueuePreemptionMode(NEO::PreemptionMode newPreemptionMode) { + void setCommandQueuePreemptionMode(NEO::PreemptionMode newPreemptionMode) { commandQueuePreemptionMode = newPreemptionMode; } @@ -58,7 +59,8 @@ struct CommandQueue : _ze_command_queue_handle_t { NEO::PreemptionMode commandQueuePreemptionMode = NEO::PreemptionMode::Initial; }; -using CommandQueueAllocatorFn = CommandQueue *(*)(Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc); +using CommandQueueAllocatorFn = CommandQueue *(*)(Device *device, NEO::CommandStreamReceiver *csr, + const ze_command_queue_desc_t *desc); extern CommandQueueAllocatorFn commandQueueFactory[]; template diff --git a/level_zero/core/source/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue_hw.inl index 74ea965386..a64c984f1d 100644 --- a/level_zero/core/source/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue_hw.inl @@ -50,7 +50,11 @@ ze_result_t CommandQueueHw::destroy() { template ze_result_t CommandQueueHw::executeCommandLists( - uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists, ze_fence_handle_t hFence, bool performMigration) { + uint32_t numCommandLists, + ze_command_list_handle_t *phCommandLists, + ze_fence_handle_t hFence, + bool performMigration) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; diff --git a/level_zero/core/source/cmdqueue_imp.h b/level_zero/core/source/cmdqueue_imp.h index 7fc2e7c580..60fed73d85 100644 --- a/level_zero/core/source/cmdqueue_imp.h +++ b/level_zero/core/source/cmdqueue_imp.h @@ -59,7 +59,7 @@ struct CommandQueueImp : public CommandQueue { NEO::CSRequirements::csOverfetchSize; CommandQueueImp(Device *device, NEO::CommandStreamReceiver *csr, const ze_command_queue_desc_t *desc) - : device(device), csr(csr), desc(*desc), commandStream(nullptr) { + : device(device), csr(csr), desc(*desc) { std::atomic_init(&commandQueuePerThreadScratchSize, 0u); } @@ -86,10 +86,10 @@ struct CommandQueueImp : public CommandQueue { void printFunctionsPrintfOutput(); - Device *device; - NEO::CommandStreamReceiver *csr; + Device *device = nullptr; + NEO::CommandStreamReceiver *csr = nullptr; const ze_command_queue_desc_t desc; - NEO::LinearStream *commandStream; + NEO::LinearStream *commandStream = nullptr; uint32_t taskCount = 0; std::vector printfFunctionContainer; bool gsbaInit = false; diff --git a/level_zero/core/source/device_imp.cpp b/level_zero/core/source/device_imp.cpp index 96f6e78836..8b44e12f0a 100644 --- a/level_zero/core/source/device_imp.cpp +++ b/level_zero/core/source/device_imp.cpp @@ -248,6 +248,7 @@ ze_result_t DeviceImp::getKernelProperties(ze_device_kernel_properties_t *pKerne uint32_t minorSpirvVersion = static_cast(std::stoul(ilVersion.substr(minorVersionPos + 1))); pKernelProperties->spirvVersionSupported = ZE_MAKE_VERSION(majorSpirvVersion, minorSpirvVersion); } else { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } diff --git a/level_zero/core/source/driver_handle_imp.cpp b/level_zero/core/source/driver_handle_imp.cpp index aebbbf53dc..7ca76a07a1 100644 --- a/level_zero/core/source/driver_handle_imp.cpp +++ b/level_zero/core/source/driver_handle_imp.cpp @@ -71,6 +71,7 @@ inline ze_memory_type_t parseUSMType(InternalMemoryType memoryType) { ze_result_t DriverHandleImp::getExtensionFunctionAddress(const char *pFuncName, void **pfunc) { *pfunc = this->osLibrary->getProcAddress(std::string(pFuncName)); if (*pfunc == nullptr) { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } return ZE_RESULT_SUCCESS; diff --git a/level_zero/core/source/image_hw.h b/level_zero/core/source/image_hw.h index f3fbd7388d..b37484a320 100644 --- a/level_zero/core/source/image_hw.h +++ b/level_zero/core/source/image_hw.h @@ -24,7 +24,6 @@ struct ImageCoreFamily : public ImageImp { using RSS = typename GfxFamily::RENDER_SURFACE_STATE; using RENDER_FORMAT = typename RENDER_SURFACE_STATE::SURFACE_FORMAT; using SHADER_CHANNEL_SELECT = typename RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT; - using BaseClass = ImageImp; using ImageImp::ImageImp; static const RENDER_FORMAT surfaceFormatUndefined = static_cast(-1); @@ -41,33 +40,89 @@ struct ImageCoreFamily : public ImageImp { NEO::SurfaceFormatInfo surfaceFormatTable[ZE_IMAGE_FORMAT_RENDER_LAYOUT_MAX + 1] [ZE_IMAGE_FORMAT_TYPE_MAX + 1] = { // ZE_IMAGE_FORMAT_LAYOUT_8 - {{GMM_FORMAT_R8_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R8_UINT), 0, 1, 1, 1}, {GMM_FORMAT_R8_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R8_SINT), 0, 1, 1, 1}, {GMM_FORMAT_R8_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R8_UNORM), 0, 1, 1, 1}, {GMM_FORMAT_R8_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R8_SNORM), 0, 1, 1, 1}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 1, 1, 1}}, + {{GMM_FORMAT_R8_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R8_UINT), 0, 1, 1, 1}, + {GMM_FORMAT_R8_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R8_SINT), 0, 1, 1, 1}, + {GMM_FORMAT_R8_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R8_UNORM), 0, 1, 1, 1}, + {GMM_FORMAT_R8_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R8_SNORM), 0, 1, 1, 1}, + {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 1, 1, 1}}, // ZE_IMAGE_FORMAT_LAYOUT_16 - {{GMM_FORMAT_R16_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16_UINT), 0, 1, 2, 2}, {GMM_FORMAT_R16_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16_SINT), 0, 1, 2, 2}, {GMM_FORMAT_R16_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R16_UNORM), 0, 1, 2, 2}, {GMM_FORMAT_R16_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R16_SNORM), 0, 1, 2, 2}, {GMM_FORMAT_R16_FLOAT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16_FLOAT), 0, 1, 2, 2}}, + {{GMM_FORMAT_R16_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16_UINT), 0, 1, 2, 2}, + {GMM_FORMAT_R16_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16_SINT), 0, 1, 2, 2}, + {GMM_FORMAT_R16_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R16_UNORM), 0, 1, 2, 2}, + {GMM_FORMAT_R16_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R16_SNORM), 0, 1, 2, 2}, + {GMM_FORMAT_R16_FLOAT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16_FLOAT), 0, 1, 2, 2}}, // ZE_IMAGE_FORMAT_LAYOUT_32 - {{GMM_FORMAT_R32_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32_UINT), 0, 1, 4, 4}, {GMM_FORMAT_R32_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32_SINT), 0, 1, 4, 4}, {GMM_FORMAT_R32_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R32_UNORM), 0, 1, 4, 4}, {GMM_FORMAT_R32_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R32_SNORM), 0, 1, 4, 4}, {GMM_FORMAT_R32_FLOAT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32_FLOAT), 0, 1, 4, 4}}, + {{GMM_FORMAT_R32_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32_UINT), 0, 1, 4, 4}, + {GMM_FORMAT_R32_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32_SINT), 0, 1, 4, 4}, + {GMM_FORMAT_R32_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R32_UNORM), 0, 1, 4, 4}, + {GMM_FORMAT_R32_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R32_SNORM), 0, 1, 4, 4}, + {GMM_FORMAT_R32_FLOAT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32_FLOAT), 0, 1, 4, 4}}, // ZE_IMAGE_FORMAT_LAYOUT_8_8 - {{GMM_FORMAT_R8G8_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8_UINT), 0, 2, 1, 2}, {GMM_FORMAT_R8G8_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8_SINT), 0, 2, 1, 2}, {GMM_FORMAT_R8G8_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8_UNORM), 0, 2, 1, 2}, {GMM_FORMAT_R8G8_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8_SNORM), 0, 2, 1, 2}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 2, 1, 2}}, + {{GMM_FORMAT_R8G8_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8_UINT), 0, 2, 1, 2}, + {GMM_FORMAT_R8G8_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8_SINT), 0, 2, 1, 2}, + {GMM_FORMAT_R8G8_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8_UNORM), 0, 2, 1, 2}, + {GMM_FORMAT_R8G8_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8_SNORM), 0, 2, 1, 2}, + {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 2, 1, 2}}, // ZE_IMAGE_FORMAT_LAYOUT_8_8_8_8 - {{GMM_FORMAT_R8G8B8A8_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8B8A8_UINT), 0, 4, 1, 4}, {GMM_FORMAT_R8G8B8A8_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8B8A8_SINT), 0, 4, 1, 4}, {GMM_FORMAT_R8G8B8A8_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8B8A8_UNORM), 0, 4, 1, 4}, {GMM_FORMAT_R8G8B8A8_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8B8A8_SNORM), 0, 4, 1, 4}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 4, 1, 4}}, + {{GMM_FORMAT_R8G8B8A8_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8B8A8_UINT), 0, 4, 1, 4}, + {GMM_FORMAT_R8G8B8A8_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8B8A8_SINT), 0, 4, 1, 4}, + {GMM_FORMAT_R8G8B8A8_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8B8A8_UNORM), 0, 4, 1, 4}, + {GMM_FORMAT_R8G8B8A8_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R8G8B8A8_SNORM), 0, 4, 1, 4}, + {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 4, 1, 4}}, // ZE_IMAGE_FORMAT_LAYOUT_16_16 - {{GMM_FORMAT_R16G16_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16_UINT), 0, 2, 2, 4}, {GMM_FORMAT_R16G16_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16_SINT), 0, 2, 2, 4}, {GMM_FORMAT_R16G16_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16_UNORM), 0, 2, 2, 4}, {GMM_FORMAT_R16G16_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16_SNORM), 0, 2, 2, 4}, {GMM_FORMAT_R16G16_FLOAT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16_FLOAT), 0, 2, 2, 4}}, + {{GMM_FORMAT_R16G16_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16_UINT), 0, 2, 2, 4}, + {GMM_FORMAT_R16G16_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16_SINT), 0, 2, 2, 4}, + {GMM_FORMAT_R16G16_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16_UNORM), 0, 2, 2, 4}, + {GMM_FORMAT_R16G16_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16_SNORM), 0, 2, 2, 4}, + {GMM_FORMAT_R16G16_FLOAT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16_FLOAT), 0, 2, 2, 4}}, // ZE_IMAGE_FORMAT_LAYOUT_16_16_16_16 - {{GMM_FORMAT_R16G16B16A16_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16B16A16_UINT), 0, 4, 2, 8}, {GMM_FORMAT_R16G16B16A16_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16B16A16_SINT), 0, 4, 2, 8}, {GMM_FORMAT_R16G16B16A16_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16B16A16_UNORM), 0, 4, 2, 8}, {GMM_FORMAT_R16G16B16A16_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16B16A16_SNORM), 0, 4, 2, 8}, {GMM_FORMAT_R16G16B16A16_FLOAT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16B16A16_FLOAT), 0, 4, 2, 8}}, + {{GMM_FORMAT_R16G16B16A16_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16B16A16_UINT), 0, 4, 2, 8}, + {GMM_FORMAT_R16G16B16A16_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16B16A16_SINT), 0, 4, 2, 8}, + {GMM_FORMAT_R16G16B16A16_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16B16A16_UNORM), 0, 4, 2, 8}, + {GMM_FORMAT_R16G16B16A16_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16B16A16_SNORM), 0, 4, 2, 8}, + {GMM_FORMAT_R16G16B16A16_FLOAT_TYPE, static_cast(RSS::SURFACE_FORMAT_R16G16B16A16_FLOAT), 0, 4, 2, 8}}, // ZE_IMAGE_FORMAT_LAYOUT_32_32 - {{GMM_FORMAT_R32G32_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32_UINT), 0, 2, 4, 8}, {GMM_FORMAT_R32G32_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32_SINT), 0, 2, 4, 8}, {GMM_FORMAT_R32G32_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32_UNORM), 0, 2, 4, 8}, {GMM_FORMAT_R32G32_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32_SNORM), 0, 2, 4, 8}, {GMM_FORMAT_R32G32_FLOAT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32_FLOAT), 0, 2, 4, 8}}, + {{GMM_FORMAT_R32G32_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32_UINT), 0, 2, 4, 8}, + {GMM_FORMAT_R32G32_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32_SINT), 0, 2, 4, 8}, + {GMM_FORMAT_R32G32_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32_UNORM), 0, 2, 4, 8}, + {GMM_FORMAT_R32G32_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32_SNORM), 0, 2, 4, 8}, + {GMM_FORMAT_R32G32_FLOAT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32_FLOAT), 0, 2, 4, 8}}, // ZE_IMAGE_FORMAT_LAYOUT_32_32_32_32 - {{GMM_FORMAT_R32G32B32A32_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32B32A32_UINT), 0, 4, 4, 16}, {GMM_FORMAT_R32G32B32A32_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32B32A32_SINT), 0, 4, 4, 16}, {GMM_FORMAT_R32G32B32A32_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32B32A32_UNORM), 0, 4, 4, 16}, {GMM_FORMAT_R32G32B32A32_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32B32A32_SNORM), 0, 4, 4, 16}, {GMM_FORMAT_R32G32B32A32_FLOAT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32B32A32_FLOAT), 0, 4, 4, 16}}, + {{GMM_FORMAT_R32G32B32A32_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32B32A32_UINT), 0, 4, 4, 16}, + {GMM_FORMAT_R32G32B32A32_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32B32A32_SINT), 0, 4, 4, 16}, + {GMM_FORMAT_R32G32B32A32_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32B32A32_UNORM), 0, 4, 4, 16}, + {GMM_FORMAT_R32G32B32A32_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32B32A32_SNORM), 0, 4, 4, 16}, + {GMM_FORMAT_R32G32B32A32_FLOAT_TYPE, static_cast(RSS::SURFACE_FORMAT_R32G32B32A32_FLOAT), 0, 4, 4, 16}}, // ZE_IMAGE_FORMAT_LAYOUT_10_10_10_2 - {{GMM_FORMAT_R10G10B10A2_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R10G10B10A2_UINT), 0, 1, 1, 1}, {GMM_FORMAT_R10G10B10A2_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R10G10B10A2_SINT), 0, 1, 1, 1}, {GMM_FORMAT_R10G10B10A2_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R10G10B10A2_UNORM), 0, 1, 1, 1}, {GMM_FORMAT_R10G10B10A2_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R10G10B10A2_SNORM), 0, 1, 1, 1}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 1, 1, 1}}, + {{GMM_FORMAT_R10G10B10A2_UINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R10G10B10A2_UINT), 0, 1, 1, 1}, + {GMM_FORMAT_R10G10B10A2_SINT_TYPE, static_cast(RSS::SURFACE_FORMAT_R10G10B10A2_SINT), 0, 1, 1, 1}, + {GMM_FORMAT_R10G10B10A2_UNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R10G10B10A2_UNORM), 0, 1, 1, 1}, + {GMM_FORMAT_R10G10B10A2_SNORM_TYPE, static_cast(RSS::SURFACE_FORMAT_R10G10B10A2_SNORM), 0, 1, 1, 1}, + {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 1, 1, 1}}, // ZE_IMAGE_FORMAT_LAYOUT_11_11_10 - {{gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}}, + {{gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, + {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, + {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, + {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, + {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}}, // ZE_IMAGE_FORMAT_LAYOUT_5_6_5 - {{gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}}, + {{gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, + {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, + {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, + {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, + {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}}, // ZE_IMAGE_FORMAT_LAYOUT_5_5_5_1 - {{gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}}, + {{gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, + {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, + {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, + {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, + {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}}, // ZE_IMAGE_FORMAT_LAYOUT_4_4_4_4 - {{gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}}}; + {{gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, + {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, + {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, + {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}, + {gmmResourceFormatUndefined, static_cast(surfaceFormatUndefined), 0, 0, 0, 0}}}; const SHADER_CHANNEL_SELECT shaderChannelSelect[ZE_IMAGE_FORMAT_SWIZZLE_MAX + 1] = { RSS::SHADER_CHANNEL_SELECT_RED, diff --git a/level_zero/core/source/image_hw.inl b/level_zero/core/source/image_hw.inl index 108d2f947d..274df48e4d 100644 --- a/level_zero/core/source/image_hw.inl +++ b/level_zero/core/source/image_hw.inl @@ -130,7 +130,9 @@ bool ImageCoreFamily::initialize(Device *device, const ze_image_d { surfaceState = GfxFamily::cmdInitRenderSurfaceState; - NEO::setImageSurfaceState(&surfaceState, imgInfo, gmm, *gmmHelper, __GMM_NO_CUBE_MAP, this->allocation->getGpuAddress(), surfaceOffsets, desc->format.layout == ZE_IMAGE_FORMAT_LAYOUT_NV12); + NEO::setImageSurfaceState(&surfaceState, imgInfo, gmm, *gmmHelper, __GMM_NO_CUBE_MAP, + this->allocation->getGpuAddress(), surfaceOffsets, + desc->format.layout == ZE_IMAGE_FORMAT_LAYOUT_NV12); NEO::setImageSurfaceStateDimensions(&surfaceState, imgInfo, __GMM_NO_CUBE_MAP, surfaceType); surfaceState.setSurfaceMinLod(0u); @@ -166,14 +168,18 @@ bool ImageCoreFamily::initialize(Device *device, const ze_image_d imgInfoRedescirebed.qPitch = imgInfo.qPitch; redescribedSurfaceState = GfxFamily::cmdInitRenderSurfaceState; - NEO::setImageSurfaceState(&redescribedSurfaceState, imgInfoRedescirebed, gmm, *gmmHelper, __GMM_NO_CUBE_MAP, this->allocation->getGpuAddress(), surfaceOffsets, desc->format.layout == ZE_IMAGE_FORMAT_LAYOUT_NV12); + NEO::setImageSurfaceState(&redescribedSurfaceState, imgInfoRedescirebed, gmm, *gmmHelper, + __GMM_NO_CUBE_MAP, this->allocation->getGpuAddress(), surfaceOffsets, + desc->format.layout == ZE_IMAGE_FORMAT_LAYOUT_NV12); NEO::setImageSurfaceStateDimensions(&redescribedSurfaceState, imgInfoRedescirebed, __GMM_NO_CUBE_MAP, surfaceType); redescribedSurfaceState.setSurfaceMinLod(0u); redescribedSurfaceState.setMipCountLod(0u); NEO::setMipTailStartLod(&redescribedSurfaceState, gmm); - if (imgInfoRedescirebed.surfaceFormat->GMMSurfaceFormat == GMM_FORMAT_R8_UINT_TYPE || imgInfoRedescirebed.surfaceFormat->GMMSurfaceFormat == GMM_FORMAT_R16_UINT_TYPE || imgInfoRedescirebed.surfaceFormat->GMMSurfaceFormat == GMM_FORMAT_R32_UINT_TYPE) { + if (imgInfoRedescirebed.surfaceFormat->GMMSurfaceFormat == GMM_FORMAT_R8_UINT_TYPE || + imgInfoRedescirebed.surfaceFormat->GMMSurfaceFormat == GMM_FORMAT_R16_UINT_TYPE || + imgInfoRedescirebed.surfaceFormat->GMMSurfaceFormat == GMM_FORMAT_R32_UINT_TYPE) { redescribedSurfaceState.setShaderChannelSelectRed(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_RED); redescribedSurfaceState.setShaderChannelSelectGreen(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO); redescribedSurfaceState.setShaderChannelSelectBlue(RENDER_SURFACE_STATE::SHADER_CHANNEL_SELECT_ZERO); diff --git a/level_zero/core/source/kernel_hw.h b/level_zero/core/source/kernel_hw.h index 56231324b1..4b8d5de479 100644 --- a/level_zero/core/source/kernel_hw.h +++ b/level_zero/core/source/kernel_hw.h @@ -28,7 +28,8 @@ struct KernelHw : public KernelImp { uintptr_t baseAddress = static_cast(alloc->getGpuAddress()); auto sshAlignmentMask = NEO::EncodeSurfaceState::getSurfaceBaseAddressAlignmentMask(); - baseAddress &= sshAlignmentMask; // chop-off misalligned bytes, it into account in bufferOffset patch token + // Remove misalligned bytes, accounted for in in bufferOffset patch token + baseAddress &= sshAlignmentMask; auto offset = ptrDiff(address, reinterpret_cast(baseAddress)); size_t sizeTillEndOfSurface = alloc->getUnderlyingBufferSize() - offset; diff --git a/level_zero/core/source/kernel_imp.cpp b/level_zero/core/source/kernel_imp.cpp index f242dc28f5..320ea8b017 100644 --- a/level_zero/core/source/kernel_imp.cpp +++ b/level_zero/core/source/kernel_imp.cpp @@ -235,6 +235,7 @@ ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY, auto itemsInGroup = Math::computeTotalElementsCount(groupSize); if (itemsInGroup > module->getMaxGroupSize()) { + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } auto grfSize = kernelImmData->getDescriptor().kernelAttributes.grfSize; diff --git a/level_zero/core/source/memory.cpp b/level_zero/core/source/memory.cpp index c8340dc346..6f73e28c85 100644 --- a/level_zero/core/source/memory.cpp +++ b/level_zero/core/source/memory.cpp @@ -31,7 +31,7 @@ ze_result_t DriverHandleImp::openIpcMemHandle(ze_device_handle_t hDevice, ze_ipc ze_ipc_memory_flag_t flags, void **ptr) { uint64_t handle = *(pIpcHandle.data); NEO::osHandle osHandle = static_cast(handle); - NEO::AllocationProperties unifiedMemoryProperties{static_cast(hDevice)->getRootDeviceIndex(), + NEO::AllocationProperties unifiedMemoryProperties{Device::fromHandle(hDevice)->getRootDeviceIndex(), MemoryConstants::pageSize, NEO::GraphicsAllocation::AllocationType::BUFFER}; NEO::GraphicsAllocation *alloc = @@ -95,6 +95,7 @@ ze_result_t DriverHandleImp::getMemAddressRange(const void *ptr, void **pBase, s return ZE_RESULT_SUCCESS; } + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } @@ -126,7 +127,7 @@ ze_result_t DriverHandleImp::allocDeviceMem(ze_device_handle_t hDevice, ze_devic NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY); unifiedMemoryProperties.allocationFlags.flags.shareable = 1u; void *usmPtr = - svmAllocsManager->createUnifiedMemoryAllocation(static_cast(hDevice)->getRootDeviceIndex(), + svmAllocsManager->createUnifiedMemoryAllocation(Device::fromHandle(hDevice)->getRootDeviceIndex(), size, unifiedMemoryProperties); if (usmPtr == nullptr) { return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY; @@ -146,7 +147,7 @@ ze_result_t DriverHandleImp::allocSharedMem(ze_device_handle_t hDevice, ze_devic NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY); auto usmPtr = - svmAllocsManager->createSharedUnifiedMemoryAllocation(static_cast(hDevice)->getRootDeviceIndex(), + svmAllocsManager->createSharedUnifiedMemoryAllocation(Device::fromHandle(hDevice)->getRootDeviceIndex(), size, unifiedMemoryProperties, static_cast(L0::Device::fromHandle(hDevice))); diff --git a/level_zero/core/source/memory_operations_helper.h b/level_zero/core/source/memory_operations_helper.h index 578df6f0c8..2e57a8f37e 100644 --- a/level_zero/core/source/memory_operations_helper.h +++ b/level_zero/core/source/memory_operations_helper.h @@ -30,6 +30,7 @@ static ze_result_t changeMemoryOperationStatusToL0ResultType(NEO::MemoryOperatio return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE; default: + DEBUG_BREAK_IF(true); return ZE_RESULT_ERROR_UNKNOWN; } } diff --git a/level_zero/core/source/module_imp.cpp b/level_zero/core/source/module_imp.cpp index 89360fa5ac..07c90f1273 100644 --- a/level_zero/core/source/module_imp.cpp +++ b/level_zero/core/source/module_imp.cpp @@ -31,10 +31,10 @@ namespace L0 { -namespace ZeBuildOptions { +namespace BuildOptions { ConstStringRef optDisable = "-ze-opt-disable"; ConstStringRef greaterThan4GbRequired = "-ze-opt-greater-than-4GB-buffer-required"; -} // namespace ZeBuildOptions +} // namespace BuildOptions struct ModuleTranslationUnit { ModuleTranslationUnit(L0::Device *device) @@ -361,8 +361,8 @@ void ModuleImp::createBuildOptions(const char *pBuildFlags, std::string &apiOpti std::string buildFlags(pBuildFlags); apiOptions = pBuildFlags; - moveBuildOption(apiOptions, apiOptions, NEO::CompilerOptions::optDisable, ZeBuildOptions::optDisable); - moveBuildOption(internalBuildOptions, apiOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired, ZeBuildOptions::greaterThan4GbRequired); + moveBuildOption(apiOptions, apiOptions, NEO::CompilerOptions::optDisable, BuildOptions::optDisable); + moveBuildOption(internalBuildOptions, apiOptions, NEO::CompilerOptions::greaterThan4gbBuffersRequired, BuildOptions::greaterThan4GbRequired); createBuildExtraOptions(apiOptions, internalBuildOptions); } } diff --git a/level_zero/core/source/sampler_imp.cpp b/level_zero/core/source/sampler_imp.cpp index 35224e68ef..3380b9c736 100644 --- a/level_zero/core/source/sampler_imp.cpp +++ b/level_zero/core/source/sampler_imp.cpp @@ -32,7 +32,11 @@ Sampler *Sampler::create(uint32_t productFamily, Device *device, const ze_sample SamplerImp *sampler = nullptr; if (allocator) { sampler = static_cast((*allocator)()); - sampler->initialize(device, desc); + if (sampler->initialize(device, desc)) { + delete sampler; + DEBUG_BREAK_IF(true); + return nullptr; + } } return sampler;