From 52d96af5f0218befd4097ced8d777448b689ce51 Mon Sep 17 00:00:00 2001 From: Mateusz Jablonski Date: Tue, 24 Nov 2020 14:20:33 +0000 Subject: [PATCH] Use device from API function in clGetKernelWorkGroupInfo/SubGroupInfo store execution environment reference in Kernel class Related-To: NEO-5001 Signed-off-by: Mateusz Jablonski --- opencl/source/api/api.cpp | 32 +++++++++++-------- opencl/source/kernel/kernel.cpp | 21 ++++++------ opencl/source/kernel/kernel.h | 7 ++-- opencl/source/program/program.h | 2 ++ opencl/test/unit_test/kernel/kernel_tests.cpp | 12 +++---- 5 files changed, 42 insertions(+), 32 deletions(-) diff --git a/opencl/source/api/api.cpp b/opencl/source/api/api.cpp index e32bee0397..31e0decd6f 100644 --- a/opencl/source/api/api.cpp +++ b/opencl/source/api/api.cpp @@ -1918,15 +1918,19 @@ cl_int CL_API_CALL clGetKernelWorkGroupInfo(cl_kernel kernel, "paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize), "paramValueSizeRet", paramValueSizeRet); - auto pKernel = castToObject(kernel); - retVal = pKernel - ? 
pKernel->getWorkGroupInfo( - device, - paramName, - paramValueSize, - paramValue, - paramValueSizeRet) - : CL_INVALID_KERNEL; + Kernel *pKernel = nullptr; + ClDevice *pClDevice = nullptr; + retVal = validateObjects(WithCastToInternal(device, &pClDevice), + WithCastToInternal(kernel, &pKernel)); + + if (CL_SUCCESS == retVal) { + retVal = pKernel->getWorkGroupInfo( + *pClDevice, + paramName, + paramValueSize, + paramValue, + paramValueSizeRet); + } TRACING_EXIT(clGetKernelWorkGroupInfo, &retVal); return retVal; } @@ -5067,7 +5071,8 @@ cl_int CL_API_CALL clGetKernelSubGroupInfoKHR(cl_kernel kernel, "paramValueSizeRet", paramValueSizeRet); Kernel *pKernel = nullptr; - retVal = validateObjects(device, + ClDevice *pClDevice = nullptr; + retVal = validateObjects(WithCastToInternal(device, &pClDevice), WithCastToInternal(kernel, &pKernel)); if (CL_SUCCESS != retVal) { @@ -5078,7 +5083,7 @@ cl_int CL_API_CALL clGetKernelSubGroupInfoKHR(cl_kernel kernel, case CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE: case CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE: case CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL: - return pKernel->getSubGroupInfo(paramName, + return pKernel->getSubGroupInfo(*pClDevice, paramName, inputValueSize, inputValue, paramValueSize, paramValue, paramValueSizeRet); @@ -5167,7 +5172,8 @@ cl_int CL_API_CALL clGetKernelSubGroupInfo(cl_kernel kernel, "paramValueSizeRet", paramValueSizeRet); Kernel *pKernel = nullptr; - retVal = validateObjects(device, + ClDevice *pClDevice = nullptr; + retVal = validateObjects(WithCastToInternal(device, &pClDevice), WithCastToInternal(kernel, &pKernel)); if (CL_SUCCESS != retVal) { @@ -5175,7 +5181,7 @@ cl_int CL_API_CALL clGetKernelSubGroupInfo(cl_kernel kernel, return retVal; } - retVal = pKernel->getSubGroupInfo(paramName, + retVal = pKernel->getSubGroupInfo(*pClDevice, paramName, inputValueSize, inputValue, paramValueSize, paramValue, paramValueSizeRet); diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 
be91eb5074..3e7fef9029 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -68,6 +68,7 @@ Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, bool schedu : slmTotalSize(kernelInfoArg.workloadInfo.slmStaticSize), isParentKernel((kernelInfoArg.patchInfo.executionEnvironment != nullptr) ? (kernelInfoArg.patchInfo.executionEnvironment->HasDeviceEnqueue != 0) : false), isSchedulerKernel(schedulerKernel), + executionEnvironment(programArg->getExecutionEnvironment()), program(programArg), deviceVector(programArg->getDevices()), kernelInfo(kernelInfoArg) { @@ -256,7 +257,7 @@ cl_int Kernel::initialize() { retVal = CL_OUT_OF_RESOURCES; break; } - kernelDeviceInfos[rootDeviceIndex].privateSurface = getDevice().getMemoryManager()->allocateGraphicsMemoryWithProperties( + kernelDeviceInfos[rootDeviceIndex].privateSurface = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties( {rootDeviceIndex, static_cast(kernelDeviceInfos[rootDeviceIndex].privateSurfaceSize), GraphicsAllocation::AllocationType::PRIVATE_SURFACE, @@ -551,7 +552,7 @@ cl_int Kernel::getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName, size_t return retVal; } -cl_int Kernel::getWorkGroupInfo(cl_device_id device, cl_kernel_work_group_info paramName, +cl_int Kernel::getWorkGroupInfo(ClDevice &device, cl_kernel_work_group_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { cl_int retVal = CL_INVALID_VALUE; @@ -566,7 +567,7 @@ cl_int Kernel::getWorkGroupInfo(cl_device_id device, cl_kernel_work_group_info p cl_ulong scratchSize; cl_ulong privateMemSize; size_t maxWorkgroupSize; - const auto &hwInfo = getDevice().getHardwareInfo(); + const auto &hwInfo = device.getHardwareInfo(); auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet); @@ -630,7 +631,7 @@ cl_int Kernel::getWorkGroupInfo(cl_device_id device, 
cl_kernel_work_group_info p return retVal; } -cl_int Kernel::getSubGroupInfo(cl_kernel_sub_group_info paramName, +cl_int Kernel::getSubGroupInfo(ClDevice &clDevice, cl_kernel_sub_group_info paramName, size_t inputValueSize, const void *inputValue, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { @@ -660,7 +661,7 @@ cl_int Kernel::getSubGroupInfo(cl_kernel_sub_group_info paramName, } numDimensions = inputValueSize / sizeof(size_t); if (numDimensions == 0 || - numDimensions > static_cast(getDevice().getDeviceInfo().maxWorkItemDimensions)) { + numDimensions > static_cast(clDevice.getDeviceInfo().maxWorkItemDimensions)) { return CL_INVALID_VALUE; } } @@ -674,7 +675,7 @@ cl_int Kernel::getSubGroupInfo(cl_kernel_sub_group_info paramName, } numDimensions = paramValueSize / sizeof(size_t); if (numDimensions == 0 || - numDimensions > static_cast(getDevice().getDeviceInfo().maxWorkItemDimensions)) { + numDimensions > static_cast(clDevice.getDeviceInfo().maxWorkItemDimensions)) { return CL_INVALID_VALUE; } } @@ -749,7 +750,7 @@ void Kernel::substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize) auto &heapInfo = pKernelInfo->heapInfo; heapInfo.KernelHeapSize = static_cast(newKernelHeapSize); pKernelInfo->isKernelHeapSubstituted = true; - auto memoryManager = getDevice().getMemoryManager(); + auto memoryManager = executionEnvironment.memoryManager.get(); auto currentAllocationSize = pKernelInfo->kernelAllocation->getUnderlyingBufferSize(); bool status = false; @@ -1068,7 +1069,7 @@ inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceive if (kernelArguments[argIndex].object) { if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) { auto pSVMAlloc = (GraphicsAllocation *)kernelArguments[argIndex].object; - auto pageFaultManager = getDevice().getMemoryManager()->getPageFaultManager(); + auto pageFaultManager = executionEnvironment.memoryManager->getPageFaultManager(); if (pageFaultManager && 
this->isUnifiedMemorySyncRequired) { pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast(pSVMAlloc->getGpuAddress())); @@ -1703,7 +1704,7 @@ void Kernel::createReflectionSurface() { kernelReflectionSize += blockCount * alignUp(maxConstantBufferSize, sizeof(void *)); kernelReflectionSize += parentImageCount * sizeof(IGIL_ImageParamters); kernelReflectionSize += parentSamplerCount * sizeof(IGIL_ParentSamplerParams); - kernelReflectionSurface = getDevice().getMemoryManager()->allocateGraphicsMemoryWithProperties({getDevice().getRootDeviceIndex(), kernelReflectionSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER, getDevice().getDeviceBitfield()}); + kernelReflectionSurface = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties({getDevice().getRootDeviceIndex(), kernelReflectionSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER, getDevice().getDeviceBitfield()}); for (uint32_t i = 0; i < blockCount; i++) { const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i); @@ -1777,7 +1778,7 @@ void Kernel::createReflectionSurface() { if (DebugManager.flags.ForceDispatchScheduler.get()) { if (this->isSchedulerKernel && kernelReflectionSurface == nullptr) { - kernelReflectionSurface = getDevice().getMemoryManager()->allocateGraphicsMemoryWithProperties({getDevice().getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER, getDevice().getDeviceBitfield()}); + kernelReflectionSurface = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties({getDevice().getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER, getDevice().getDeviceBitfield()}); } } } diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index 2fd0e4a4e2..2cf9b651bf 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -17,6 +17,7 @@ #include "opencl/extensions/public/cl_ext_private.h" #include 
"opencl/source/api/cl_types.h" +#include "opencl/source/cl_device/cl_device.h" #include "opencl/source/device_queue/device_queue.h" #include "opencl/source/helpers/base_object.h" #include "opencl/source/helpers/properties_helper.h" @@ -143,10 +144,10 @@ class Kernel : public BaseObject<_cl_kernel> { cl_int getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const; - cl_int getWorkGroupInfo(cl_device_id device, cl_kernel_work_group_info paramName, + cl_int getWorkGroupInfo(ClDevice &clDevice, cl_kernel_work_group_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const; - cl_int getSubGroupInfo(cl_kernel_sub_group_info paramName, + cl_int getSubGroupInfo(ClDevice &device, cl_kernel_sub_group_info paramName, size_t inputValueSize, const void *inputValue, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const; @@ -509,7 +510,7 @@ class Kernel : public BaseObject<_cl_kernel> { const ClDevice &getDevice() const { return *deviceVector[0]; } - + const ExecutionEnvironment &executionEnvironment; Program *program; const ClDeviceVector &deviceVector; const KernelInfo &kernelInfo; diff --git a/opencl/source/program/program.h b/opencl/source/program/program.h index af77b72f2a..1c2526c2ad 100644 --- a/opencl/source/program/program.h +++ b/opencl/source/program/program.h @@ -275,6 +275,8 @@ class Program : public BaseObject<_cl_program> { return 0 != exposedKernels; } + const ExecutionEnvironment &getExecutionEnvironment() const { return executionEnvironment; } + protected: MOCKABLE_VIRTUAL cl_int createProgramFromBinary(const void *pBinary, size_t binarySize, ClDevice &clDevice); diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 504b899b5e..1caef7ee1a 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -281,7 +281,7 @@ 
TEST_P(KernelTest, GivenKernelWorkGroupSizeWhenGettingWorkGroupInfoThenWorkGroup pKernel->maxKernelWorkGroupSize = static_cast(kernelMaxWorkGroupSize); retVal = pKernel->getWorkGroupInfo( - pClDevice, + *pClDevice, paramName, paramValueSize, &paramValue, @@ -299,7 +299,7 @@ TEST_P(KernelTest, GivenKernelCompileWorkGroupSizeWhenGettingWorkGroupInfoThenCo size_t paramValueSizeRet = 0; retVal = pKernel->getWorkGroupInfo( - pClDevice, + *pClDevice, paramName, paramValueSize, &paramValue, @@ -313,7 +313,7 @@ TEST_P(KernelTest, GivenInvalidParamNameWhenGettingWorkGroupInfoThenInvalidValue size_t paramValueSizeRet = 0x1234u; retVal = pKernel->getWorkGroupInfo( - pClDevice, + *pClDevice, 0, 0, nullptr, @@ -2653,13 +2653,13 @@ TEST(KernelTest, givenKernelWhenDebugFlagToUseMaxSimdForCalculationsIsUsedThenMa kernel.executionEnvironment.LargestCompiledSIMDSize = CommonConstants::maximalSimdSize; size_t maxKernelWkgSize; - kernel.mockKernel->getWorkGroupInfo(device.get(), CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr); + kernel.mockKernel->getWorkGroupInfo(*device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr); EXPECT_EQ(1024u, maxKernelWkgSize); kernel.executionEnvironment.LargestCompiledSIMDSize = 16; - kernel.mockKernel->getWorkGroupInfo(device.get(), CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr); + kernel.mockKernel->getWorkGroupInfo(*device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr); EXPECT_EQ(512u, maxKernelWkgSize); kernel.executionEnvironment.LargestCompiledSIMDSize = 8; - kernel.mockKernel->getWorkGroupInfo(device.get(), CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr); + kernel.mockKernel->getWorkGroupInfo(*device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr); EXPECT_EQ(256u, maxKernelWkgSize); }