Use device from API function in clGetKernelWorkGroupInfo/SubGroupInfo

Store execution environment reference in Kernel class

Related-To: NEO-5001
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2020-11-24 14:20:33 +00:00
committed by Compute-Runtime-Automation
parent ae3ad3e8bc
commit 52d96af5f0
5 changed files with 42 additions and 32 deletions

View File

@ -1918,15 +1918,19 @@ cl_int CL_API_CALL clGetKernelWorkGroupInfo(cl_kernel kernel,
"paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize),
"paramValueSizeRet", paramValueSizeRet);
auto pKernel = castToObject<Kernel>(kernel);
retVal = pKernel
? pKernel->getWorkGroupInfo(
device,
paramName,
paramValueSize,
paramValue,
paramValueSizeRet)
: CL_INVALID_KERNEL;
Kernel *pKernel = nullptr;
ClDevice *pClDevice = nullptr;
retVal = validateObjects(WithCastToInternal(device, &pClDevice),
WithCastToInternal(kernel, &pKernel));
if (CL_SUCCESS == retVal) {
retVal = pKernel->getWorkGroupInfo(
*pClDevice,
paramName,
paramValueSize,
paramValue,
paramValueSizeRet);
}
TRACING_EXIT(clGetKernelWorkGroupInfo, &retVal);
return retVal;
}
@ -5067,7 +5071,8 @@ cl_int CL_API_CALL clGetKernelSubGroupInfoKHR(cl_kernel kernel,
"paramValueSizeRet", paramValueSizeRet);
Kernel *pKernel = nullptr;
retVal = validateObjects(device,
ClDevice *pClDevice = nullptr;
retVal = validateObjects(WithCastToInternal(device, &pClDevice),
WithCastToInternal(kernel, &pKernel));
if (CL_SUCCESS != retVal) {
@ -5078,7 +5083,7 @@ cl_int CL_API_CALL clGetKernelSubGroupInfoKHR(cl_kernel kernel,
case CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE:
case CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE:
case CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL:
return pKernel->getSubGroupInfo(paramName,
return pKernel->getSubGroupInfo(*pClDevice, paramName,
inputValueSize, inputValue,
paramValueSize, paramValue,
paramValueSizeRet);
@ -5167,7 +5172,8 @@ cl_int CL_API_CALL clGetKernelSubGroupInfo(cl_kernel kernel,
"paramValueSizeRet", paramValueSizeRet);
Kernel *pKernel = nullptr;
retVal = validateObjects(device,
ClDevice *pClDevice = nullptr;
retVal = validateObjects(WithCastToInternal(device, &pClDevice),
WithCastToInternal(kernel, &pKernel));
if (CL_SUCCESS != retVal) {
@ -5175,7 +5181,7 @@ cl_int CL_API_CALL clGetKernelSubGroupInfo(cl_kernel kernel,
return retVal;
}
retVal = pKernel->getSubGroupInfo(paramName,
retVal = pKernel->getSubGroupInfo(*pClDevice, paramName,
inputValueSize, inputValue,
paramValueSize, paramValue,
paramValueSizeRet);

View File

@ -68,6 +68,7 @@ Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, bool schedu
: slmTotalSize(kernelInfoArg.workloadInfo.slmStaticSize),
isParentKernel((kernelInfoArg.patchInfo.executionEnvironment != nullptr) ? (kernelInfoArg.patchInfo.executionEnvironment->HasDeviceEnqueue != 0) : false),
isSchedulerKernel(schedulerKernel),
executionEnvironment(programArg->getExecutionEnvironment()),
program(programArg),
deviceVector(programArg->getDevices()),
kernelInfo(kernelInfoArg) {
@ -256,7 +257,7 @@ cl_int Kernel::initialize() {
retVal = CL_OUT_OF_RESOURCES;
break;
}
kernelDeviceInfos[rootDeviceIndex].privateSurface = getDevice().getMemoryManager()->allocateGraphicsMemoryWithProperties(
kernelDeviceInfos[rootDeviceIndex].privateSurface = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties(
{rootDeviceIndex,
static_cast<size_t>(kernelDeviceInfos[rootDeviceIndex].privateSurfaceSize),
GraphicsAllocation::AllocationType::PRIVATE_SURFACE,
@ -551,7 +552,7 @@ cl_int Kernel::getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName, size_t
return retVal;
}
cl_int Kernel::getWorkGroupInfo(cl_device_id device, cl_kernel_work_group_info paramName,
cl_int Kernel::getWorkGroupInfo(ClDevice &device, cl_kernel_work_group_info paramName,
size_t paramValueSize, void *paramValue,
size_t *paramValueSizeRet) const {
cl_int retVal = CL_INVALID_VALUE;
@ -566,7 +567,7 @@ cl_int Kernel::getWorkGroupInfo(cl_device_id device, cl_kernel_work_group_info p
cl_ulong scratchSize;
cl_ulong privateMemSize;
size_t maxWorkgroupSize;
const auto &hwInfo = getDevice().getHardwareInfo();
const auto &hwInfo = device.getHardwareInfo();
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet);
@ -630,7 +631,7 @@ cl_int Kernel::getWorkGroupInfo(cl_device_id device, cl_kernel_work_group_info p
return retVal;
}
cl_int Kernel::getSubGroupInfo(cl_kernel_sub_group_info paramName,
cl_int Kernel::getSubGroupInfo(ClDevice &clDevice, cl_kernel_sub_group_info paramName,
size_t inputValueSize, const void *inputValue,
size_t paramValueSize, void *paramValue,
size_t *paramValueSizeRet) const {
@ -660,7 +661,7 @@ cl_int Kernel::getSubGroupInfo(cl_kernel_sub_group_info paramName,
}
numDimensions = inputValueSize / sizeof(size_t);
if (numDimensions == 0 ||
numDimensions > static_cast<size_t>(getDevice().getDeviceInfo().maxWorkItemDimensions)) {
numDimensions > static_cast<size_t>(clDevice.getDeviceInfo().maxWorkItemDimensions)) {
return CL_INVALID_VALUE;
}
}
@ -674,7 +675,7 @@ cl_int Kernel::getSubGroupInfo(cl_kernel_sub_group_info paramName,
}
numDimensions = paramValueSize / sizeof(size_t);
if (numDimensions == 0 ||
numDimensions > static_cast<size_t>(getDevice().getDeviceInfo().maxWorkItemDimensions)) {
numDimensions > static_cast<size_t>(clDevice.getDeviceInfo().maxWorkItemDimensions)) {
return CL_INVALID_VALUE;
}
}
@ -749,7 +750,7 @@ void Kernel::substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize)
auto &heapInfo = pKernelInfo->heapInfo;
heapInfo.KernelHeapSize = static_cast<uint32_t>(newKernelHeapSize);
pKernelInfo->isKernelHeapSubstituted = true;
auto memoryManager = getDevice().getMemoryManager();
auto memoryManager = executionEnvironment.memoryManager.get();
auto currentAllocationSize = pKernelInfo->kernelAllocation->getUnderlyingBufferSize();
bool status = false;
@ -1068,7 +1069,7 @@ inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceive
if (kernelArguments[argIndex].object) {
if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) {
auto pSVMAlloc = (GraphicsAllocation *)kernelArguments[argIndex].object;
auto pageFaultManager = getDevice().getMemoryManager()->getPageFaultManager();
auto pageFaultManager = executionEnvironment.memoryManager->getPageFaultManager();
if (pageFaultManager &&
this->isUnifiedMemorySyncRequired) {
pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(pSVMAlloc->getGpuAddress()));
@ -1703,7 +1704,7 @@ void Kernel::createReflectionSurface() {
kernelReflectionSize += blockCount * alignUp(maxConstantBufferSize, sizeof(void *));
kernelReflectionSize += parentImageCount * sizeof(IGIL_ImageParamters);
kernelReflectionSize += parentSamplerCount * sizeof(IGIL_ParentSamplerParams);
kernelReflectionSurface = getDevice().getMemoryManager()->allocateGraphicsMemoryWithProperties({getDevice().getRootDeviceIndex(), kernelReflectionSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER, getDevice().getDeviceBitfield()});
kernelReflectionSurface = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties({getDevice().getRootDeviceIndex(), kernelReflectionSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER, getDevice().getDeviceBitfield()});
for (uint32_t i = 0; i < blockCount; i++) {
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
@ -1777,7 +1778,7 @@ void Kernel::createReflectionSurface() {
if (DebugManager.flags.ForceDispatchScheduler.get()) {
if (this->isSchedulerKernel && kernelReflectionSurface == nullptr) {
kernelReflectionSurface = getDevice().getMemoryManager()->allocateGraphicsMemoryWithProperties({getDevice().getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER, getDevice().getDeviceBitfield()});
kernelReflectionSurface = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties({getDevice().getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER, getDevice().getDeviceBitfield()});
}
}
}

View File

@ -17,6 +17,7 @@
#include "opencl/extensions/public/cl_ext_private.h"
#include "opencl/source/api/cl_types.h"
#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/device_queue/device_queue.h"
#include "opencl/source/helpers/base_object.h"
#include "opencl/source/helpers/properties_helper.h"
@ -143,10 +144,10 @@ class Kernel : public BaseObject<_cl_kernel> {
cl_int getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName,
size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const;
cl_int getWorkGroupInfo(cl_device_id device, cl_kernel_work_group_info paramName,
cl_int getWorkGroupInfo(ClDevice &clDevice, cl_kernel_work_group_info paramName,
size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const;
cl_int getSubGroupInfo(cl_kernel_sub_group_info paramName,
cl_int getSubGroupInfo(ClDevice &device, cl_kernel_sub_group_info paramName,
size_t inputValueSize, const void *inputValue,
size_t paramValueSize, void *paramValue,
size_t *paramValueSizeRet) const;
@ -509,7 +510,7 @@ class Kernel : public BaseObject<_cl_kernel> {
const ClDevice &getDevice() const {
return *deviceVector[0];
}
const ExecutionEnvironment &executionEnvironment;
Program *program;
const ClDeviceVector &deviceVector;
const KernelInfo &kernelInfo;

View File

@ -275,6 +275,8 @@ class Program : public BaseObject<_cl_program> {
return 0 != exposedKernels;
}
const ExecutionEnvironment &getExecutionEnvironment() const { return executionEnvironment; }
protected:
MOCKABLE_VIRTUAL cl_int createProgramFromBinary(const void *pBinary, size_t binarySize, ClDevice &clDevice);

View File

@ -281,7 +281,7 @@ TEST_P(KernelTest, GivenKernelWorkGroupSizeWhenGettingWorkGroupInfoThenWorkGroup
pKernel->maxKernelWorkGroupSize = static_cast<uint32_t>(kernelMaxWorkGroupSize);
retVal = pKernel->getWorkGroupInfo(
pClDevice,
*pClDevice,
paramName,
paramValueSize,
&paramValue,
@ -299,7 +299,7 @@ TEST_P(KernelTest, GivenKernelCompileWorkGroupSizeWhenGettingWorkGroupInfoThenCo
size_t paramValueSizeRet = 0;
retVal = pKernel->getWorkGroupInfo(
pClDevice,
*pClDevice,
paramName,
paramValueSize,
&paramValue,
@ -313,7 +313,7 @@ TEST_P(KernelTest, GivenInvalidParamNameWhenGettingWorkGroupInfoThenInvalidValue
size_t paramValueSizeRet = 0x1234u;
retVal = pKernel->getWorkGroupInfo(
pClDevice,
*pClDevice,
0,
0,
nullptr,
@ -2653,13 +2653,13 @@ TEST(KernelTest, givenKernelWhenDebugFlagToUseMaxSimdForCalculationsIsUsedThenMa
kernel.executionEnvironment.LargestCompiledSIMDSize = CommonConstants::maximalSimdSize;
size_t maxKernelWkgSize;
kernel.mockKernel->getWorkGroupInfo(device.get(), CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr);
kernel.mockKernel->getWorkGroupInfo(*device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr);
EXPECT_EQ(1024u, maxKernelWkgSize);
kernel.executionEnvironment.LargestCompiledSIMDSize = 16;
kernel.mockKernel->getWorkGroupInfo(device.get(), CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr);
kernel.mockKernel->getWorkGroupInfo(*device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr);
EXPECT_EQ(512u, maxKernelWkgSize);
kernel.executionEnvironment.LargestCompiledSIMDSize = 8;
kernel.mockKernel->getWorkGroupInfo(device.get(), CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr);
kernel.mockKernel->getWorkGroupInfo(*device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr);
EXPECT_EQ(256u, maxKernelWkgSize);
}