mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Use device from API function in clGetKernelWorkGroupInfo/SubGroupInfo
store execution environment reference in Kernel class

Related-To: NEO-5001
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
ae3ad3e8bc
commit
52d96af5f0
@ -1918,15 +1918,19 @@ cl_int CL_API_CALL clGetKernelWorkGroupInfo(cl_kernel kernel,
|
||||
"paramValue", NEO::FileLoggerInstance().infoPointerToString(paramValue, paramValueSize),
|
||||
"paramValueSizeRet", paramValueSizeRet);
|
||||
|
||||
auto pKernel = castToObject<Kernel>(kernel);
|
||||
retVal = pKernel
|
||||
? pKernel->getWorkGroupInfo(
|
||||
device,
|
||||
paramName,
|
||||
paramValueSize,
|
||||
paramValue,
|
||||
paramValueSizeRet)
|
||||
: CL_INVALID_KERNEL;
|
||||
Kernel *pKernel = nullptr;
|
||||
ClDevice *pClDevice = nullptr;
|
||||
retVal = validateObjects(WithCastToInternal(device, &pClDevice),
|
||||
WithCastToInternal(kernel, &pKernel));
|
||||
|
||||
if (CL_SUCCESS == retVal) {
|
||||
retVal = pKernel->getWorkGroupInfo(
|
||||
*pClDevice,
|
||||
paramName,
|
||||
paramValueSize,
|
||||
paramValue,
|
||||
paramValueSizeRet);
|
||||
}
|
||||
TRACING_EXIT(clGetKernelWorkGroupInfo, &retVal);
|
||||
return retVal;
|
||||
}
|
||||
@ -5067,7 +5071,8 @@ cl_int CL_API_CALL clGetKernelSubGroupInfoKHR(cl_kernel kernel,
|
||||
"paramValueSizeRet", paramValueSizeRet);
|
||||
|
||||
Kernel *pKernel = nullptr;
|
||||
retVal = validateObjects(device,
|
||||
ClDevice *pClDevice = nullptr;
|
||||
retVal = validateObjects(WithCastToInternal(device, &pClDevice),
|
||||
WithCastToInternal(kernel, &pKernel));
|
||||
|
||||
if (CL_SUCCESS != retVal) {
|
||||
@ -5078,7 +5083,7 @@ cl_int CL_API_CALL clGetKernelSubGroupInfoKHR(cl_kernel kernel,
|
||||
case CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE:
|
||||
case CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE:
|
||||
case CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL:
|
||||
return pKernel->getSubGroupInfo(paramName,
|
||||
return pKernel->getSubGroupInfo(*pClDevice, paramName,
|
||||
inputValueSize, inputValue,
|
||||
paramValueSize, paramValue,
|
||||
paramValueSizeRet);
|
||||
@ -5167,7 +5172,8 @@ cl_int CL_API_CALL clGetKernelSubGroupInfo(cl_kernel kernel,
|
||||
"paramValueSizeRet", paramValueSizeRet);
|
||||
|
||||
Kernel *pKernel = nullptr;
|
||||
retVal = validateObjects(device,
|
||||
ClDevice *pClDevice = nullptr;
|
||||
retVal = validateObjects(WithCastToInternal(device, &pClDevice),
|
||||
WithCastToInternal(kernel, &pKernel));
|
||||
|
||||
if (CL_SUCCESS != retVal) {
|
||||
@ -5175,7 +5181,7 @@ cl_int CL_API_CALL clGetKernelSubGroupInfo(cl_kernel kernel,
|
||||
return retVal;
|
||||
}
|
||||
|
||||
retVal = pKernel->getSubGroupInfo(paramName,
|
||||
retVal = pKernel->getSubGroupInfo(*pClDevice, paramName,
|
||||
inputValueSize, inputValue,
|
||||
paramValueSize, paramValue,
|
||||
paramValueSizeRet);
|
||||
|
@ -68,6 +68,7 @@ Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, bool schedu
|
||||
: slmTotalSize(kernelInfoArg.workloadInfo.slmStaticSize),
|
||||
isParentKernel((kernelInfoArg.patchInfo.executionEnvironment != nullptr) ? (kernelInfoArg.patchInfo.executionEnvironment->HasDeviceEnqueue != 0) : false),
|
||||
isSchedulerKernel(schedulerKernel),
|
||||
executionEnvironment(programArg->getExecutionEnvironment()),
|
||||
program(programArg),
|
||||
deviceVector(programArg->getDevices()),
|
||||
kernelInfo(kernelInfoArg) {
|
||||
@ -256,7 +257,7 @@ cl_int Kernel::initialize() {
|
||||
retVal = CL_OUT_OF_RESOURCES;
|
||||
break;
|
||||
}
|
||||
kernelDeviceInfos[rootDeviceIndex].privateSurface = getDevice().getMemoryManager()->allocateGraphicsMemoryWithProperties(
|
||||
kernelDeviceInfos[rootDeviceIndex].privateSurface = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties(
|
||||
{rootDeviceIndex,
|
||||
static_cast<size_t>(kernelDeviceInfos[rootDeviceIndex].privateSurfaceSize),
|
||||
GraphicsAllocation::AllocationType::PRIVATE_SURFACE,
|
||||
@ -551,7 +552,7 @@ cl_int Kernel::getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName, size_t
|
||||
return retVal;
|
||||
}
|
||||
|
||||
cl_int Kernel::getWorkGroupInfo(cl_device_id device, cl_kernel_work_group_info paramName,
|
||||
cl_int Kernel::getWorkGroupInfo(ClDevice &device, cl_kernel_work_group_info paramName,
|
||||
size_t paramValueSize, void *paramValue,
|
||||
size_t *paramValueSizeRet) const {
|
||||
cl_int retVal = CL_INVALID_VALUE;
|
||||
@ -566,7 +567,7 @@ cl_int Kernel::getWorkGroupInfo(cl_device_id device, cl_kernel_work_group_info p
|
||||
cl_ulong scratchSize;
|
||||
cl_ulong privateMemSize;
|
||||
size_t maxWorkgroupSize;
|
||||
const auto &hwInfo = getDevice().getHardwareInfo();
|
||||
const auto &hwInfo = device.getHardwareInfo();
|
||||
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet);
|
||||
|
||||
@ -630,7 +631,7 @@ cl_int Kernel::getWorkGroupInfo(cl_device_id device, cl_kernel_work_group_info p
|
||||
return retVal;
|
||||
}
|
||||
|
||||
cl_int Kernel::getSubGroupInfo(cl_kernel_sub_group_info paramName,
|
||||
cl_int Kernel::getSubGroupInfo(ClDevice &clDevice, cl_kernel_sub_group_info paramName,
|
||||
size_t inputValueSize, const void *inputValue,
|
||||
size_t paramValueSize, void *paramValue,
|
||||
size_t *paramValueSizeRet) const {
|
||||
@ -660,7 +661,7 @@ cl_int Kernel::getSubGroupInfo(cl_kernel_sub_group_info paramName,
|
||||
}
|
||||
numDimensions = inputValueSize / sizeof(size_t);
|
||||
if (numDimensions == 0 ||
|
||||
numDimensions > static_cast<size_t>(getDevice().getDeviceInfo().maxWorkItemDimensions)) {
|
||||
numDimensions > static_cast<size_t>(clDevice.getDeviceInfo().maxWorkItemDimensions)) {
|
||||
return CL_INVALID_VALUE;
|
||||
}
|
||||
}
|
||||
@ -674,7 +675,7 @@ cl_int Kernel::getSubGroupInfo(cl_kernel_sub_group_info paramName,
|
||||
}
|
||||
numDimensions = paramValueSize / sizeof(size_t);
|
||||
if (numDimensions == 0 ||
|
||||
numDimensions > static_cast<size_t>(getDevice().getDeviceInfo().maxWorkItemDimensions)) {
|
||||
numDimensions > static_cast<size_t>(clDevice.getDeviceInfo().maxWorkItemDimensions)) {
|
||||
return CL_INVALID_VALUE;
|
||||
}
|
||||
}
|
||||
@ -749,7 +750,7 @@ void Kernel::substituteKernelHeap(void *newKernelHeap, size_t newKernelHeapSize)
|
||||
auto &heapInfo = pKernelInfo->heapInfo;
|
||||
heapInfo.KernelHeapSize = static_cast<uint32_t>(newKernelHeapSize);
|
||||
pKernelInfo->isKernelHeapSubstituted = true;
|
||||
auto memoryManager = getDevice().getMemoryManager();
|
||||
auto memoryManager = executionEnvironment.memoryManager.get();
|
||||
|
||||
auto currentAllocationSize = pKernelInfo->kernelAllocation->getUnderlyingBufferSize();
|
||||
bool status = false;
|
||||
@ -1068,7 +1069,7 @@ inline void Kernel::makeArgsResident(CommandStreamReceiver &commandStreamReceive
|
||||
if (kernelArguments[argIndex].object) {
|
||||
if (kernelArguments[argIndex].type == SVM_ALLOC_OBJ) {
|
||||
auto pSVMAlloc = (GraphicsAllocation *)kernelArguments[argIndex].object;
|
||||
auto pageFaultManager = getDevice().getMemoryManager()->getPageFaultManager();
|
||||
auto pageFaultManager = executionEnvironment.memoryManager->getPageFaultManager();
|
||||
if (pageFaultManager &&
|
||||
this->isUnifiedMemorySyncRequired) {
|
||||
pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(pSVMAlloc->getGpuAddress()));
|
||||
@ -1703,7 +1704,7 @@ void Kernel::createReflectionSurface() {
|
||||
kernelReflectionSize += blockCount * alignUp(maxConstantBufferSize, sizeof(void *));
|
||||
kernelReflectionSize += parentImageCount * sizeof(IGIL_ImageParamters);
|
||||
kernelReflectionSize += parentSamplerCount * sizeof(IGIL_ParentSamplerParams);
|
||||
kernelReflectionSurface = getDevice().getMemoryManager()->allocateGraphicsMemoryWithProperties({getDevice().getRootDeviceIndex(), kernelReflectionSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER, getDevice().getDeviceBitfield()});
|
||||
kernelReflectionSurface = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties({getDevice().getRootDeviceIndex(), kernelReflectionSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER, getDevice().getDeviceBitfield()});
|
||||
|
||||
for (uint32_t i = 0; i < blockCount; i++) {
|
||||
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
|
||||
@ -1777,7 +1778,7 @@ void Kernel::createReflectionSurface() {
|
||||
|
||||
if (DebugManager.flags.ForceDispatchScheduler.get()) {
|
||||
if (this->isSchedulerKernel && kernelReflectionSurface == nullptr) {
|
||||
kernelReflectionSurface = getDevice().getMemoryManager()->allocateGraphicsMemoryWithProperties({getDevice().getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER, getDevice().getDeviceBitfield()});
|
||||
kernelReflectionSurface = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties({getDevice().getRootDeviceIndex(), MemoryConstants::pageSize, GraphicsAllocation::AllocationType::DEVICE_QUEUE_BUFFER, getDevice().getDeviceBitfield()});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -17,6 +17,7 @@
|
||||
|
||||
#include "opencl/extensions/public/cl_ext_private.h"
|
||||
#include "opencl/source/api/cl_types.h"
|
||||
#include "opencl/source/cl_device/cl_device.h"
|
||||
#include "opencl/source/device_queue/device_queue.h"
|
||||
#include "opencl/source/helpers/base_object.h"
|
||||
#include "opencl/source/helpers/properties_helper.h"
|
||||
@ -143,10 +144,10 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
cl_int getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName,
|
||||
size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const;
|
||||
|
||||
cl_int getWorkGroupInfo(cl_device_id device, cl_kernel_work_group_info paramName,
|
||||
cl_int getWorkGroupInfo(ClDevice &clDevice, cl_kernel_work_group_info paramName,
|
||||
size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const;
|
||||
|
||||
cl_int getSubGroupInfo(cl_kernel_sub_group_info paramName,
|
||||
cl_int getSubGroupInfo(ClDevice &device, cl_kernel_sub_group_info paramName,
|
||||
size_t inputValueSize, const void *inputValue,
|
||||
size_t paramValueSize, void *paramValue,
|
||||
size_t *paramValueSizeRet) const;
|
||||
@ -509,7 +510,7 @@ class Kernel : public BaseObject<_cl_kernel> {
|
||||
const ClDevice &getDevice() const {
|
||||
return *deviceVector[0];
|
||||
}
|
||||
|
||||
const ExecutionEnvironment &executionEnvironment;
|
||||
Program *program;
|
||||
const ClDeviceVector &deviceVector;
|
||||
const KernelInfo &kernelInfo;
|
||||
|
@ -275,6 +275,8 @@ class Program : public BaseObject<_cl_program> {
|
||||
return 0 != exposedKernels;
|
||||
}
|
||||
|
||||
const ExecutionEnvironment &getExecutionEnvironment() const { return executionEnvironment; }
|
||||
|
||||
protected:
|
||||
MOCKABLE_VIRTUAL cl_int createProgramFromBinary(const void *pBinary, size_t binarySize, ClDevice &clDevice);
|
||||
|
||||
|
@ -281,7 +281,7 @@ TEST_P(KernelTest, GivenKernelWorkGroupSizeWhenGettingWorkGroupInfoThenWorkGroup
|
||||
pKernel->maxKernelWorkGroupSize = static_cast<uint32_t>(kernelMaxWorkGroupSize);
|
||||
|
||||
retVal = pKernel->getWorkGroupInfo(
|
||||
pClDevice,
|
||||
*pClDevice,
|
||||
paramName,
|
||||
paramValueSize,
|
||||
&paramValue,
|
||||
@ -299,7 +299,7 @@ TEST_P(KernelTest, GivenKernelCompileWorkGroupSizeWhenGettingWorkGroupInfoThenCo
|
||||
size_t paramValueSizeRet = 0;
|
||||
|
||||
retVal = pKernel->getWorkGroupInfo(
|
||||
pClDevice,
|
||||
*pClDevice,
|
||||
paramName,
|
||||
paramValueSize,
|
||||
&paramValue,
|
||||
@ -313,7 +313,7 @@ TEST_P(KernelTest, GivenInvalidParamNameWhenGettingWorkGroupInfoThenInvalidValue
|
||||
size_t paramValueSizeRet = 0x1234u;
|
||||
|
||||
retVal = pKernel->getWorkGroupInfo(
|
||||
pClDevice,
|
||||
*pClDevice,
|
||||
0,
|
||||
0,
|
||||
nullptr,
|
||||
@ -2653,13 +2653,13 @@ TEST(KernelTest, givenKernelWhenDebugFlagToUseMaxSimdForCalculationsIsUsedThenMa
|
||||
kernel.executionEnvironment.LargestCompiledSIMDSize = CommonConstants::maximalSimdSize;
|
||||
|
||||
size_t maxKernelWkgSize;
|
||||
kernel.mockKernel->getWorkGroupInfo(device.get(), CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr);
|
||||
kernel.mockKernel->getWorkGroupInfo(*device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr);
|
||||
EXPECT_EQ(1024u, maxKernelWkgSize);
|
||||
kernel.executionEnvironment.LargestCompiledSIMDSize = 16;
|
||||
kernel.mockKernel->getWorkGroupInfo(device.get(), CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr);
|
||||
kernel.mockKernel->getWorkGroupInfo(*device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr);
|
||||
EXPECT_EQ(512u, maxKernelWkgSize);
|
||||
kernel.executionEnvironment.LargestCompiledSIMDSize = 8;
|
||||
kernel.mockKernel->getWorkGroupInfo(device.get(), CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr);
|
||||
kernel.mockKernel->getWorkGroupInfo(*device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxKernelWkgSize, nullptr);
|
||||
EXPECT_EQ(256u, maxKernelWkgSize);
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user