mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
feature: initial register count query implementation
Adds a per-kernel and per-device query to determine the number of GRF registers that a kernel was compiled for. This is an informal query for now, but may be added to a formally supported extension in the future. Related-To: NEO-9807 Signed-off-by: Ben Ashbaugh <ben.ashbaugh@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
3b4eb80e89
commit
976c6d3acc
@@ -343,6 +343,10 @@ typedef cl_bitfield cl_command_queue_mdapi_properties_intel;
|
||||
#define CL_DEVICE_EU_THREAD_COUNTS_INTEL 0x1000A // placeholder
|
||||
#define CL_KERNEL_EU_THREAD_COUNT_INTEL 0x1000B // placeholder
|
||||
|
||||
#if !defined(cl_intel_maximum_registers)
|
||||
#define CL_KERNEL_REGISTER_COUNT_INTEL 0x425B
|
||||
#endif
|
||||
|
||||
/*************************************************
|
||||
* cl_ext_float_atomics extension *
|
||||
*************************************************/
|
||||
|
||||
@@ -577,6 +577,7 @@ cl_int Kernel::getWorkGroupInfo(cl_kernel_work_group_info paramName,
|
||||
cl_ulong scratchSize;
|
||||
cl_ulong privateMemSize;
|
||||
size_t maxWorkgroupSize;
|
||||
cl_uint regCount;
|
||||
const auto &hwInfo = clDevice.getHardwareInfo();
|
||||
auto &gfxCoreHelper = this->getGfxCoreHelper();
|
||||
auto &clGfxCoreHelper = clDevice.getRootDeviceEnvironment().getHelper<ClGfxCoreHelper>();
|
||||
@@ -630,6 +631,11 @@ cl_int Kernel::getWorkGroupInfo(cl_kernel_work_group_info paramName,
|
||||
srcSize = sizeof(cl_uint);
|
||||
pSrc = &this->getKernelInfo().kernelDescriptor.kernelAttributes.numThreadsRequired;
|
||||
break;
|
||||
case CL_KERNEL_REGISTER_COUNT_INTEL:
|
||||
regCount = kernelDescriptor.kernelAttributes.numGrfRequired;
|
||||
srcSize = sizeof(cl_uint);
|
||||
pSrc = &regCount;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -3472,6 +3472,24 @@ TEST(KernelTest, givenKernelWithNumThreadsRequiredPatchTokenWhenQueryingEuThread
|
||||
EXPECT_EQ(123U, euThreadCount);
|
||||
}
|
||||
|
||||
TEST(KernelTest, givenKernelWithNumGRFRequiredPatchTokenWhenQueryingRegisterCountThenRegisterCountIsReturned) {
|
||||
cl_int retVal = CL_SUCCESS;
|
||||
KernelInfo kernelInfo = {};
|
||||
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.numGrfRequired = 213U;
|
||||
auto rootDeviceIndex = 0u;
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(NEO::defaultHwInfo.get(), rootDeviceIndex));
|
||||
auto program = std::make_unique<MockProgram>(toClDeviceVector(*device));
|
||||
MockKernel kernel(program.get(), kernelInfo, *device);
|
||||
|
||||
cl_uint regCount;
|
||||
size_t paramRetSize;
|
||||
retVal = kernel.getWorkGroupInfo(CL_KERNEL_REGISTER_COUNT_INTEL, sizeof(cl_uint), &regCount, &paramRetSize);
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(sizeof(cl_uint), paramRetSize);
|
||||
EXPECT_EQ(213U, regCount);
|
||||
}
|
||||
|
||||
HWTEST2_F(KernelTest, GivenInlineSamplersWhenSettingInlineSamplerThenDshIsPatched, SupportsSampler) {
|
||||
auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get())));
|
||||
MockKernelWithInternals kernel(*device);
|
||||
|
||||
Reference in New Issue
Block a user