feature: initial register count query implementation

Adds a per-kernel and per-device query to determine the
number of GRF registers that a kernel was compiled for.
This is an informal query for now, but may be added to
a formally supported extension in the future.

Related-To: NEO-9807

Signed-off-by: Ben Ashbaugh <ben.ashbaugh@intel.com>
This commit is contained in:
Ben Ashbaugh
2023-12-01 13:33:08 -08:00
committed by Compute-Runtime-Automation
parent 3b4eb80e89
commit 976c6d3acc
3 changed files with 28 additions and 0 deletions

View File

@@ -343,6 +343,10 @@ typedef cl_bitfield cl_command_queue_mdapi_properties_intel;
#define CL_DEVICE_EU_THREAD_COUNTS_INTEL 0x1000A // placeholder
#define CL_KERNEL_EU_THREAD_COUNT_INTEL 0x1000B // placeholder
// Query token for the number of registers a kernel was compiled for.
// Only defined when cl_intel_maximum_registers is not already defined;
// presumably the header providing that extension supplies this token
// itself - TODO confirm against the extension header.
#ifndef cl_intel_maximum_registers
#define CL_KERNEL_REGISTER_COUNT_INTEL 0x425B
#endif // cl_intel_maximum_registers
/*************************************************
* cl_ext_float_atomics extension *
*************************************************/

View File

@@ -577,6 +577,7 @@ cl_int Kernel::getWorkGroupInfo(cl_kernel_work_group_info paramName,
cl_ulong scratchSize;
cl_ulong privateMemSize;
size_t maxWorkgroupSize;
cl_uint regCount;
const auto &hwInfo = clDevice.getHardwareInfo();
auto &gfxCoreHelper = this->getGfxCoreHelper();
auto &clGfxCoreHelper = clDevice.getRootDeviceEnvironment().getHelper<ClGfxCoreHelper>();
@@ -630,6 +631,11 @@ cl_int Kernel::getWorkGroupInfo(cl_kernel_work_group_info paramName,
srcSize = sizeof(cl_uint);
pSrc = &this->getKernelInfo().kernelDescriptor.kernelAttributes.numThreadsRequired;
break;
case CL_KERNEL_REGISTER_COUNT_INTEL:
regCount = kernelDescriptor.kernelAttributes.numGrfRequired;
srcSize = sizeof(cl_uint);
pSrc = &regCount;
break;
default:
break;
}

View File

@@ -3472,6 +3472,24 @@ TEST(KernelTest, givenKernelWithNumThreadsRequiredPatchTokenWhenQueryingEuThread
EXPECT_EQ(123U, euThreadCount);
}
// Checks that querying CL_KERNEL_REGISTER_COUNT_INTEL through
// Kernel::getWorkGroupInfo succeeds, reports a cl_uint-sized result, and
// returns the numGrfRequired value stored in the kernel descriptor.
TEST(KernelTest, givenKernelWithNumGRFRequiredPatchTokenWhenQueryingRegisterCountThenRegisterCountIsReturned) {
    KernelInfo kernelInfo = {};
    kernelInfo.kernelDescriptor.kernelAttributes.numGrfRequired = 213U;

    auto rootDeviceIndex = 0u;
    auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(NEO::defaultHwInfo.get(), rootDeviceIndex));
    auto program = std::make_unique<MockProgram>(toClDeviceVector(*device));
    MockKernel kernel(program.get(), kernelInfo, *device);

    // Zero-initialize the out-parameters: EXPECT_EQ on retVal is non-fatal,
    // so if the query fails the checks below would otherwise read
    // indeterminate values instead of producing a deterministic mismatch.
    cl_uint regCount = 0U;
    size_t paramRetSize = 0U;

    const cl_int retVal = kernel.getWorkGroupInfo(CL_KERNEL_REGISTER_COUNT_INTEL, sizeof(cl_uint), &regCount, &paramRetSize);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(sizeof(cl_uint), paramRetSize);
    EXPECT_EQ(213U, regCount);
}
HWTEST2_F(KernelTest, GivenInlineSamplersWhenSettingInlineSamplerThenDshIsPatched, SupportsSampler) {
auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get())));
MockKernelWithInternals kernel(*device);