From 976c6d3accd7052b4fb068c353363ef36ff89aae Mon Sep 17 00:00:00 2001 From: Ben Ashbaugh Date: Fri, 1 Dec 2023 13:33:08 -0800 Subject: [PATCH] feature: initial register count query implementation Adds a per-kernel and per-device query to determine the number of GRF registers that a kernel was compiled for. This is an informal query for now, but may be added to a formally supported extension in the future. Related-To: NEO-9807 Signed-off-by: Ben Ashbaugh --- opencl/extensions/public/cl_ext_private.h | 4 ++++ opencl/source/kernel/kernel.cpp | 6 ++++++ opencl/test/unit_test/kernel/kernel_tests.cpp | 18 ++++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/opencl/extensions/public/cl_ext_private.h b/opencl/extensions/public/cl_ext_private.h index 8e681c211f..0b0317f4c2 100644 --- a/opencl/extensions/public/cl_ext_private.h +++ b/opencl/extensions/public/cl_ext_private.h @@ -343,6 +343,10 @@ typedef cl_bitfield cl_command_queue_mdapi_properties_intel; #define CL_DEVICE_EU_THREAD_COUNTS_INTEL 0x1000A // placeholder #define CL_KERNEL_EU_THREAD_COUNT_INTEL 0x1000B // placeholder +#if !defined(cl_intel_maximum_registers) +#define CL_KERNEL_REGISTER_COUNT_INTEL 0x425B +#endif + /************************************************* * cl_ext_float_atomics extension * *************************************************/ diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index dfafe2c51e..9338aa1057 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -577,6 +577,7 @@ cl_int Kernel::getWorkGroupInfo(cl_kernel_work_group_info paramName, cl_ulong scratchSize; cl_ulong privateMemSize; size_t maxWorkgroupSize; + cl_uint regCount; const auto &hwInfo = clDevice.getHardwareInfo(); auto &gfxCoreHelper = this->getGfxCoreHelper(); auto &clGfxCoreHelper = clDevice.getRootDeviceEnvironment().getHelper(); @@ -630,6 +631,11 @@ cl_int Kernel::getWorkGroupInfo(cl_kernel_work_group_info paramName, srcSize = sizeof(cl_uint); 
pSrc = &this->getKernelInfo().kernelDescriptor.kernelAttributes.numThreadsRequired; break; + case CL_KERNEL_REGISTER_COUNT_INTEL: + regCount = kernelDescriptor.kernelAttributes.numGrfRequired; + srcSize = sizeof(cl_uint); + pSrc = &regCount; + break; default: break; } diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 7b8290a4bb..7b36b61a86 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -3472,6 +3472,24 @@ TEST(KernelTest, givenKernelWithNumThreadsRequiredPatchTokenWhenQueryingEuThread EXPECT_EQ(123U, euThreadCount); } +TEST(KernelTest, givenKernelWithNumGRFRequiredPatchTokenWhenQueryingRegisterCountThenRegisterCountIsReturned) { + cl_int retVal = CL_SUCCESS; + KernelInfo kernelInfo = {}; + + kernelInfo.kernelDescriptor.kernelAttributes.numGrfRequired = 213U; + auto rootDeviceIndex = 0u; + auto device = std::make_unique(MockDevice::createWithNewExecutionEnvironment(NEO::defaultHwInfo.get(), rootDeviceIndex)); + auto program = std::make_unique(toClDeviceVector(*device)); + MockKernel kernel(program.get(), kernelInfo, *device); + + cl_uint regCount; + size_t paramRetSize; + retVal = kernel.getWorkGroupInfo(CL_KERNEL_REGISTER_COUNT_INTEL, sizeof(cl_uint), &regCount, &paramRetSize); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(sizeof(cl_uint), paramRetSize); + EXPECT_EQ(213U, regCount); +} + HWTEST2_F(KernelTest, GivenInlineSamplersWhenSettingInlineSamplerThenDshIsPatched, SupportsSampler) { auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment(defaultHwInfo.get()))); MockKernelWithInternals kernel(*device);