Report the total shared local memory (SLM) size from kernel queries.

KernelImp::getProperties() (Level Zero) and Kernel::getWorkGroupInfo() for
CL_KERNEL_LOCAL_MEM_SIZE (OpenCL) now read the value from getSlmTotalSize()
instead of kernelAttributes.slmInlineSize. The added unit tests expect the
inline SLM size plus the SLM requested through local arguments
(100 + 4096 in both tests).

Review notes:
- NOTE(review): the template argument of `Test` in the trailing context of
  the third hunk ("using KernelMaxNumSubgroupsTests = Test;") is missing
  and is left as found; restore it from the target tree before applying.
- The OpenCL test declares paramName as cl_kernel_work_group_info, matching
  the getWorkGroupInfo() signature shown in the hunk header, and
  zero-initialises paramValue. The "index" line of that file still carries
  the blob ids from before this adjustment.

diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp
index 54dc59b10d..19458541eb 100644
--- a/level_zero/core/source/kernel/kernel_imp.cpp
+++ b/level_zero/core/source/kernel/kernel_imp.cpp
@@ -897,7 +897,7 @@ ze_result_t KernelImp::getProperties(ze_kernel_properties_t *pKernelProperties)
     pKernelProperties->requiredNumSubGroups = kernelDescriptor.kernelMetadata.compiledSubGroupsNumber;
     pKernelProperties->requiredSubgroupSize = kernelDescriptor.kernelMetadata.requiredSubGroupSize;
     pKernelProperties->maxSubgroupSize = kernelDescriptor.kernelAttributes.simdSize;
-    pKernelProperties->localMemSize = kernelDescriptor.kernelAttributes.slmInlineSize;
+    pKernelProperties->localMemSize = this->getSlmTotalSize();
     pKernelProperties->privateMemSize = gfxCoreHelper.getKernelPrivateMemSize(kernelDescriptor);
     pKernelProperties->spillMemSize = kernelDescriptor.kernelAttributes.spillFillScratchMemorySize;
     memset(pKernelProperties->uuid.kid, 0, ZE_MAX_KERNEL_UUID_SIZE);
diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp
index a851aa4f06..a95259ccbc 100644
--- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp
+++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp
@@ -1558,6 +1558,7 @@ class KernelPropertiesTests : public ModuleFixture, public ::testing::Test {
     class MockKernel : public KernelImp {
       public:
         using KernelImp::kernelHasIndirectAccess;
+        using KernelImp::slmArgsTotalSize;
     };
     void SetUp() override {
         debugManager.flags.FailBuildProgramWithStatefulAccess.set(0);
@@ -1680,6 +1681,25 @@ HWTEST2_F(KernelPropertiesTests, givenKernelWithPrivateScratchMemoryThenProperPr
     EXPECT_EQ(expectedSpillSize, kernelProperties.spillMemSize);
 }
 
+TEST_F(KernelPropertiesTests, givenKernelWithInlineAndDynamicSharedLocalMemoryThenTotalLocalMemorySizeIsReported) {
+    ze_kernel_properties_t kernelProperties = {};
+    kernelProperties.localMemSize = std::numeric_limits<uint32_t>::max();
+
+    uint32_t slmInlineSize = 100u;
+    uint32_t slmArgsSize = 4096u;
+    uint32_t expectedSlmTotalSize = slmInlineSize + slmArgsSize;
+
+    auto &kernelDescriptor = const_cast<KernelDescriptor &>(kernel->getKernelDescriptor());
+    kernelDescriptor.kernelAttributes.slmInlineSize = slmInlineSize;
+
+    kernel->slmArgsTotalSize = slmArgsSize;
+
+    ze_result_t res = kernel->getProperties(&kernelProperties);
+    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
+
+    EXPECT_EQ(expectedSlmTotalSize, kernelProperties.localMemSize);
+}
+
 using KernelMaxNumSubgroupsTests = Test;
 
 HWTEST2_F(KernelMaxNumSubgroupsTests, givenLargeGrfAndSimdSmallerThan32WhenCalculatingMaxWorkGroupSizeThenMaxNumSubgroupsReturnHalfOfDeviceDefault, IsWithinXeGfxFamily) {
diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp
index 8e127249bd..c647193e26 100644
--- a/opencl/source/kernel/kernel.cpp
+++ b/opencl/source/kernel/kernel.cpp
@@ -640,7 +640,7 @@ cl_int Kernel::getWorkGroupInfo(cl_kernel_work_group_info paramName,
         break;
 
     case CL_KERNEL_LOCAL_MEM_SIZE:
-        localMemorySize = kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize;
+        localMemorySize = this->getSlmTotalSize();
         srcSize = sizeof(localMemorySize);
         pSrc = &localMemorySize;
         break;
diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp
index 5778e6ded5..03272980ba 100644
--- a/opencl/test/unit_test/kernel/kernel_tests.cpp
+++ b/opencl/test/unit_test/kernel/kernel_tests.cpp
@@ -339,6 +339,42 @@ TEST_F(KernelTests, GivenRequiredDisabledEUFusionFlagWhenGettingPreferredWorkGro
     EXPECT_EQ(expectedValue, paramValue);
 }
 
+TEST_F(KernelTests, GivenSlmInlineSizeAndSlmOffsetWhenGettingWorkGroupInfoThenCorrectValueIsReturned) {
+    MockKernelInfo kernelInfo = {};
+    kernelInfo.kernelDescriptor.kernelAttributes.slmInlineSize = 100u;
+
+    kernelInfo.addArgLocal(0, 0x10, 0x1);
+    kernelInfo.addArgBuffer(1, 0x20, sizeof(void *));
+    kernelInfo.addArgBuffer(2, 0x20, sizeof(void *));
+    kernelInfo.addArgLocal(3, 0x30, 0x10);
+
+    MockKernel kernel(pProgram, kernelInfo, *pClDevice);
+    kernel.kernelArguments.resize(4);
+    kernel.slmSizes.resize(4);
+
+    uint32_t crossThreadData[0x40]{};
+    crossThreadData[0x20 / sizeof(uint32_t)] = 0x12344321;
+    kernel.setCrossThreadData(crossThreadData, sizeof(crossThreadData));
+
+    kernel.setArgLocal(0, 4096, nullptr);
+    kernel.setArgLocal(3, 0, nullptr);
+
+    cl_kernel_work_group_info paramName = CL_KERNEL_LOCAL_MEM_SIZE;
+    cl_ulong paramValue = 0;
+    size_t paramValueSizeRet = 0;
+    cl_ulong expectedValue = 4096 + 0 + 100;
+
+    retVal = kernel.getWorkGroupInfo(
+        paramName,
+        sizeof(cl_ulong),
+        &paramValue,
+        &paramValueSizeRet);
+
+    EXPECT_EQ(CL_SUCCESS, retVal);
+    EXPECT_EQ(sizeof(cl_ulong), paramValueSizeRet);
+    EXPECT_EQ(expectedValue, paramValue);
+}
+
 TEST_F(KernelTests, GivenCFEFusedEUDispatchEnabledAndRequiredDisabledUEFusionWhenGettingPreferredWorkGroupSizeMultipleThenCorectValueIsReturned) {
     DebugManagerStateRestore dbgRestorer;
     debugManager.flags.CFEFusedEUDispatch.set(0);