diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index c452e6f019..43a7bb0138 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -850,6 +850,7 @@ ze_result_t KernelImp::getKernelName(size_t *pSize, char *pName) { } ze_result_t KernelImp::getProperties(ze_kernel_properties_t *pKernelProperties) { + const auto &gfxCoreHelper = this->module->getDevice()->getGfxCoreHelper(); const auto &kernelDescriptor = this->kernelImmData->getDescriptor(); pKernelProperties->numKernelArgs = static_cast(kernelDescriptor.payloadMappings.explicitArgs.size()); pKernelProperties->requiredGroupSizeX = kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0]; @@ -859,12 +860,11 @@ ze_result_t KernelImp::getProperties(ze_kernel_properties_t *pKernelProperties) pKernelProperties->requiredSubgroupSize = kernelDescriptor.kernelMetadata.requiredSubGroupSize; pKernelProperties->maxSubgroupSize = kernelDescriptor.kernelAttributes.simdSize; pKernelProperties->localMemSize = kernelDescriptor.kernelAttributes.slmInlineSize; - pKernelProperties->privateMemSize = kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize; + pKernelProperties->privateMemSize = gfxCoreHelper.getKernelPrivateMemSize(kernelDescriptor); pKernelProperties->spillMemSize = kernelDescriptor.kernelAttributes.perThreadScratchSize[0]; memset(pKernelProperties->uuid.kid, 0, ZE_MAX_KERNEL_UUID_SIZE); memset(pKernelProperties->uuid.mid, 0, ZE_MAX_MODULE_UUID_SIZE); - const auto &gfxCoreHelper = this->module->getDevice()->getGfxCoreHelper(); uint32_t maxKernelWorkGroupSize = static_cast(this->module->getMaxGroupSize(kernelDescriptor)); maxKernelWorkGroupSize = gfxCoreHelper.adjustMaxWorkGroupSize(kernelDescriptor.kernelAttributes.numGrfRequired, kernelDescriptor.kernelAttributes.simdSize, !kernelRequiresGenerationOfLocalIdsByRuntime, maxKernelWorkGroupSize); diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp index 79b9ab028b..5424d014f1 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -1514,6 +1514,13 @@ TEST_F(KernelPropertiesTests, givenValidKernelThenPropertiesAreRetrieved) { ze_kernel_properties_t kernelPropertiesBefore = {}; kernelPropertiesBefore = kernelProperties; + auto expectedSpillSize = 0x100u; + auto expectedPrivateSize = 0x200u; + + auto &kernelDescriptor = const_cast(kernel->getKernelDescriptor()); + kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = expectedSpillSize; + kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize = expectedPrivateSize; + ze_result_t res = kernel->getProperties(&kernelProperties); EXPECT_EQ(ZE_RESULT_SUCCESS, res); @@ -1531,8 +1538,8 @@ TEST_F(KernelPropertiesTests, givenValidKernelThenPropertiesAreRetrieved) { EXPECT_EQ(maxNumSubgroups, kernelProperties.maxNumSubgroups); EXPECT_EQ(sizeof(float) * 16U, kernelProperties.localMemSize); - EXPECT_EQ(0U, kernelProperties.privateMemSize); - EXPECT_EQ(0U, kernelProperties.spillMemSize); + EXPECT_EQ(expectedPrivateSize, kernelProperties.privateMemSize); + EXPECT_EQ(expectedSpillSize, kernelProperties.spillMemSize); uint8_t zeroKid[ZE_MAX_KERNEL_UUID_SIZE]; uint8_t zeroMid[ZE_MAX_MODULE_UUID_SIZE]; @@ -1544,6 +1551,27 @@ TEST_F(KernelPropertiesTests, givenValidKernelThenPropertiesAreRetrieved) { sizeof(kernelProperties.uuid.mid))); } +HWTEST2_F(KernelPropertiesTests, givenKernelWithPrivateScratchMemoryThenProperPrivateMemorySizeIsReported, IsAtLeastXeHpCore) { + ze_kernel_properties_t kernelProperties = {}; + + kernelProperties.privateMemSize = std::numeric_limits::max(); + kernelProperties.spillMemSize = std::numeric_limits::max(); + + auto expectedSpillSize = 0x100u; + auto expectedPrivateSize = 0x200u; + + auto &kernelDescriptor = const_cast(kernel->getKernelDescriptor()); + kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = expectedSpillSize; + kernelDescriptor.kernelAttributes.perThreadScratchSize[1] = expectedPrivateSize; + kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize = 0xDEAD; + + ze_result_t res = kernel->getProperties(&kernelProperties); + EXPECT_EQ(ZE_RESULT_SUCCESS, res); + + EXPECT_EQ(expectedPrivateSize, kernelProperties.privateMemSize); + EXPECT_EQ(expectedSpillSize, kernelProperties.spillMemSize); +} + using KernelMaxNumSubgroupsTests = Test; HWTEST2_F(KernelMaxNumSubgroupsTests, givenLargeGrfAndSimdSmallerThan32WhenCalculatingMaxWorkGroupSizeThenMaxNumSubgroupsReturnHalfOfDeviceDefault, IsWithinXeGfxFamily) { diff --git a/opencl/source/helpers/cl_gfx_core_helper.h b/opencl/source/helpers/cl_gfx_core_helper.h index 14d550e5e0..a7e6e1d90b 100644 --- a/opencl/source/helpers/cl_gfx_core_helper.h +++ b/opencl/source/helpers/cl_gfx_core_helper.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -40,7 +40,6 @@ class ClGfxCoreHelper : public ApiGfxCoreHelper { virtual bool requiresAuxResolves(const KernelInfo &kernelInfo) const = 0; virtual cl_command_queue_capabilities_intel getAdditionalDisabledQueueFamilyCapabilities(EngineGroupType type) const = 0; virtual bool getQueueFamilyName(std::string &name, EngineGroupType type) const = 0; - virtual cl_ulong getKernelPrivateMemSize(const KernelInfo &kernelInfo) const = 0; virtual bool preferBlitterForLocalToLocalTransfers() const = 0; virtual bool isSupportedKernelThreadArbitrationPolicy() const = 0; virtual std::vector getSupportedThreadArbitrationPolicies() const = 0; @@ -70,7 +69,6 @@ class ClGfxCoreHelperHw : public ClGfxCoreHelper { bool requiresAuxResolves(const KernelInfo &kernelInfo) const override; cl_command_queue_capabilities_intel getAdditionalDisabledQueueFamilyCapabilities(EngineGroupType type) const override; bool getQueueFamilyName(std::string &name, EngineGroupType type) const override; - cl_ulong getKernelPrivateMemSize(const KernelInfo &kernelInfo) const override; bool preferBlitterForLocalToLocalTransfers() const override; bool isSupportedKernelThreadArbitrationPolicy() const override; std::vector getSupportedThreadArbitrationPolicies() const override; diff --git a/opencl/source/helpers/cl_gfx_core_helper_bdw_and_later.inl b/opencl/source/helpers/cl_gfx_core_helper_bdw_and_later.inl index d8d10076ab..4aff8b0f50 100644 --- a/opencl/source/helpers/cl_gfx_core_helper_bdw_and_later.inl +++ b/opencl/source/helpers/cl_gfx_core_helper_bdw_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2023 Intel Corporation + * Copyright (C) 2021-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -18,11 +18,6 @@ inline cl_command_queue_capabilities_intel ClGfxCoreHelperHw::getAddi return 0; } -template -cl_ulong ClGfxCoreHelperHw::getKernelPrivateMemSize(const KernelInfo &kernelInfo) const { - return kernelInfo.kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize; -} - template cl_device_feature_capabilities_intel ClGfxCoreHelperHw::getSupportedDeviceFeatureCapabilities(const RootDeviceEnvironment &rootDeviceEnvironment) const { return 0; diff --git a/opencl/source/helpers/cl_gfx_core_helper_xehp_and_later.inl b/opencl/source/helpers/cl_gfx_core_helper_xehp_and_later.inl index 9afe32b3d4..57063ce826 100644 --- a/opencl/source/helpers/cl_gfx_core_helper_xehp_and_later.inl +++ b/opencl/source/helpers/cl_gfx_core_helper_xehp_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2023 Intel Corporation + * Copyright (C) 2021-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -17,12 +17,6 @@ inline cl_command_queue_capabilities_intel ClGfxCoreHelperHw::getAddi return 0; } -template -cl_ulong ClGfxCoreHelperHw::getKernelPrivateMemSize(const KernelInfo &kernelInfo) const { - const auto &kernelAttributes = kernelInfo.kernelDescriptor.kernelAttributes; - return (kernelAttributes.perThreadScratchSize[1] > 0) ? kernelAttributes.perThreadScratchSize[1] : kernelAttributes.perHwThreadPrivateMemorySize; -} - template cl_device_feature_capabilities_intel ClGfxCoreHelperHw::getSupportedDeviceFeatureCapabilities(const RootDeviceEnvironment &rootDeviceEnvironment) const { diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 41c0fe1a6c..1adb58ddca 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -580,7 +580,6 @@ cl_int Kernel::getWorkGroupInfo(cl_kernel_work_group_info paramName, cl_uint regCount; const auto &hwInfo = clDevice.getHardwareInfo(); auto &gfxCoreHelper = this->getGfxCoreHelper(); - auto &clGfxCoreHelper = clDevice.getRootDeviceEnvironment().getHelper(); GetInfoHelper info(paramValue, paramValueSize, paramValueSizeRet); switch (paramName) { @@ -623,7 +622,7 @@ cl_int Kernel::getWorkGroupInfo(cl_kernel_work_group_info paramName, pSrc = &scratchSize; break; case CL_KERNEL_PRIVATE_MEM_SIZE: - privateMemSize = clGfxCoreHelper.getKernelPrivateMemSize(kernelInfo); + privateMemSize = gfxCoreHelper.getKernelPrivateMemSize(kernelDescriptor); srcSize = sizeof(privateMemSize); pSrc = &privateMemSize; break; diff --git a/shared/source/helpers/gfx_core_helper.h b/shared/source/helpers/gfx_core_helper.h index 36a42f0ad9..75bfecb820 100644 --- a/shared/source/helpers/gfx_core_helper.h +++ b/shared/source/helpers/gfx_core_helper.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -177,6 +177,7 @@ class GfxCoreHelper { virtual uint32_t getContextGroupContextsCount() const = 0; virtual bool is48ResourceNeededForCmdBuffer() const = 0; + virtual uint32_t getKernelPrivateMemSize(const KernelDescriptor &kernelDescriptor) const = 0; virtual ~GfxCoreHelper() = default; @@ -394,6 +395,8 @@ class GfxCoreHelperHw : public GfxCoreHelper { bool is48ResourceNeededForCmdBuffer() const override; + uint32_t getKernelPrivateMemSize(const KernelDescriptor &kernelDescriptor) const override; + ~GfxCoreHelperHw() override = default; protected: diff --git a/shared/source/helpers/gfx_core_helper_bdw_and_later.inl b/shared/source/helpers/gfx_core_helper_bdw_and_later.inl index ab3c525d07..e3ff8732ab 100644 --- a/shared/source/helpers/gfx_core_helper_bdw_and_later.inl +++ b/shared/source/helpers/gfx_core_helper_bdw_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2023 Intel Corporation + * Copyright (C) 2019-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -8,6 +8,7 @@ #include "shared/source/helpers/engine_node_helper.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/hw_info.h" +#include "shared/source/kernel/kernel_descriptor.h" namespace NEO { @@ -150,4 +151,9 @@ template uint32_t GfxCoreHelperHw::getMinimalScratchSpaceSize() const { return 1024U; } + +template +uint32_t GfxCoreHelperHw::getKernelPrivateMemSize(const KernelDescriptor &kernelDescriptor) const { + return kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize; +} } // namespace NEO diff --git a/shared/source/helpers/gfx_core_helper_xehp_and_later.inl b/shared/source/helpers/gfx_core_helper_xehp_and_later.inl index 4d245e865f..4048f224da 100644 --- a/shared/source/helpers/gfx_core_helper_xehp_and_later.inl +++ b/shared/source/helpers/gfx_core_helper_xehp_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2023 Intel Corporation + * Copyright (C) 2021-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -199,4 +199,9 @@ bool GfxCoreHelperHw::largeGrfModeSupported() const { return true; } +template +uint32_t GfxCoreHelperHw::getKernelPrivateMemSize(const KernelDescriptor &kernelDescriptor) const { + const auto &kernelAttributes = kernelDescriptor.kernelAttributes; + return (kernelAttributes.perThreadScratchSize[1] > 0) ? kernelAttributes.perThreadScratchSize[1] : kernelAttributes.perHwThreadPrivateMemorySize; +} } // namespace NEO diff --git a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp index 17be5783eb..bdd2b2f22a 100644 --- a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp +++ b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022-2023 Intel Corporation + * Copyright (C) 2022-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -1614,3 +1614,16 @@ TEST_F(GfxCoreHelperTest, givenContextGroupEnabledWithDebugKeyWhenContextGroupCo HWTEST_F(GfxCoreHelperTest, whenAskingIf48bResourceNeededForCmdBufferThenReturnTrue) { EXPECT_TRUE(getHelper().is48ResourceNeededForCmdBuffer()); } + +TEST_F(GfxCoreHelperTest, whenOnlyPerThreadPrivateMemorySizeIsDefinedThenItIsReturnedAsKernelPrivateMemorySize) { + KernelDescriptor kernelDescriptor{}; + kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize = 0x100u; + EXPECT_EQ(0x100u, getHelper().getKernelPrivateMemSize(kernelDescriptor)); +} + +HWTEST2_F(GfxCoreHelperTest, whenPrivateScratchSizeIsDefinedThenItIsReturnedAsKernelPrivateMemorySize, IsAtLeastXeHpCore) { + KernelDescriptor kernelDescriptor{}; + kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize = 0x100u; + kernelDescriptor.kernelAttributes.perThreadScratchSize[1] = 0x200u; + EXPECT_EQ(0x200u, getHelper().getKernelPrivateMemSize(kernelDescriptor)); +}