// Patch summary (commentary only; everything from the first "diff --git" header onward is unchanged).
//
// Purpose: route the "cooperative kernel => single-tile execution under implicit scaling" decision
// through a new GfxCoreHelper hook instead of using the cooperative / sync-buffer flag directly.
//
// Production changes shown in the hunks:
//   - gfx_core_helper.h: adds pure virtual
//       bool GfxCoreHelper::singleTileExecImplicitScalingRequired(bool cooperativeKernel) const
//     and the matching override declaration in GfxCoreHelperHw.
//   - gfx_core_helper_base.inl: the base definition returns cooperativeKernel unchanged.
//   - kernel.cpp: Kernel::isSingleSubdevicePreferred() now ORs
//     singleSubdevicePreferredInCurrentEnqueue with the hook applied to usesSyncBuffer().
//   - command_encoder_xehp_and_later.inl: the argument that used to be args.isCooperative is now
//     the hook applied to args.isCooperative.
//
// Test changes shown in the hunks:
//   - test_cmdlist_append_launch_kernel_2.cpp: expects partition size 16 when the hook returns true
//     for a cooperative kernel, 4 otherwise.
//   - kernel_tests.cpp: the expected value is computed through the same hook.
//   - test_encode_dispatch_kernel_xehp_and_later.cpp: expected partition size is dims[0], divided by
//     dispatchArgs.partitionCount when the hook returns false.
//   - gfx_core_helper_tests.cpp: new test asserting hook(true) is true and hook(false) is false.
//   - Copyright end year is changed from 2023 to 2024 in two test files.
//
// NOTE(review): the hunks below sit on a few very long lines and several template argument lists
// look stripped (e.g. a bare `template` before the GfxCoreHelperHw definition, `std::unique_ptr kernel`).
// Presumably an extraction artifact; confirm against the upstream commit before applying this patch.
diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp index 608173a676..e5e80600f7 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -1591,7 +1591,14 @@ HWTEST2_F(MultiTileCommandListAppendLaunchKernelXeHpCoreTest, givenCooperativeKe itorWalker = find(cmdList.begin(), cmdList.end()); cmd = genCmdCast(*itorWalker); EXPECT_TRUE(cmd->getWorkloadPartitionEnable()); - EXPECT_EQ(16u, cmd->getPartitionSize()); + + const auto &gfxCoreHelper = device->getGfxCoreHelper(); + + if (gfxCoreHelper.singleTileExecImplicitScalingRequired(true)) { + EXPECT_EQ(16u, cmd->getPartitionSize()); + } else { + EXPECT_EQ(4u, cmd->getPartitionSize()); + } } HWTEST2_F(MultiTileCommandListAppendLaunchKernelXeHpCoreTest, diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 759a01f8a6..c76dc1f213 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -1264,7 +1264,9 @@ bool Kernel::hasRunFinished(TimestampPacketContainer *timestampContainer) { } bool Kernel::isSingleSubdevicePreferred() const { - return this->singleSubdevicePreferredInCurrentEnqueue || this->usesSyncBuffer(); + auto &gfxCoreHelper = this->getGfxCoreHelper(); + + return this->singleSubdevicePreferredInCurrentEnqueue || gfxCoreHelper.singleTileExecImplicitScalingRequired(this->usesSyncBuffer()); } void Kernel::setInlineSamplers() { diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index da3ca96a60..c9d479b12b 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ 
b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -380,6 +380,8 @@ TEST_F(KernelTests, GivenInvalidParamNameWhenGettingWorkGroupInfoThenInvalidValu } TEST_F(KernelTests, WhenIsSingleSubdevicePreferredIsCalledThenCorrectValuesAreReturned) { + auto &helper = pClDevice->getGfxCoreHelper(); + std::unique_ptr kernel{MockKernel::create(pClDevice->getDevice(), pProgram)}; for (auto usesSyncBuffer : ::testing::Bool()) { kernel->getAllocatedKernelInfo()->kernelDescriptor.kernelAttributes.flags.usesSyncBuffer = usesSyncBuffer; @@ -387,7 +389,7 @@ TEST_F(KernelTests, WhenIsSingleSubdevicePreferredIsCalledThenCorrectValuesAreRe kernel->singleSubdevicePreferredInCurrentEnqueue = singleSubdevicePreferredInCurrentEnqueue; EXPECT_EQ(usesSyncBuffer, kernel->usesSyncBuffer()); - auto expectedSingleSubdevicePreferredInCurrentEnqueue = singleSubdevicePreferredInCurrentEnqueue || usesSyncBuffer; + auto expectedSingleSubdevicePreferredInCurrentEnqueue = singleSubdevicePreferredInCurrentEnqueue || helper.singleTileExecImplicitScalingRequired(usesSyncBuffer); EXPECT_EQ(expectedSingleSubdevicePreferredInCurrentEnqueue, kernel->isSingleSubdevicePreferred()); } } diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 2f053f5201..bd21414faa 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -403,7 +403,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis !(container.getFlushTaskUsedForImmediate() || container.isUsingPrimaryBuffer()), !args.isKernelDispatchedFromImmediateCmdList, args.dcFlushEnable, - args.isCooperative, + gfxCoreHelper.singleTileExecImplicitScalingRequired(args.isCooperative), workPartitionAllocationGpuVa, hwInfo); } else { diff --git a/shared/source/helpers/gfx_core_helper.h b/shared/source/helpers/gfx_core_helper.h index 830c248f32..846e039572 
100644 --- a/shared/source/helpers/gfx_core_helper.h +++ b/shared/source/helpers/gfx_core_helper.h @@ -182,6 +182,8 @@ class GfxCoreHelper { virtual bool is48ResourceNeededForCmdBuffer() const = 0; virtual uint32_t getKernelPrivateMemSize(const KernelDescriptor &kernelDescriptor) const = 0; + virtual bool singleTileExecImplicitScalingRequired(bool cooperativeKernel) const = 0; + virtual ~GfxCoreHelper() = default; protected: @@ -402,6 +404,8 @@ class GfxCoreHelperHw : public GfxCoreHelper { uint32_t getKernelPrivateMemSize(const KernelDescriptor &kernelDescriptor) const override; + bool singleTileExecImplicitScalingRequired(bool cooperativeKernel) const override; + ~GfxCoreHelperHw() override = default; protected: diff --git a/shared/source/helpers/gfx_core_helper_base.inl b/shared/source/helpers/gfx_core_helper_base.inl index 0e7f055342..3f437b5d86 100644 --- a/shared/source/helpers/gfx_core_helper_base.inl +++ b/shared/source/helpers/gfx_core_helper_base.inl @@ -740,4 +740,9 @@ bool GfxCoreHelperHw::is48ResourceNeededForCmdBuffer() const { return true; } +template +bool GfxCoreHelperHw::singleTileExecImplicitScalingRequired(bool cooperativeKernel) const { + return cooperativeKernel; +} + } // namespace NEO diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp index f3775f4a90..c710690cb8 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2023 Intel Corporation + * Copyright (C) 2021-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -1084,7 +1084,14 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesImplicitScaling, givenCooperativ auto partitionWalkerCmd = genCmdCast(*itor); EXPECT_EQ(DefaultWalkerType::PARTITION_TYPE::PARTITION_TYPE_X, 
partitionWalkerCmd->getPartitionType()); + + const auto &gfxCoreHelper = pDevice->getGfxCoreHelper(); uint32_t expectedPartitionSize = dims[0]; + + if (!gfxCoreHelper.singleTileExecImplicitScalingRequired(isCooperative)) { + expectedPartitionSize /= dispatchArgs.partitionCount; + } + EXPECT_EQ(expectedPartitionSize, partitionWalkerCmd->getPartitionSize()); } diff --git a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp index 6fe4b0d74b..8ab642cf72 100644 --- a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp +++ b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp @@ -1657,6 +1657,13 @@ TEST_F(GfxCoreHelperTest, whenOnlyPerThreadPrivateMemorySizeIsDefinedThenItIsRet EXPECT_EQ(0x100u, getHelper().getKernelPrivateMemSize(kernelDescriptor)); } +HWTEST_F(GfxCoreHelperTest, givenCooperativeKernelWhenAskingForSingleTileDispatchThenReturnTrue) { + auto &helper = getHelper(); + + EXPECT_TRUE(helper.singleTileExecImplicitScalingRequired(true)); + EXPECT_FALSE(helper.singleTileExecImplicitScalingRequired(false)); +} + HWTEST2_F(GfxCoreHelperTest, whenPrivateScratchSizeIsDefinedThenItIsReturnedAsKernelPrivateMemorySize, IsAtLeastXeHpCore) { KernelDescriptor kernelDescriptor{}; kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize = 0x100u;