fix: Return max wg count equal one on platforms without dispatch all support

Related-To: NEO-14125, GSD-10725
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka 2025-04-15 13:51:31 +00:00 committed by Compute-Runtime-Automation
parent c16d02736d
commit b8be602bfb
9 changed files with 55 additions and 25 deletions

View File

@ -440,7 +440,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendingL
ASSERT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result);
}
HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLaunchCooperativeKernelIsCalledThenCorrectValueIsReturned, MatchAny) {
HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLaunchCooperativeKernelIsCalledThenCorrectValueIsReturned, HasDispatchAllSupport) {
Mock<::L0::KernelImp> kernel;
auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
kernel.module = pMockModule.get();
@ -716,7 +716,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingRegionGroupBarrierWhenA
EXPECT_EQ(nullptr, kernel.getRegionGroupBarrierAllocation());
}
HWTEST2_F(CommandListAppendLaunchKernel, whenAppendLaunchCooperativeKernelAndQueryKernelTimestampsToTheSameCmdlistThenFronEndStateIsNotChanged, MatchAny) {
HWTEST2_F(CommandListAppendLaunchKernel, whenAppendLaunchCooperativeKernelAndQueryKernelTimestampsToTheSameCmdlistThenFronEndStateIsNotChanged, HasDispatchAllSupport) {
Mock<::L0::KernelImp> kernel;
auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
kernel.module = pMockModule.get();

View File

@ -446,7 +446,7 @@ TEST(zeKernelGetProperties, WhenGettingKernelPropertiesThenSuccessIsReturned) {
using KernelImpSuggestMaxCooperativeGroupCountTests = Test<KernelImpSuggestMaxCooperativeGroupCountFixture>;
HWTEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenNoBarriersOrSlmUsedWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithSimd) {
HWTEST2_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenNoBarriersOrSlmUsedWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithSimd, IsAtLeastBmg) {
auto workGroupSize = lws[0] * lws[1] * lws[2];
auto expected = availableThreadCount / Math::divideAndRoundUp(workGroupSize, simd);
EXPECT_EQ(expected, getMaxWorkGroupCount());
@ -472,13 +472,13 @@ HWTEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenMultiTileWhenCalcul
}
}
HWTEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenBarriersWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToBarriersCount) {
HWTEST2_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenBarriersWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToBarriersCount, IsAtLeastBmg) {
usesBarriers = 1;
auto expected = dssCount * (maxBarrierCount / usesBarriers);
EXPECT_EQ(expected, getMaxWorkGroupCount());
}
HWTEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenUsedSlmSizeWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToUsedSlmSize) {
HWTEST2_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenUsedSlmSizeWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToUsedSlmSize, IsAtLeastBmg) {
usedSlm = 64 * MemoryConstants::kiloByte;
auto expected = availableSlm / usedSlm;
EXPECT_EQ(expected, getMaxWorkGroupCount());

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2023-2024 Intel Corporation
* Copyright (C) 2023-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -13,11 +13,6 @@
#include "level_zero/core/test/unit_tests/fixtures/kernel_max_cooperative_groups_count_fixture.h"
namespace L0 {
namespace ult {
HWTEST_EXCLUDE_PRODUCT(KernelImpSuggestMaxCooperativeGroupCountTests, GivenUsedSlmSizeWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToUsedSlmSize, IGFX_PVC);
HWTEST_EXCLUDE_PRODUCT(KernelImpSuggestMaxCooperativeGroupCountTests, GivenBarriersWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToBarriersCount, IGFX_PVC);
HWTEST_EXCLUDE_PRODUCT(KernelImpSuggestMaxCooperativeGroupCountTests, GivenNoBarriersOrSlmUsedWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithSimd, IGFX_PVC);
using KernelImpSuggestMaxCooperativeGroupCountTestsPvc = Test<L0::ult::KernelImpSuggestMaxCooperativeGroupCountFixture>;
PVCTEST_F(KernelImpSuggestMaxCooperativeGroupCountTestsPvc, GivenNoBarriersOrSlmUsedWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithSimd) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2019-2024 Intel Corporation
* Copyright (C) 2019-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -114,7 +114,7 @@ class SyncBufferHandlerTest : public SyncBufferEnqueueHandlerTest {
MockCommandQueue *commandQueue;
};
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenAllocateSyncBufferPatchAndConcurrentKernelWhenEnqueuingKernelThenSyncBufferIsUsed) {
HWTEST2_TEMPLATED_F(SyncBufferHandlerTest, GivenAllocateSyncBufferPatchAndConcurrentKernelWhenEnqueuingKernelThenSyncBufferIsUsed, HasDispatchAllSupport) {
patchAllocateSyncBuffer();
enqueueNDCount();
@ -142,7 +142,7 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenAllocateSyncBufferPatchAndConcurr
EXPECT_EQ(2u * minimalSyncBufferSize, syncBufferHandler->usedBufferSize);
}
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithoutAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsNotCreated) {
HWTEST2_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithoutAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsNotCreated, HasDispatchAllSupport) {
auto retVal = enqueueNDCount();
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(nullptr, getSyncBufferHandler());
@ -157,7 +157,7 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenDefaultKernelUsingSyncBufferWhenE
EXPECT_EQ(nullptr, getSyncBufferHandler());
}
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsCreated) {
HWTEST2_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsCreated, HasDispatchAllSupport) {
patchAllocateSyncBuffer();
auto retVal = enqueueNDCount();
EXPECT_EQ(CL_SUCCESS, retVal);
@ -180,7 +180,7 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenTooHighWorkgroupCountWhenEnqueuin
EXPECT_EQ(CL_INVALID_VALUE, retVal);
}
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenSyncBufferFullWhenEnqueuingKernelThenNewBufferIsAllocated) {
HWTEST2_TEMPLATED_F(SyncBufferHandlerTest, GivenSyncBufferFullWhenEnqueuingKernelThenNewBufferIsAllocated, HasDispatchAllSupport) {
patchAllocateSyncBuffer();
enqueueNDCount();
auto syncBufferHandler = getSyncBufferHandler();

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2024 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -31,7 +31,11 @@ bool GfxCoreHelperHw<GfxFamily>::isCooperativeDispatchSupported(const EngineGrou
template <typename GfxFamily>
uint32_t GfxCoreHelperHw<GfxFamily>::adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
const RootDeviceEnvironment &rootDeviceEnvironment) const {
return maxWorkGroupCount;
if ((debugManager.flags.ForceTheoreticalMaxWorkGroupCount.get()) ||
(debugManager.flags.OverrideMaxWorkGroupCount.get() != -1)) {
return maxWorkGroupCount;
}
return 1u;
}
template <typename GfxFamily>

View File

@ -96,6 +96,20 @@ using HasOclocZebinFormatEnforced = IsAnyProducts<IGFX_TIGERLAKE_LP,
IGFX_ALDERLAKE_P,
IGFX_ALDERLAKE_N>;
struct HasDispatchAllSupport {
template <PRODUCT_FAMILY productFamily>
static constexpr bool isMatched() {
return IsPVC::isMatched<productFamily>() || IsAtLeastBmg::isMatched<productFamily>();
}
};
struct DoesNotHaveDispatchAllSupport {
template <PRODUCT_FAMILY productFamily>
static constexpr bool isMatched() {
return !IsPVC::isMatched<productFamily>() && IsAtMostArl::isMatched<productFamily>();
}
};
struct IsXeLpg {
template <PRODUCT_FAMILY productFamily>
static constexpr bool isMatched() {

View File

@ -1582,7 +1582,7 @@ HWTEST_F(ProductHelperCommonTest, givenProductHelperWhenCallingIsCalculationForD
auto &gfxCoreHelper = getHelper<ProductHelper>();
EXPECT_FALSE(gfxCoreHelper.isCalculationForDisablingEuFusionWithDpasNeeded(hwInfo));
}
HWTEST_F(GfxCoreHelperTest, GivenCooperativeEngineSupportedAndNotUsedWhenAdjustMaxWorkGroupCountIsCalledThenSmallerValueIsReturned) {
HWTEST2_F(GfxCoreHelperTest, GivenCooperativeEngineSupportedAndNotUsedWhenAdjustMaxWorkGroupCountIsCalledThenSmallerValueIsReturned, IsAtLeastBmg) {
MockExecutionEnvironment mockExecutionEnvironment{};
auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
@ -1631,6 +1631,24 @@ HWTEST_F(GfxCoreHelperTest, GivenCooperativeEngineSupportedAndNotUsedWhenAdjustM
}
}
HWTEST2_F(GfxCoreHelperTest, GivenGfxHelperWhenAdjustMaxWorkGroupSizeCalledThenOneReturned, DoesNotHaveDispatchAllSupport) {
MockExecutionEnvironment mockExecutionEnvironment{};
auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
auto engineGroupType = EngineGroupType::compute;
EXPECT_EQ(1u, gfxCoreHelper.adjustMaxWorkGroupCount(1024u, engineGroupType, rootDeviceEnvironment));
}
HWTEST2_F(GfxCoreHelperTest, GivenGfxHelperWhenForceTheoreticalMaxWorkGroupCountThenAdjustMaxWorkGroupSizeReturnNotOne, DoesNotHaveDispatchAllSupport) {
DebugManagerStateRestore dbgRestore;
debugManager.flags.ForceTheoreticalMaxWorkGroupCount.set(true);
MockExecutionEnvironment mockExecutionEnvironment{};
auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
auto engineGroupType = EngineGroupType::compute;
EXPECT_NE(1u, gfxCoreHelper.adjustMaxWorkGroupCount(1024u, engineGroupType, rootDeviceEnvironment));
}
HWTEST_F(GfxCoreHelperTest, givenNumGrfAndSimdSizeWhenAdjustingMaxWorkGroupSizeThenAlwaysReturnDeviceDefault) {
const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
const auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();

View File

@ -103,7 +103,7 @@ TEST_F(KernelHelperMaxWorkGroupsTests, givenMultipleSubdevicesWenCalculatingMaxW
}
}
HWTEST2_F(KernelHelperMaxWorkGroupsTests, GivenBarriersWhenCalculatingMaxWorkGroupsCountThenResultIsCalculatedWithRegardToBarriersCount, MatchAny) {
HWTEST2_F(KernelHelperMaxWorkGroupsTests, GivenBarriersWhenCalculatingMaxWorkGroupsCountThenResultIsCalculatedWithRegardToBarriersCount, HasDispatchAllSupport) {
NEO::RAIIProductHelperFactory<MockProductHelperHw<productFamily>> raii(*rootDeviceEnvironment);
raii.mockProductHelper->isCooperativeEngineSupportedValue = false;
lws[0] = 1;
@ -121,7 +121,7 @@ HWTEST2_F(KernelHelperMaxWorkGroupsTests, GivenBarriersWhenCalculatingMaxWorkGro
EXPECT_EQ(expected, getMaxWorkGroupCount());
}
HWTEST2_F(KernelHelperMaxWorkGroupsTests, GivenUsedSlmSizeWhenCalculatingMaxWorkGroupsCountThenResultIsCalculatedWithRegardToUsedSlmSize, MatchAny) {
HWTEST2_F(KernelHelperMaxWorkGroupsTests, GivenUsedSlmSizeWhenCalculatingMaxWorkGroupsCountThenResultIsCalculatedWithRegardToUsedSlmSize, HasDispatchAllSupport) {
NEO::RAIIProductHelperFactory<MockProductHelperHw<productFamily>> raii(*rootDeviceEnvironment);
raii.mockProductHelper->isCooperativeEngineSupportedValue = false;
usedSlm = 0;
@ -157,7 +157,7 @@ HWTEST_F(KernelHelperMaxWorkGroupsTests, givenZeroBarriersAndSlmNotUsedWhenCalcu
EXPECT_EQ(raiiFactory.mockGfxCoreHelper->alignThreadGroupCountToDssSizeCalledTimes, 0u);
}
TEST_F(KernelHelperMaxWorkGroupsTests, GivenVariousValuesWhenCalculatingMaxWorkGroupsCountThenLowestResultIsAlwaysReturned) {
HWTEST2_F(KernelHelperMaxWorkGroupsTests, GivenVariousValuesWhenCalculatingMaxWorkGroupsCountThenLowestResultIsAlwaysReturned, HasDispatchAllSupport) {
auto &helper = rootDeviceEnvironment->getHelper<NEO::GfxCoreHelper>();
engineType = EngineGroupType::cooperativeCompute;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022-2024 Intel Corporation
* Copyright (C) 2022-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -8,5 +8,4 @@
#include "shared/test/common/test_macros/hw_test_base.h"
HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenProductHelperWhenAskedIfIsBlitSplitEnqueueWARequiredThenReturnFalse, IGFX_PVC);
HWTEST_EXCLUDE_PRODUCT(BlitTests, GivenCpuAccessToLocalMemoryWhenGettingMaxBlitSizeThenValuesAreOverriden_BlitPlatforms, IGFX_PVC);
HWTEST_EXCLUDE_PRODUCT(GfxCoreHelperTest, GivenCooperativeEngineSupportedAndNotUsedWhenAdjustMaxWorkGroupCountIsCalledThenSmallerValueIsReturned, IGFX_PVC);
HWTEST_EXCLUDE_PRODUCT(BlitTests, GivenCpuAccessToLocalMemoryWhenGettingMaxBlitSizeThenValuesAreOverriden_BlitPlatforms, IGFX_PVC);