fix: Return max wg count equal one on platforms without dispatch all support
Related-To: NEO-14125, GSD-10725 Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
parent
c16d02736d
commit
b8be602bfb
|
@ -440,7 +440,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendingL
|
|||
ASSERT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLaunchCooperativeKernelIsCalledThenCorrectValueIsReturned, MatchAny) {
|
||||
HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLaunchCooperativeKernelIsCalledThenCorrectValueIsReturned, HasDispatchAllSupport) {
|
||||
Mock<::L0::KernelImp> kernel;
|
||||
auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
|
||||
kernel.module = pMockModule.get();
|
||||
|
@ -716,7 +716,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingRegionGroupBarrierWhenA
|
|||
EXPECT_EQ(nullptr, kernel.getRegionGroupBarrierAllocation());
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListAppendLaunchKernel, whenAppendLaunchCooperativeKernelAndQueryKernelTimestampsToTheSameCmdlistThenFronEndStateIsNotChanged, MatchAny) {
|
||||
HWTEST2_F(CommandListAppendLaunchKernel, whenAppendLaunchCooperativeKernelAndQueryKernelTimestampsToTheSameCmdlistThenFronEndStateIsNotChanged, HasDispatchAllSupport) {
|
||||
Mock<::L0::KernelImp> kernel;
|
||||
auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
|
||||
kernel.module = pMockModule.get();
|
||||
|
|
|
@ -446,7 +446,7 @@ TEST(zeKernelGetProperties, WhenGettingKernelPropertiesThenSuccessIsReturned) {
|
|||
|
||||
using KernelImpSuggestMaxCooperativeGroupCountTests = Test<KernelImpSuggestMaxCooperativeGroupCountFixture>;
|
||||
|
||||
HWTEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenNoBarriersOrSlmUsedWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithSimd) {
|
||||
HWTEST2_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenNoBarriersOrSlmUsedWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithSimd, IsAtLeastBmg) {
|
||||
auto workGroupSize = lws[0] * lws[1] * lws[2];
|
||||
auto expected = availableThreadCount / Math::divideAndRoundUp(workGroupSize, simd);
|
||||
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
||||
|
@ -472,13 +472,13 @@ HWTEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenMultiTileWhenCalcul
|
|||
}
|
||||
}
|
||||
|
||||
HWTEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenBarriersWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToBarriersCount) {
|
||||
HWTEST2_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenBarriersWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToBarriersCount, IsAtLeastBmg) {
|
||||
usesBarriers = 1;
|
||||
auto expected = dssCount * (maxBarrierCount / usesBarriers);
|
||||
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
||||
}
|
||||
|
||||
HWTEST_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenUsedSlmSizeWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToUsedSlmSize) {
|
||||
HWTEST2_F(KernelImpSuggestMaxCooperativeGroupCountTests, GivenUsedSlmSizeWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToUsedSlmSize, IsAtLeastBmg) {
|
||||
usedSlm = 64 * MemoryConstants::kiloByte;
|
||||
auto expected = availableSlm / usedSlm;
|
||||
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2023-2024 Intel Corporation
|
||||
* Copyright (C) 2023-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -13,11 +13,6 @@
|
|||
#include "level_zero/core/test/unit_tests/fixtures/kernel_max_cooperative_groups_count_fixture.h"
|
||||
namespace L0 {
|
||||
namespace ult {
|
||||
|
||||
HWTEST_EXCLUDE_PRODUCT(KernelImpSuggestMaxCooperativeGroupCountTests, GivenUsedSlmSizeWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToUsedSlmSize, IGFX_PVC);
|
||||
HWTEST_EXCLUDE_PRODUCT(KernelImpSuggestMaxCooperativeGroupCountTests, GivenBarriersWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToBarriersCount, IGFX_PVC);
|
||||
HWTEST_EXCLUDE_PRODUCT(KernelImpSuggestMaxCooperativeGroupCountTests, GivenNoBarriersOrSlmUsedWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithSimd, IGFX_PVC);
|
||||
|
||||
using KernelImpSuggestMaxCooperativeGroupCountTestsPvc = Test<L0::ult::KernelImpSuggestMaxCooperativeGroupCountFixture>;
|
||||
|
||||
PVCTEST_F(KernelImpSuggestMaxCooperativeGroupCountTestsPvc, GivenNoBarriersOrSlmUsedWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithSimd) {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2019-2024 Intel Corporation
|
||||
* Copyright (C) 2019-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -114,7 +114,7 @@ class SyncBufferHandlerTest : public SyncBufferEnqueueHandlerTest {
|
|||
MockCommandQueue *commandQueue;
|
||||
};
|
||||
|
||||
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenAllocateSyncBufferPatchAndConcurrentKernelWhenEnqueuingKernelThenSyncBufferIsUsed) {
|
||||
HWTEST2_TEMPLATED_F(SyncBufferHandlerTest, GivenAllocateSyncBufferPatchAndConcurrentKernelWhenEnqueuingKernelThenSyncBufferIsUsed, HasDispatchAllSupport) {
|
||||
patchAllocateSyncBuffer();
|
||||
|
||||
enqueueNDCount();
|
||||
|
@ -142,7 +142,7 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenAllocateSyncBufferPatchAndConcurr
|
|||
EXPECT_EQ(2u * minimalSyncBufferSize, syncBufferHandler->usedBufferSize);
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithoutAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsNotCreated) {
|
||||
HWTEST2_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithoutAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsNotCreated, HasDispatchAllSupport) {
|
||||
auto retVal = enqueueNDCount();
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(nullptr, getSyncBufferHandler());
|
||||
|
@ -157,7 +157,7 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenDefaultKernelUsingSyncBufferWhenE
|
|||
EXPECT_EQ(nullptr, getSyncBufferHandler());
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsCreated) {
|
||||
HWTEST2_TEMPLATED_F(SyncBufferHandlerTest, GivenConcurrentKernelWithAllocateSyncBufferPatchWhenEnqueuingConcurrentKernelThenSyncBufferIsCreated, HasDispatchAllSupport) {
|
||||
patchAllocateSyncBuffer();
|
||||
auto retVal = enqueueNDCount();
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
|
@ -180,7 +180,7 @@ HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenTooHighWorkgroupCountWhenEnqueuin
|
|||
EXPECT_EQ(CL_INVALID_VALUE, retVal);
|
||||
}
|
||||
|
||||
HWTEST_TEMPLATED_F(SyncBufferHandlerTest, GivenSyncBufferFullWhenEnqueuingKernelThenNewBufferIsAllocated) {
|
||||
HWTEST2_TEMPLATED_F(SyncBufferHandlerTest, GivenSyncBufferFullWhenEnqueuingKernelThenNewBufferIsAllocated, HasDispatchAllSupport) {
|
||||
patchAllocateSyncBuffer();
|
||||
enqueueNDCount();
|
||||
auto syncBufferHandler = getSyncBufferHandler();
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
* Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -31,7 +31,11 @@ bool GfxCoreHelperHw<GfxFamily>::isCooperativeDispatchSupported(const EngineGrou
|
|||
template <typename GfxFamily>
|
||||
uint32_t GfxCoreHelperHw<GfxFamily>::adjustMaxWorkGroupCount(uint32_t maxWorkGroupCount, const EngineGroupType engineGroupType,
|
||||
const RootDeviceEnvironment &rootDeviceEnvironment) const {
|
||||
return maxWorkGroupCount;
|
||||
if ((debugManager.flags.ForceTheoreticalMaxWorkGroupCount.get()) ||
|
||||
(debugManager.flags.OverrideMaxWorkGroupCount.get() != -1)) {
|
||||
return maxWorkGroupCount;
|
||||
}
|
||||
return 1u;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
|
|
@ -96,6 +96,20 @@ using HasOclocZebinFormatEnforced = IsAnyProducts<IGFX_TIGERLAKE_LP,
|
|||
IGFX_ALDERLAKE_P,
|
||||
IGFX_ALDERLAKE_N>;
|
||||
|
||||
struct HasDispatchAllSupport {
|
||||
template <PRODUCT_FAMILY productFamily>
|
||||
static constexpr bool isMatched() {
|
||||
return IsPVC::isMatched<productFamily>() || IsAtLeastBmg::isMatched<productFamily>();
|
||||
}
|
||||
};
|
||||
|
||||
struct DoesNotHaveDispatchAllSupport {
|
||||
template <PRODUCT_FAMILY productFamily>
|
||||
static constexpr bool isMatched() {
|
||||
return !IsPVC::isMatched<productFamily>() && IsAtMostArl::isMatched<productFamily>();
|
||||
}
|
||||
};
|
||||
|
||||
struct IsXeLpg {
|
||||
template <PRODUCT_FAMILY productFamily>
|
||||
static constexpr bool isMatched() {
|
||||
|
|
|
@ -1582,7 +1582,7 @@ HWTEST_F(ProductHelperCommonTest, givenProductHelperWhenCallingIsCalculationForD
|
|||
auto &gfxCoreHelper = getHelper<ProductHelper>();
|
||||
EXPECT_FALSE(gfxCoreHelper.isCalculationForDisablingEuFusionWithDpasNeeded(hwInfo));
|
||||
}
|
||||
HWTEST_F(GfxCoreHelperTest, GivenCooperativeEngineSupportedAndNotUsedWhenAdjustMaxWorkGroupCountIsCalledThenSmallerValueIsReturned) {
|
||||
HWTEST2_F(GfxCoreHelperTest, GivenCooperativeEngineSupportedAndNotUsedWhenAdjustMaxWorkGroupCountIsCalledThenSmallerValueIsReturned, IsAtLeastBmg) {
|
||||
|
||||
MockExecutionEnvironment mockExecutionEnvironment{};
|
||||
auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
|
||||
|
@ -1631,6 +1631,24 @@ HWTEST_F(GfxCoreHelperTest, GivenCooperativeEngineSupportedAndNotUsedWhenAdjustM
|
|||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(GfxCoreHelperTest, GivenGfxHelperWhenAdjustMaxWorkGroupSizeCalledThenOneReturned, DoesNotHaveDispatchAllSupport) {
|
||||
MockExecutionEnvironment mockExecutionEnvironment{};
|
||||
auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
|
||||
auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
|
||||
auto engineGroupType = EngineGroupType::compute;
|
||||
EXPECT_EQ(1u, gfxCoreHelper.adjustMaxWorkGroupCount(1024u, engineGroupType, rootDeviceEnvironment));
|
||||
}
|
||||
|
||||
HWTEST2_F(GfxCoreHelperTest, GivenGfxHelperWhenForceTheoreticalMaxWorkGroupCountThenAdjustMaxWorkGroupSizeReturnNotOne, DoesNotHaveDispatchAllSupport) {
|
||||
DebugManagerStateRestore dbgRestore;
|
||||
debugManager.flags.ForceTheoreticalMaxWorkGroupCount.set(true);
|
||||
MockExecutionEnvironment mockExecutionEnvironment{};
|
||||
auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
|
||||
auto &gfxCoreHelper = rootDeviceEnvironment.getHelper<GfxCoreHelper>();
|
||||
auto engineGroupType = EngineGroupType::compute;
|
||||
EXPECT_NE(1u, gfxCoreHelper.adjustMaxWorkGroupCount(1024u, engineGroupType, rootDeviceEnvironment));
|
||||
}
|
||||
|
||||
HWTEST_F(GfxCoreHelperTest, givenNumGrfAndSimdSizeWhenAdjustingMaxWorkGroupSizeThenAlwaysReturnDeviceDefault) {
|
||||
const auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
||||
const auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
|
||||
|
|
|
@ -103,7 +103,7 @@ TEST_F(KernelHelperMaxWorkGroupsTests, givenMultipleSubdevicesWenCalculatingMaxW
|
|||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(KernelHelperMaxWorkGroupsTests, GivenBarriersWhenCalculatingMaxWorkGroupsCountThenResultIsCalculatedWithRegardToBarriersCount, MatchAny) {
|
||||
HWTEST2_F(KernelHelperMaxWorkGroupsTests, GivenBarriersWhenCalculatingMaxWorkGroupsCountThenResultIsCalculatedWithRegardToBarriersCount, HasDispatchAllSupport) {
|
||||
NEO::RAIIProductHelperFactory<MockProductHelperHw<productFamily>> raii(*rootDeviceEnvironment);
|
||||
raii.mockProductHelper->isCooperativeEngineSupportedValue = false;
|
||||
lws[0] = 1;
|
||||
|
@ -121,7 +121,7 @@ HWTEST2_F(KernelHelperMaxWorkGroupsTests, GivenBarriersWhenCalculatingMaxWorkGro
|
|||
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
||||
}
|
||||
|
||||
HWTEST2_F(KernelHelperMaxWorkGroupsTests, GivenUsedSlmSizeWhenCalculatingMaxWorkGroupsCountThenResultIsCalculatedWithRegardToUsedSlmSize, MatchAny) {
|
||||
HWTEST2_F(KernelHelperMaxWorkGroupsTests, GivenUsedSlmSizeWhenCalculatingMaxWorkGroupsCountThenResultIsCalculatedWithRegardToUsedSlmSize, HasDispatchAllSupport) {
|
||||
NEO::RAIIProductHelperFactory<MockProductHelperHw<productFamily>> raii(*rootDeviceEnvironment);
|
||||
raii.mockProductHelper->isCooperativeEngineSupportedValue = false;
|
||||
usedSlm = 0;
|
||||
|
@ -157,7 +157,7 @@ HWTEST_F(KernelHelperMaxWorkGroupsTests, givenZeroBarriersAndSlmNotUsedWhenCalcu
|
|||
EXPECT_EQ(raiiFactory.mockGfxCoreHelper->alignThreadGroupCountToDssSizeCalledTimes, 0u);
|
||||
}
|
||||
|
||||
TEST_F(KernelHelperMaxWorkGroupsTests, GivenVariousValuesWhenCalculatingMaxWorkGroupsCountThenLowestResultIsAlwaysReturned) {
|
||||
HWTEST2_F(KernelHelperMaxWorkGroupsTests, GivenVariousValuesWhenCalculatingMaxWorkGroupsCountThenLowestResultIsAlwaysReturned, HasDispatchAllSupport) {
|
||||
auto &helper = rootDeviceEnvironment->getHelper<NEO::GfxCoreHelper>();
|
||||
|
||||
engineType = EngineGroupType::cooperativeCompute;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2022-2024 Intel Corporation
|
||||
* Copyright (C) 2022-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -8,5 +8,4 @@
|
|||
#include "shared/test/common/test_macros/hw_test_base.h"
|
||||
|
||||
HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenProductHelperWhenAskedIfIsBlitSplitEnqueueWARequiredThenReturnFalse, IGFX_PVC);
|
||||
HWTEST_EXCLUDE_PRODUCT(BlitTests, GivenCpuAccessToLocalMemoryWhenGettingMaxBlitSizeThenValuesAreOverriden_BlitPlatforms, IGFX_PVC);
|
||||
HWTEST_EXCLUDE_PRODUCT(GfxCoreHelperTest, GivenCooperativeEngineSupportedAndNotUsedWhenAdjustMaxWorkGroupCountIsCalledThenSmallerValueIsReturned, IGFX_PVC);
|
||||
HWTEST_EXCLUDE_PRODUCT(BlitTests, GivenCpuAccessToLocalMemoryWhenGettingMaxBlitSizeThenValuesAreOverriden_BlitPlatforms, IGFX_PVC);
|
Loading…
Reference in New Issue