fix: correct number of max work group count for concurrent kernel on PVC
In single-CCS mode, use all EUs.
Related-To: NEO-8377
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
parent
5c4be8df54
commit
3e65e7bdba
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2023 Intel Corporation
|
* Copyright (C) 2023-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
|
@ -22,21 +22,21 @@ using KernelImpSuggestMaxCooperativeGroupCountTestsPvc = Test<L0::ult::KernelImp
|
||||||
|
|
||||||
PVCTEST_F(KernelImpSuggestMaxCooperativeGroupCountTestsPvc, GivenNoBarriersOrSlmUsedWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithSimd) {
|
PVCTEST_F(KernelImpSuggestMaxCooperativeGroupCountTestsPvc, GivenNoBarriersOrSlmUsedWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithSimd) {
|
||||||
auto workGroupSize = lws[0] * lws[1] * lws[2];
|
auto workGroupSize = lws[0] * lws[1] * lws[2];
|
||||||
auto expected = (availableThreadCount / Math::divideAndRoundUp(workGroupSize, simd)) / 2;
|
auto expected = availableThreadCount / Math::divideAndRoundUp(workGroupSize, simd);
|
||||||
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
||||||
}
|
}
|
||||||
|
|
||||||
PVCTEST_F(KernelImpSuggestMaxCooperativeGroupCountTestsPvc, GivenBarriersWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToBarriersCount) {
|
PVCTEST_F(KernelImpSuggestMaxCooperativeGroupCountTestsPvc, GivenBarriersWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToBarriersCount) {
|
||||||
usesBarriers = 1;
|
usesBarriers = 1;
|
||||||
auto expected = (dssCount * (maxBarrierCount / usesBarriers)) / 2;
|
auto expected = dssCount * (maxBarrierCount / usesBarriers);
|
||||||
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
||||||
}
|
}
|
||||||
|
|
||||||
PVCTEST_F(KernelImpSuggestMaxCooperativeGroupCountTestsPvc, GivenUsedSlmSizeWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToUsedSlmSize) {
|
PVCTEST_F(KernelImpSuggestMaxCooperativeGroupCountTestsPvc, GivenUsedSlmSizeWhenCalculatingMaxCooperativeGroupCountThenResultIsCalculatedWithRegardToUsedSlmSize) {
|
||||||
usedSlm = 64 * MemoryConstants::kiloByte;
|
usedSlm = 64 * MemoryConstants::kiloByte;
|
||||||
auto expected = (availableSlm / usedSlm) / 2;
|
auto expected = availableSlm / usedSlm;
|
||||||
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
EXPECT_EQ(expected, getMaxWorkGroupCount());
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace ult
|
} // namespace ult
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|
|
@ -218,7 +218,7 @@ std::optional<aub_stream::ProductFamily> ProductHelperHw<gfxProduct>::getAubStre
|
||||||
template <>
|
template <>
|
||||||
uint32_t ProductHelperHw<gfxProduct>::getNumberOfPartsInTileForConcurrentKernel(uint32_t ccsCount) const {
|
uint32_t ProductHelperHw<gfxProduct>::getNumberOfPartsInTileForConcurrentKernel(uint32_t ccsCount) const {
|
||||||
if (ccsCount == 1) {
|
if (ccsCount == 1) {
|
||||||
return 2;
|
return 1;
|
||||||
} else if (ccsCount == 2) {
|
} else if (ccsCount == 2) {
|
||||||
return 4;
|
return 4;
|
||||||
}
|
}
|
||||||
|
|
|
@ -188,7 +188,7 @@ PVCTEST_F(GfxCoreHelperTestsPvc, GivenCooperativeEngineSupportedAndNotUsedWhenAd
|
||||||
} else {
|
} else {
|
||||||
for (uint32_t ccsCount : {1, 2, 4}) {
|
for (uint32_t ccsCount : {1, 2, 4}) {
|
||||||
hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = ccsCount;
|
hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = ccsCount;
|
||||||
tilePartsForConcurrentKernels = ccsCount == 1 ? 2
|
tilePartsForConcurrentKernels = ccsCount == 1 ? 1
|
||||||
: ccsCount == 2 ? 4
|
: ccsCount == 2 ? 4
|
||||||
: 8;
|
: 8;
|
||||||
EXPECT_EQ(passedMaxWorkGroupCount / tilePartsForConcurrentKernels, gfxCoreHelper.adjustMaxWorkGroupCount(passedMaxWorkGroupCount, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
EXPECT_EQ(passedMaxWorkGroupCount / tilePartsForConcurrentKernels, gfxCoreHelper.adjustMaxWorkGroupCount(passedMaxWorkGroupCount, engineGroupType, rootDeviceEnvironment, isEngineInstanced));
|
||||||
|
|
Loading…
Reference in New Issue