fix: Align thread group count to dss on all platforms
Related-To: NEO-13263, GSD-10327 Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
parent
9b4bb0c9ea
commit
62d8e3e4b0
|
@ -802,6 +802,8 @@ uint32_t GfxCoreHelperHw<GfxFamily>::calculateAvailableThreadCount(const Hardwar
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
void GfxCoreHelperHw<GfxFamily>::alignThreadGroupCountToDssSize(uint32_t &threadCount, uint32_t dssCount, uint32_t threadsPerDss, uint32_t threadGroupSize) const {
|
void GfxCoreHelperHw<GfxFamily>::alignThreadGroupCountToDssSize(uint32_t &threadCount, uint32_t dssCount, uint32_t threadsPerDss, uint32_t threadGroupSize) const {
|
||||||
|
uint32_t availableTreadCount = (threadsPerDss / threadGroupSize) * dssCount;
|
||||||
|
threadCount = std::min(threadCount, availableTreadCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
|
|
|
@ -1874,10 +1874,32 @@ HWTEST_F(GfxCoreHelperTest, givenGetDeviceTimestampWidthCalledThenReturnCorrectV
|
||||||
EXPECT_EQ(64u, helper.getDeviceTimestampWidth());
|
EXPECT_EQ(64u, helper.getDeviceTimestampWidth());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Aligning a large thread group count against dssCount = 1, threadsPerDss = 1 and
// threadGroupSize = 1 must modify the count.
HWTEST_F(GfxCoreHelperTest, givenHwHelperWhenAligningThreadGroupCountToDssSizeThenThreadGroupCountChanged) {
    const uint32_t initialThreadGroupCount = 4096;
    auto &gfxCoreHelper = getHelper<GfxCoreHelper>();

    uint32_t alignedThreadGroupCount = initialThreadGroupCount;
    gfxCoreHelper.alignThreadGroupCountToDssSize(alignedThreadGroupCount, 1, 1, 1);

    EXPECT_NE(initialThreadGroupCount, alignedThreadGroupCount);
}
|
||||||
|
|
||||||
|
// With threadsPerDss an exact multiple of threadGroupSize (two groups per DSS), a count
// already equal to the limit must come back as 2 * dssCount.
HWTEST_F(GfxCoreHelperTest, givenHwHelperWhenThreadGroupCountIsAlignedToDssThenThreadCountNotChanged) {
    const uint32_t dssCount = 16;
    const uint32_t threadGroupSize = 32;
    const uint32_t threadsPerDss = 2 * threadGroupSize;
    const uint32_t maxThreadCount = (dssCount * threadsPerDss) / threadGroupSize;

    auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
    uint32_t alignedThreadCount = maxThreadCount;
    gfxCoreHelper.alignThreadGroupCountToDssSize(alignedThreadCount, dssCount, threadsPerDss, threadGroupSize);

    EXPECT_EQ(2 * dssCount, alignedThreadCount);
}
|
||||||
|
|
||||||
|
// With threadsPerDss one thread short of two full groups, only one whole group fits per DSS,
// so the count must be lowered to dssCount.
HWTEST_F(GfxCoreHelperTest, givenHwHelperWhenThreadGroupCountIsAlignedToDssThenThreadCountChanged) {
    const uint32_t dssCount = 16;
    const uint32_t threadGroupSize = 32;
    const uint32_t threadsPerDss = 2 * threadGroupSize - 1;
    const uint32_t maxThreadCount = (dssCount * threadsPerDss) / threadGroupSize;

    auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
    uint32_t alignedThreadCount = maxThreadCount;
    gfxCoreHelper.alignThreadGroupCountToDssSize(alignedThreadCount, dssCount, threadsPerDss, threadGroupSize);

    EXPECT_EQ(dssCount, alignedThreadCount);
}
|
Loading…
Reference in New Issue