mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 20:39:56 +08:00
refactor: Correct logic for SIMD1
- When calculating the number of threads per workgroup for SIMD 1, return the local work size (each software thread should be mapped onto a whole hardware thread).
- Correct the logic for calculating the space for per-thread data for SIMD 1.
- Minor: refactor unit tests.
- Correct naming.
Related-To: NEO-8261
Signed-off-by: Kacper Nowak <kacper.nowak@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
3c2ef4fbbf
commit
160303924d
@@ -23,22 +23,27 @@ using LocalIdTests = ::testing::Test;
|
||||
|
||||
// Expects a result of 1 for SIMD 8 when the second argument is 32.
// NOTE(review): the second argument is presumably the GRF size in bytes - confirm against the helper's declaration.
// NOTE(review): this text is a rendered diff, so the getGRFsPerThread expectation is presumably the
// pre-rename form of the getNumGrfsPerLocalIdCoordinate expectation that follows it - confirm before reuse.
HWTEST_F(LocalIdTests, GivenSimd8WhenGettingGrfsPerThreadThenOneIsReturned) {
|
||||
uint32_t simd = 8;
|
||||
EXPECT_EQ(1u, getGRFsPerThread(simd, 32));
|
||||
EXPECT_EQ(1u, getNumGrfsPerLocalIdCoordinate(simd, 32));
|
||||
}
|
||||
|
||||
// Expects a result of 1 for SIMD 16 when the second argument is 32.
// NOTE(review): the second argument is presumably the GRF size in bytes - confirm.
// NOTE(review): the two expectations look like the old and new spelling of the same check
// (rendered diff without +/- markers) - confirm which helper name is current.
HWTEST_F(LocalIdTests, GivenSimd16WhenGettingGrfsPerThreadThenOneIsReturned) {
|
||||
uint32_t simd = 16;
|
||||
EXPECT_EQ(1u, getGRFsPerThread(simd, 32));
|
||||
EXPECT_EQ(1u, getNumGrfsPerLocalIdCoordinate(simd, 32));
|
||||
}
|
||||
|
||||
// Expects a result of 2 for SIMD 32 when the second argument is 32.
// NOTE(review): the second argument is presumably the GRF size in bytes - confirm.
// NOTE(review): the two expectations look like the old and new spelling of the same check
// (rendered diff without +/- markers) - confirm which helper name is current.
HWTEST_F(LocalIdTests, GivenSimd32WhenGettingGrfsPerThreadThenTwoIsReturned) {
|
||||
uint32_t simd = 32;
|
||||
EXPECT_EQ(2u, getGRFsPerThread(simd, 32));
|
||||
EXPECT_EQ(2u, getNumGrfsPerLocalIdCoordinate(simd, 32));
|
||||
}
|
||||
|
||||
// Expects getNumGrfsPerLocalIdCoordinate to return 1 for SIMD 1 when the second argument is 32.
// NOTE(review): the second argument is presumably the GRF size in bytes - confirm.
HWTEST_F(LocalIdTests, GivenSimd1WhenGettingGrfsPerThreadThenOneIsReturned) {
|
||||
uint32_t simd = 1;
|
||||
EXPECT_EQ(1u, getNumGrfsPerLocalIdCoordinate(simd, 32));
|
||||
}
|
||||
|
||||
// Expects a result of 1 for SIMD 32 when the second argument is 33 (instead of the 32 used by the sibling tests).
// NOTE(review): the test name ends in "ThenTwoIsReturned", yet both expectations compare against 1u;
// the name and the assertions disagree - confirm which one reflects the intended behaviour.
// NOTE(review): the two expectations look like the old and new spelling of the same check
// (rendered diff without +/- markers) - confirm which helper name is current.
HWTEST_F(LocalIdTests, GivenSimd32AndNon32GrfSizeWhenGettingGrfsPerThreadThenTwoIsReturned) {
|
||||
uint32_t simd = 32;
|
||||
EXPECT_EQ(1u, getGRFsPerThread(simd, 33));
|
||||
EXPECT_EQ(1u, getNumGrfsPerLocalIdCoordinate(simd, 33));
|
||||
}
|
||||
|
||||
TEST(LocalID, GivenSimd32AndLws33WhenGettingThreadsPerWorkgroupThenTwoIsReturned) {
|
||||
@@ -78,6 +83,28 @@ TEST(LocalID, GivenSimd1WhenGettingPerThreadSizeLocalIdsThenValueIsEqualGrfSize)
|
||||
EXPECT_EQ(grfSize, getPerThreadSizeLocalIDs(simd, grfSize));
|
||||
}
|
||||
|
||||
// Checks getThreadsPerWG(simd, lws) for SIMD values 1, 8, 16 and 32 with lws fixed at 33.
// For SIMD 1 the expected value is lws itself; for the other values the expected value is
// lws rounded up to a multiple of the SIMD value and then shifted right by log2 of that value.
TEST(LocalID, WhenThreadsPerWgAreGeneratedThenCalculationsAreCorrect) {
|
||||
const auto lws = 33u;
|
||||
for (const auto &simd : {1u, 8u, 16u, 32u}) {
|
||||
switch (simd) {
|
||||
case 1u:
|
||||
// SIMD 1: the expected thread count equals the local work size.
EXPECT_EQ(lws, getThreadsPerWG(simd, lws));
|
||||
break;
|
||||
case 32u:
|
||||
// simd is 32 in this branch, so std::max(32u, simd) evaluates to 32 and the expectation is (lws + 31) >> 5.
EXPECT_EQ((lws + std::max(32u, simd) - 1) >> 5, getThreadsPerWG(simd, lws));
|
||||
break;
|
||||
case 8u:
|
||||
// Round-up division by 8: (lws + 7) >> 3.
EXPECT_EQ((lws + simd - 1) >> 3, getThreadsPerWG(simd, lws));
|
||||
break;
|
||||
case 16u:
|
||||
// Round-up division by 16: (lws + 15) >> 4.
EXPECT_EQ((lws + simd - 1) >> 4, getThreadsPerWG(simd, lws));
|
||||
break;
|
||||
default:
|
||||
// Not reached for the SIMD values listed in the loop above.
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(LocalIdTest, givenVariadicGrfSizeWhenLocalSizesAreEmittedThenUseFullRowSize) {
|
||||
auto localIdsPtr = allocateAlignedMemory(3 * 64u, MemoryConstants::cacheLineSize);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user