mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 22:12:59 +08:00
fix: Unify logic calculating threads per work group part 2
- Use calculateNumThreadsPerThreadGroup instead of getThreadsPerWG so that all callers follow the same flow and get the proper number of threads per work group.
Related-To: NEO-8087
Signed-off-by: Cencelewska, Katarzyna <katarzyna.cencelewska@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
c294ef48ce
commit
1e8a53bd53
@@ -7,8 +7,10 @@
|
||||
|
||||
#include "shared/source/helpers/aligned_memory.h"
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/helpers/local_id_gen.h"
|
||||
#include "shared/source/helpers/ptr_math.h"
|
||||
#include "shared/test/common/helpers/default_hw_info.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
|
||||
@@ -75,14 +77,16 @@ TEST(LocalID, GivenSimd1WhenGettingPerThreadSizeLocalIdsThenValueIsEqualGrfSize)
|
||||
|
||||
EXPECT_EQ(grfSize, getPerThreadSizeLocalIDs(simd, grfSize));
|
||||
}
|
||||
TEST(LocalID, givenVariadicGrfSizeWhenLocalSizesAreEmittedThenUseFullRowSize) {
|
||||
|
||||
TEST(LocalIdTest, givenVariadicGrfSizeWhenLocalSizesAreEmittedThenUseFullRowSize) {
|
||||
auto localIdsPtr = allocateAlignedMemory(3 * 64u, MemoryConstants::cacheLineSize);
|
||||
|
||||
uint16_t *localIdsView = reinterpret_cast<uint16_t *>(localIdsPtr.get());
|
||||
std::array<uint16_t, 3u> localSizes = {{2u, 2u, 1u}};
|
||||
std::array<uint8_t, 3u> dimensionsOrder = {{0u, 1u, 2u}};
|
||||
|
||||
generateLocalIDs(localIdsPtr.get(), 16u, localSizes, dimensionsOrder, false, 64u);
|
||||
auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily);
|
||||
generateLocalIDs(localIdsPtr.get(), 16u, localSizes, dimensionsOrder, false, 64u, *gfxCoreHelper.get());
|
||||
EXPECT_EQ(localIdsView[0], 0u);
|
||||
EXPECT_EQ(localIdsView[1], 1u);
|
||||
EXPECT_EQ(localIdsView[2], 0u);
|
||||
@@ -277,37 +281,42 @@ struct LocalIDFixture : ::testing::TestWithParam<std::tuple<int, int, int, int,
|
||||
};
|
||||
|
||||
HWTEST_P(LocalIDFixture, WhenGeneratingLocalIdsThenIdsAreWithinLimits) {
|
||||
auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily);
|
||||
generateLocalIDs(buffer, simd, std::array<uint16_t, 3>{{static_cast<uint16_t>(localWorkSizeX), static_cast<uint16_t>(localWorkSizeY), static_cast<uint16_t>(localWorkSizeZ)}},
|
||||
std::array<uint8_t, 3>{{0, 1, 2}}, false, grfSize);
|
||||
std::array<uint8_t, 3>{{0, 1, 2}}, false, grfSize, *gfxCoreHelper.get());
|
||||
validateIDWithinLimits(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper<FamilyType>::useFullRowForLocalIdsGeneration);
|
||||
}
|
||||
|
||||
HWTEST_P(LocalIDFixture, WhenGeneratingLocalIdsThenAllWorkItemsCovered) {
|
||||
auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily);
|
||||
generateLocalIDs(buffer, simd, std::array<uint16_t, 3>{{static_cast<uint16_t>(localWorkSizeX), static_cast<uint16_t>(localWorkSizeY), static_cast<uint16_t>(localWorkSizeZ)}},
|
||||
std::array<uint8_t, 3>{{0, 1, 2}}, false, grfSize);
|
||||
std::array<uint8_t, 3>{{0, 1, 2}}, false, grfSize, *gfxCoreHelper.get());
|
||||
validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper<FamilyType>::useFullRowForLocalIdsGeneration);
|
||||
}
|
||||
|
||||
HWTEST_P(LocalIDFixture, WhenWalkOrderIsXyzThenProperLocalIdsAreGenerated) {
|
||||
auto dimensionsOrder = std::array<uint8_t, 3>{{0, 1, 2}};
|
||||
auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily);
|
||||
generateLocalIDs(buffer, simd, std::array<uint16_t, 3>{{static_cast<uint16_t>(localWorkSizeX), static_cast<uint16_t>(localWorkSizeY), static_cast<uint16_t>(localWorkSizeZ)}},
|
||||
dimensionsOrder, false, grfSize);
|
||||
dimensionsOrder, false, grfSize, *gfxCoreHelper.get());
|
||||
validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper<FamilyType>::useFullRowForLocalIdsGeneration);
|
||||
validateWalkOrder(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, dimensionsOrder);
|
||||
}
|
||||
|
||||
HWTEST_P(LocalIDFixture, WhenWalkOrderIsYxzThenProperLocalIdsAreGenerated) {
|
||||
auto dimensionsOrder = std::array<uint8_t, 3>{{1, 0, 2}};
|
||||
auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily);
|
||||
generateLocalIDs(buffer, simd, std::array<uint16_t, 3>{{static_cast<uint16_t>(localWorkSizeX), static_cast<uint16_t>(localWorkSizeY), static_cast<uint16_t>(localWorkSizeZ)}},
|
||||
dimensionsOrder, false, grfSize);
|
||||
dimensionsOrder, false, grfSize, *gfxCoreHelper.get());
|
||||
validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper<FamilyType>::useFullRowForLocalIdsGeneration);
|
||||
validateWalkOrder(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, dimensionsOrder);
|
||||
}
|
||||
|
||||
HWTEST_P(LocalIDFixture, WhenWalkOrderIsZyxThenProperLocalIdsAreGenerated) {
|
||||
auto dimensionsOrder = std::array<uint8_t, 3>{{2, 1, 0}};
|
||||
auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily);
|
||||
generateLocalIDs(buffer, simd, std::array<uint16_t, 3>{{static_cast<uint16_t>(localWorkSizeX), static_cast<uint16_t>(localWorkSizeY), static_cast<uint16_t>(localWorkSizeZ)}},
|
||||
dimensionsOrder, false, grfSize);
|
||||
dimensionsOrder, false, grfSize, *gfxCoreHelper.get());
|
||||
validateAllWorkItemsCovered(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, UnitTestHelper<FamilyType>::useFullRowForLocalIdsGeneration);
|
||||
validateWalkOrder(simd, localWorkSizeX, localWorkSizeY, localWorkSizeZ, dimensionsOrder);
|
||||
}
|
||||
@@ -335,8 +344,8 @@ struct LocalIdsLayoutForImagesTest : ::testing::TestWithParam<std::tuple<uint16_
|
||||
rowWidth = simd == 32u ? 32u : 16u;
|
||||
xDelta = simd == 8u ? 2u : 4u;
|
||||
}
|
||||
void generateLocalIds() {
|
||||
|
||||
void generateLocalIds() {
|
||||
auto numGrfs = (localWorkSize.at(0) * localWorkSize.at(1) + (simd - 1)) / simd;
|
||||
elemsInBuffer = 3u * simd * numGrfs;
|
||||
if (simd == 8u) {
|
||||
@@ -347,7 +356,8 @@ struct LocalIdsLayoutForImagesTest : ::testing::TestWithParam<std::tuple<uint16_
|
||||
memset(memory.get(), 0xff, size);
|
||||
buffer = reinterpret_cast<uint16_t *>(memory.get());
|
||||
EXPECT_TRUE(isCompatibleWithLayoutForImages(localWorkSize, dimensionsOrder, simd));
|
||||
generateLocalIDs(buffer, simd, localWorkSize, dimensionsOrder, true, grfSize);
|
||||
auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily);
|
||||
generateLocalIDs(buffer, simd, localWorkSize, dimensionsOrder, true, grfSize, *gfxCoreHelper.get());
|
||||
}
|
||||
void validateGRF() {
|
||||
uint32_t totalLocalIds = localWorkSize.at(0) * localWorkSize.at(1);
|
||||
@@ -447,9 +457,9 @@ TEST_P(LocalIdsLayoutTest, givenLocalWorkgroupSize4x4x1WhenGenerateLocalIdsThenH
|
||||
auto alignedMemory2 = allocateAlignedMemory(size, 32);
|
||||
auto buffer2 = reinterpret_cast<uint16_t *>(alignedMemory2.get());
|
||||
memset(buffer2, 0xff, size);
|
||||
|
||||
generateLocalIDs(buffer1, simd, localWorkSize, dimensionsOrder, false, grfSize);
|
||||
generateLocalIDs(buffer2, simd, localWorkSize, dimensionsOrder, true, grfSize);
|
||||
auto gfxCoreHelper = GfxCoreHelper::create(defaultHwInfo->platform.eRenderCoreFamily);
|
||||
generateLocalIDs(buffer1, simd, localWorkSize, dimensionsOrder, false, grfSize, *gfxCoreHelper.get());
|
||||
generateLocalIDs(buffer2, simd, localWorkSize, dimensionsOrder, true, grfSize, *gfxCoreHelper.get());
|
||||
|
||||
for (auto i = 0u; i < elemsInBuffer / rowWidth; i++) {
|
||||
for (auto j = 0u; j < rowWidth; j++) {
|
||||
|
||||
Reference in New Issue
Block a user