Improve getGRFsPerThread helper

Change-Id: I8d4d2ce35ca6f6054e74930e6a936fc2e49bfcc3
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
Dunajski, Bartosz
2020-02-24 13:21:37 +01:00
committed by sys_ocldev
parent 49aac737a7
commit 5751b5eb27
3 changed files with 17 additions and 10 deletions

View File

@@ -14,8 +14,8 @@
#include <cstdint>
namespace NEO {
inline uint32_t getGRFsPerThread(uint32_t simd) {
return simd == 32 ? 2 : 1;
inline uint32_t getGRFsPerThread(uint32_t simd, uint32_t grfSize) {
return (simd == 32 && grfSize == 32) ? 2 : 1;
}
inline size_t getThreadsPerWG(uint32_t simd, size_t lws) {
@@ -36,7 +36,7 @@ inline size_t getThreadsPerWG(uint32_t simd, size_t lws) {
}
inline uint32_t getPerThreadSizeLocalIDs(uint32_t simd, uint32_t grfSize, uint32_t numChannels = 3) {
auto numGRFSPerThread = getGRFsPerThread(simd);
auto numGRFSPerThread = getGRFsPerThread(simd, grfSize);
uint32_t returnSize = numGRFSPerThread * grfSize * (simd == 1 ? 1u : numChannels);
returnSize = std::max(returnSize, grfSize);
return returnSize;

View File

@@ -41,7 +41,7 @@ size_t PerThreadDataHelper::sendPerThreadData(
}
uint32_t PerThreadDataHelper::getThreadPayloadSize(const iOpenCL::SPatchThreadPayload &threadPayload, uint32_t simd, uint32_t grfSize) {
uint32_t multiplier = static_cast<uint32_t>(getGRFsPerThread(simd));
uint32_t multiplier = static_cast<uint32_t>(getGRFsPerThread(simd, grfSize));
uint32_t threadPayloadSize = 0;
threadPayloadSize = getNumLocalIdChannels(threadPayload) * multiplier * grfSize;
threadPayloadSize += (threadPayload.HeaderPresent) ? grfSize : 0;

View File

@@ -18,19 +18,26 @@
using namespace NEO;
TEST(LocalID, GivenSimd8WhenGettingGrfsPerThreadThenOneIsReturned) {
using LocalIdTests = ::testing::Test;
HWTEST_F(LocalIdTests, GivenSimd8WhenGettingGrfsPerThreadThenOneIsReturned) {
uint32_t simd = 8;
EXPECT_EQ(1u, getGRFsPerThread(simd));
EXPECT_EQ(1u, getGRFsPerThread(simd, 32));
}
TEST(LocalID, GivenSimd16WhenGettingGrfsPerThreadThenOneIsReturned) {
HWTEST_F(LocalIdTests, GivenSimd16WhenGettingGrfsPerThreadThenOneIsReturned) {
uint32_t simd = 16;
EXPECT_EQ(1u, getGRFsPerThread(simd));
EXPECT_EQ(1u, getGRFsPerThread(simd, 32));
}
TEST(LocalID, GivenSimd32WhenGettingGrfsPerThreadThenTwoIsReturned) {
HWTEST_F(LocalIdTests, GivenSimd32WhenGettingGrfsPerThreadThenTwoIsReturned) {
uint32_t simd = 32;
EXPECT_EQ(2u, getGRFsPerThread(simd));
EXPECT_EQ(2u, getGRFsPerThread(simd, 32));
}
// SIMD32 combined with a GRF size other than 32 (33 here) is expected to yield
// one GRF per thread, so the test name states "One" to match the assertion.
HWTEST_F(LocalIdTests, GivenSimd32AndNon32GrfSizeWhenGettingGrfsPerThreadThenOneIsReturned) {
    uint32_t simd = 32;
    EXPECT_EQ(1u, getGRFsPerThread(simd, 33));
}
TEST(LocalID, GivenSimd32AndLws33WhenGettingThreadsPerWorkgroupThenTwoIsReturned) {