mirror of
https://github.com/intel/compute-runtime.git
synced 2025-11-10 05:49:51 +08:00
Improve getGRFsPerThread helper
Change-Id: I8d4d2ce35ca6f6054e74930e6a936fc2e49bfcc3
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
49aac737a7
commit
5751b5eb27
@@ -14,8 +14,8 @@
|
||||
#include <cstdint>
|
||||
|
||||
namespace NEO {
|
||||
inline uint32_t getGRFsPerThread(uint32_t simd) {
|
||||
return simd == 32 ? 2 : 1;
|
||||
inline uint32_t getGRFsPerThread(uint32_t simd, uint32_t grfSize) {
|
||||
return (simd == 32 && grfSize == 32) ? 2 : 1;
|
||||
}
|
||||
|
||||
inline size_t getThreadsPerWG(uint32_t simd, size_t lws) {
|
||||
@@ -36,7 +36,7 @@ inline size_t getThreadsPerWG(uint32_t simd, size_t lws) {
|
||||
}
|
||||
|
||||
inline uint32_t getPerThreadSizeLocalIDs(uint32_t simd, uint32_t grfSize, uint32_t numChannels = 3) {
|
||||
auto numGRFSPerThread = getGRFsPerThread(simd);
|
||||
auto numGRFSPerThread = getGRFsPerThread(simd, grfSize);
|
||||
uint32_t returnSize = numGRFSPerThread * grfSize * (simd == 1 ? 1u : numChannels);
|
||||
returnSize = std::max(returnSize, grfSize);
|
||||
return returnSize;
|
||||
|
||||
@@ -41,7 +41,7 @@ size_t PerThreadDataHelper::sendPerThreadData(
|
||||
}
|
||||
|
||||
uint32_t PerThreadDataHelper::getThreadPayloadSize(const iOpenCL::SPatchThreadPayload &threadPayload, uint32_t simd, uint32_t grfSize) {
|
||||
uint32_t multiplier = static_cast<uint32_t>(getGRFsPerThread(simd));
|
||||
uint32_t multiplier = static_cast<uint32_t>(getGRFsPerThread(simd, grfSize));
|
||||
uint32_t threadPayloadSize = 0;
|
||||
threadPayloadSize = getNumLocalIdChannels(threadPayload) * multiplier * grfSize;
|
||||
threadPayloadSize += (threadPayload.HeaderPresent) ? grfSize : 0;
|
||||
|
||||
@@ -18,19 +18,26 @@
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
TEST(LocalID, GivenSimd8WhenGettingGrfsPerThreadThenOneIsReturned) {
|
||||
using LocalIdTests = ::testing::Test;
|
||||
|
||||
HWTEST_F(LocalIdTests, GivenSimd8WhenGettingGrfsPerThreadThenOneIsReturned) {
|
||||
uint32_t simd = 8;
|
||||
EXPECT_EQ(1u, getGRFsPerThread(simd));
|
||||
EXPECT_EQ(1u, getGRFsPerThread(simd, 32));
|
||||
}
|
||||
|
||||
TEST(LocalID, GivenSimd16WhenGettingGrfsPerThreadThenOneIsReturned) {
|
||||
HWTEST_F(LocalIdTests, GivenSimd16WhenGettingGrfsPerThreadThenOneIsReturned) {
|
||||
uint32_t simd = 16;
|
||||
EXPECT_EQ(1u, getGRFsPerThread(simd));
|
||||
EXPECT_EQ(1u, getGRFsPerThread(simd, 32));
|
||||
}
|
||||
|
||||
TEST(LocalID, GivenSimd32WhenGettingGrfsPerThreadThenTwoIsReturned) {
|
||||
HWTEST_F(LocalIdTests, GivenSimd32WhenGettingGrfsPerThreadThenTwoIsReturned) {
|
||||
uint32_t simd = 32;
|
||||
EXPECT_EQ(2u, getGRFsPerThread(simd));
|
||||
EXPECT_EQ(2u, getGRFsPerThread(simd, 32));
|
||||
}
|
||||
|
||||
// Verifies that getGRFsPerThread() returns two only when both simd and
// grfSize equal 32: with simd == 32 but a GRF size other than 32 the helper
// must fall back to one.
// The test name previously ended in "ThenTwoIsReturned", contradicting the
// asserted value of 1u; it now states the expectation that is checked.
HWTEST_F(LocalIdTests, GivenSimd32AndNon32GrfSizeWhenGettingGrfsPerThreadThenOneIsReturned) {
    uint32_t simd = 32;
    // 33 is an arbitrary GRF size different from 32.
    EXPECT_EQ(1u, getGRFsPerThread(simd, 33));
}
|
||||
|
||||
TEST(LocalID, GivenSimd32AndLws33WhenGettingThreadsPerWorkgroupThenTwoIsReturned) {
|
||||
|
||||
Reference in New Issue
Block a user