From 5751b5eb272f9244965e67b210137a6b2078fc24 Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Mon, 24 Feb 2020 13:21:37 +0100 Subject: [PATCH] Improve getGRFsPerThread helper Change-Id: I8d4d2ce35ca6f6054e74930e6a936fc2e49bfcc3 Signed-off-by: Dunajski, Bartosz --- opencl/source/command_queue/local_id_gen.h | 6 +++--- opencl/source/helpers/per_thread_data.cpp | 2 +- .../command_queue/local_id_tests.cpp | 19 +++++++++++++------ 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/opencl/source/command_queue/local_id_gen.h b/opencl/source/command_queue/local_id_gen.h index 3086300b9a..2245f3ec56 100644 --- a/opencl/source/command_queue/local_id_gen.h +++ b/opencl/source/command_queue/local_id_gen.h @@ -14,8 +14,8 @@ #include namespace NEO { -inline uint32_t getGRFsPerThread(uint32_t simd) { - return simd == 32 ? 2 : 1; +inline uint32_t getGRFsPerThread(uint32_t simd, uint32_t grfSize) { + return (simd == 32 && grfSize == 32) ? 2 : 1; } inline size_t getThreadsPerWG(uint32_t simd, size_t lws) { @@ -36,7 +36,7 @@ inline size_t getThreadsPerWG(uint32_t simd, size_t lws) { } inline uint32_t getPerThreadSizeLocalIDs(uint32_t simd, uint32_t grfSize, uint32_t numChannels = 3) { - auto numGRFSPerThread = getGRFsPerThread(simd); + auto numGRFSPerThread = getGRFsPerThread(simd, grfSize); uint32_t returnSize = numGRFSPerThread * grfSize * (simd == 1 ? 
1u : numChannels); returnSize = std::max(returnSize, grfSize); return returnSize; diff --git a/opencl/source/helpers/per_thread_data.cpp b/opencl/source/helpers/per_thread_data.cpp index 8097e40a08..e1efc57f31 100644 --- a/opencl/source/helpers/per_thread_data.cpp +++ b/opencl/source/helpers/per_thread_data.cpp @@ -41,7 +41,7 @@ size_t PerThreadDataHelper::sendPerThreadData( } uint32_t PerThreadDataHelper::getThreadPayloadSize(const iOpenCL::SPatchThreadPayload &threadPayload, uint32_t simd, uint32_t grfSize) { - uint32_t multiplier = static_cast<uint32_t>(getGRFsPerThread(simd)); + uint32_t multiplier = static_cast<uint32_t>(getGRFsPerThread(simd, grfSize)); uint32_t threadPayloadSize = 0; threadPayloadSize = getNumLocalIdChannels(threadPayload) * multiplier * grfSize; threadPayloadSize += (threadPayload.HeaderPresent) ? grfSize : 0; diff --git a/opencl/test/unit_test/command_queue/local_id_tests.cpp b/opencl/test/unit_test/command_queue/local_id_tests.cpp index 115b989c0b..6f32af1f4e 100644 --- a/opencl/test/unit_test/command_queue/local_id_tests.cpp +++ b/opencl/test/unit_test/command_queue/local_id_tests.cpp @@ -18,19 +18,26 @@ using namespace NEO; -TEST(LocalID, GivenSimd8WhenGettingGrfsPerThreadThenOneIsReturned) { +using LocalIdTests = ::testing::Test; + +HWTEST_F(LocalIdTests, GivenSimd8WhenGettingGrfsPerThreadThenOneIsReturned) { uint32_t simd = 8; - EXPECT_EQ(1u, getGRFsPerThread(simd)); + EXPECT_EQ(1u, getGRFsPerThread(simd, 32)); } -TEST(LocalID, GivenSimd16WhenGettingGrfsPerThreadThenOneIsReturned) { +HWTEST_F(LocalIdTests, GivenSimd16WhenGettingGrfsPerThreadThenOneIsReturned) { uint32_t simd = 16; - EXPECT_EQ(1u, getGRFsPerThread(simd)); + EXPECT_EQ(1u, getGRFsPerThread(simd, 32)); } -TEST(LocalID, GivenSimd32WhenGettingGrfsPerThreadThenTwoIsReturned) { +HWTEST_F(LocalIdTests, GivenSimd32WhenGettingGrfsPerThreadThenTwoIsReturned) { uint32_t simd = 32; - EXPECT_EQ(2u, getGRFsPerThread(simd)); + EXPECT_EQ(2u, getGRFsPerThread(simd, 32)); +} + +HWTEST_F(LocalIdTests, 
GivenSimd32AndNon32GrfSizeWhenGettingGrfsPerThreadThenOneIsReturned) { + uint32_t simd = 32; + EXPECT_EQ(1u, getGRFsPerThread(simd, 33)); } TEST(LocalID, GivenSimd32AndLws33WhenGettingThreadsPerWorkgroupThenTwoIsReturned) {