From bc0a3a7eb59d033c57831717f0f6df7663042646 Mon Sep 17 00:00:00 2001 From: Lukasz Jobczyk Date: Fri, 23 Jun 2023 10:20:15 +0000 Subject: [PATCH] fix: Consider slm size in suggest work group cache Signed-off-by: Lukasz Jobczyk --- level_zero/core/source/kernel/kernel_imp.cpp | 11 +- level_zero/core/source/kernel/kernel_imp.h | 10 +- .../core/test/unit_tests/mocks/mock_kernel.h | 1 + .../sources/kernel/test_kernel_2.cpp | 100 ++++++++++++------ 4 files changed, 82 insertions(+), 40 deletions(-) diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index a6afdebe8e..09b9df0fc4 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -383,12 +383,13 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz dim = (globalSizeZ > 1U) ? 3 : dim; auto cachedGroupSize = std::find_if(this->suggestGroupSizeCache.begin(), this->suggestGroupSizeCache.end(), [&](const auto &other) { - return other.first == workItems; + return other.groupSize == workItems && + other.slmArgsTotalSize == this->getSlmTotalSize(); }); if (cachedGroupSize != this->suggestGroupSizeCache.end()) { - *groupSizeX = static_cast(cachedGroupSize->second.x); - *groupSizeY = static_cast(cachedGroupSize->second.y); - *groupSizeZ = static_cast(cachedGroupSize->second.z); + *groupSizeX = static_cast(cachedGroupSize->suggestedGroupSize.x); + *groupSizeY = static_cast(cachedGroupSize->suggestedGroupSize.y); + *groupSizeZ = static_cast(cachedGroupSize->suggestedGroupSize.z); return ZE_RESULT_SUCCESS; } @@ -420,7 +421,7 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz *groupSizeX = static_cast(retGroupSize[0]); *groupSizeY = static_cast(retGroupSize[1]); *groupSizeZ = static_cast(retGroupSize[2]); - this->suggestGroupSizeCache.push_back(std::make_pair(Vec3(workItems), Vec3(retGroupSize))); + this->suggestGroupSizeCache.emplace_back(workItems, this->getSlmTotalSize(), retGroupSize); return ZE_RESULT_SUCCESS; } diff --git a/level_zero/core/source/kernel/kernel_imp.h b/level_zero/core/source/kernel/kernel_imp.h index 2e9ea1e62a..fcb47a8a8f 100644 --- a/level_zero/core/source/kernel/kernel_imp.h +++ b/level_zero/core/source/kernel/kernel_imp.h @@ -241,8 +241,14 @@ struct KernelImp : Kernel { std::unique_ptr pExtension; std::mutex printfLock; - using SuggestGroupSizeCacheT = std::vector, Vec3>>; - SuggestGroupSizeCacheT suggestGroupSizeCache; + struct SuggestGroupSizeCacheEntry { + Vec3 groupSize; + uint32_t slmArgsTotalSize = 0u; + + Vec3 suggestedGroupSize; + SuggestGroupSizeCacheEntry(size_t groupSize[3], uint32_t slmArgsTotalSize, size_t suggestedGroupSize[3]) : groupSize(groupSize), slmArgsTotalSize(slmArgsTotalSize), suggestedGroupSize(suggestedGroupSize){}; + }; + std::vector suggestGroupSizeCache; }; } // namespace L0 diff --git a/level_zero/core/test/unit_tests/mocks/mock_kernel.h b/level_zero/core/test/unit_tests/mocks/mock_kernel.h index 28f24d4724..dc5d82fabf 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_kernel.h +++ b/level_zero/core/test/unit_tests/mocks/mock_kernel.h @@ -64,6 +64,7 @@ struct WhiteBox<::L0::Kernel> : public ::L0::KernelImp { using ::L0::KernelImp::requiredWorkgroupOrder; using ::L0::KernelImp::residencyContainer; using ::L0::KernelImp::setAssertBuffer; + using ::L0::KernelImp::slmArgsTotalSize; using ::L0::KernelImp::suggestGroupSizeCache; using ::L0::KernelImp::surfaceStateHeapData; using ::L0::KernelImp::surfaceStateHeapDataSize; diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel_2.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel_2.cpp index 73926ff544..2c8ebbceb4 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel_2.cpp +++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel_2.cpp @@ -129,52 +129,86 @@ TEST_F(KernelImp, WhenSuggestingGroupSizeThenCacheValues) { kernel.module = &module; EXPECT_EQ(kernel.suggestGroupSizeCache.size(), 0u); + EXPECT_EQ(kernel.getSlmTotalSize(), 0u); uint32_t groupSize[3]; kernel.KernelImp::suggestGroupSize(256, 1, 1, groupSize, groupSize + 1, groupSize + 2); EXPECT_EQ(kernel.suggestGroupSizeCache.size(), 1u); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].first[0], 256u); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].first[1], 1u); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].first[2], 1u); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[0], 8u); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[1], 1u); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[2], 1u); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[0], groupSize[0]); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[1], groupSize[1]); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[2], groupSize[2]); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].slmArgsTotalSize, 0u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[0], 256u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[1], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[2], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[0], 8u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[1], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[2], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[0], groupSize[0]); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[1], groupSize[1]); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[2], groupSize[2]); kernel.KernelImp::suggestGroupSize(256, 1, 1, groupSize, groupSize + 1, groupSize + 2); EXPECT_EQ(kernel.suggestGroupSizeCache.size(), 1u); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].first[0], 256u); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].first[1], 1u); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].first[2], 1u); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[0], 8u); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[1], 1u); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[2], 1u); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[0], groupSize[0]); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[1], groupSize[1]); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[2], groupSize[2]); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].slmArgsTotalSize, 0u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[0], 256u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[1], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[2], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[0], 8u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[1], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[2], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[0], groupSize[0]); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[1], groupSize[1]); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[2], groupSize[2]); kernel.KernelImp::suggestGroupSize(2048, 1, 1, groupSize, groupSize + 1, groupSize + 2); EXPECT_EQ(kernel.suggestGroupSizeCache.size(), 2u); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].first[0], 256u); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].first[1], 1u); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].first[2], 1u); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[0], 8u); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[1], 1u); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[2], 1u); - EXPECT_EQ(kernel.suggestGroupSizeCache[1].first[0], 2048u); - EXPECT_EQ(kernel.suggestGroupSizeCache[1].first[1], 1u); - EXPECT_EQ(kernel.suggestGroupSizeCache[1].first[2], 1u); - EXPECT_EQ(kernel.suggestGroupSizeCache[1].second[0], 8u); - EXPECT_EQ(kernel.suggestGroupSizeCache[1].second[1], 1u); - EXPECT_EQ(kernel.suggestGroupSizeCache[1].second[2], 1u); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[0], groupSize[0]); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[1], groupSize[1]); - EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[2], groupSize[2]); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].slmArgsTotalSize, 0u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[0], 256u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[1], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[2], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[0], 8u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[1], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[2], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[1].slmArgsTotalSize, 0u); + EXPECT_EQ(kernel.suggestGroupSizeCache[1].groupSize[0], 2048u); + EXPECT_EQ(kernel.suggestGroupSizeCache[1].groupSize[1], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[1].groupSize[2], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[1].suggestedGroupSize[0], 8u); + EXPECT_EQ(kernel.suggestGroupSizeCache[1].suggestedGroupSize[1], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[1].suggestedGroupSize[2], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[0], groupSize[0]); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[1], groupSize[1]); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[2], groupSize[2]); + + kernel.slmArgsTotalSize = 1; + kernel.KernelImp::suggestGroupSize(2048, 1, 1, groupSize, groupSize + 1, groupSize + 2); + + EXPECT_EQ(kernel.suggestGroupSizeCache.size(), 3u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].slmArgsTotalSize, 0u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[0], 256u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[1], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[2], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[0], 8u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[1], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[2], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[1].slmArgsTotalSize, 0u); + EXPECT_EQ(kernel.suggestGroupSizeCache[1].groupSize[0], 2048u); + EXPECT_EQ(kernel.suggestGroupSizeCache[1].groupSize[1], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[1].groupSize[2], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[1].suggestedGroupSize[0], 8u); + EXPECT_EQ(kernel.suggestGroupSizeCache[1].suggestedGroupSize[1], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[1].suggestedGroupSize[2], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[2].slmArgsTotalSize, 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[2].groupSize[0], 2048u); + EXPECT_EQ(kernel.suggestGroupSizeCache[2].groupSize[1], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[2].groupSize[2], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[2].suggestedGroupSize[0], 8u); + EXPECT_EQ(kernel.suggestGroupSizeCache[2].suggestedGroupSize[1], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[2].suggestedGroupSize[2], 1u); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[0], groupSize[0]); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[1], groupSize[1]); + EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[2], groupSize[2]); } class KernelImpSuggestGroupSize : public DeviceFixture, public ::testing::TestWithParam {