fix: Consider SLM size in the suggested work-group size cache
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
parent
97b4d8bab5
commit
bc0a3a7eb5
|
@ -383,12 +383,13 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz
|
||||||
dim = (globalSizeZ > 1U) ? 3 : dim;
|
dim = (globalSizeZ > 1U) ? 3 : dim;
|
||||||
|
|
||||||
auto cachedGroupSize = std::find_if(this->suggestGroupSizeCache.begin(), this->suggestGroupSizeCache.end(), [&](const auto &other) {
|
auto cachedGroupSize = std::find_if(this->suggestGroupSizeCache.begin(), this->suggestGroupSizeCache.end(), [&](const auto &other) {
|
||||||
return other.first == workItems;
|
return other.groupSize == workItems &&
|
||||||
|
other.slmArgsTotalSize == this->getSlmTotalSize();
|
||||||
});
|
});
|
||||||
if (cachedGroupSize != this->suggestGroupSizeCache.end()) {
|
if (cachedGroupSize != this->suggestGroupSizeCache.end()) {
|
||||||
*groupSizeX = static_cast<uint32_t>(cachedGroupSize->second.x);
|
*groupSizeX = static_cast<uint32_t>(cachedGroupSize->suggestedGroupSize.x);
|
||||||
*groupSizeY = static_cast<uint32_t>(cachedGroupSize->second.y);
|
*groupSizeY = static_cast<uint32_t>(cachedGroupSize->suggestedGroupSize.y);
|
||||||
*groupSizeZ = static_cast<uint32_t>(cachedGroupSize->second.z);
|
*groupSizeZ = static_cast<uint32_t>(cachedGroupSize->suggestedGroupSize.z);
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -420,7 +421,7 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz
|
||||||
*groupSizeX = static_cast<uint32_t>(retGroupSize[0]);
|
*groupSizeX = static_cast<uint32_t>(retGroupSize[0]);
|
||||||
*groupSizeY = static_cast<uint32_t>(retGroupSize[1]);
|
*groupSizeY = static_cast<uint32_t>(retGroupSize[1]);
|
||||||
*groupSizeZ = static_cast<uint32_t>(retGroupSize[2]);
|
*groupSizeZ = static_cast<uint32_t>(retGroupSize[2]);
|
||||||
this->suggestGroupSizeCache.push_back(std::make_pair(Vec3(workItems), Vec3(retGroupSize)));
|
this->suggestGroupSizeCache.emplace_back(workItems, this->getSlmTotalSize(), retGroupSize);
|
||||||
|
|
||||||
return ZE_RESULT_SUCCESS;
|
return ZE_RESULT_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
|
@ -241,8 +241,14 @@ struct KernelImp : Kernel {
|
||||||
std::unique_ptr<KernelExt> pExtension;
|
std::unique_ptr<KernelExt> pExtension;
|
||||||
std::mutex printfLock;
|
std::mutex printfLock;
|
||||||
|
|
||||||
using SuggestGroupSizeCacheT = std::vector<std::pair<Vec3<size_t>, Vec3<size_t>>>;
|
// One cached result of KernelImp::suggestGroupSize. An entry is appended after
// a suggestion has been computed and is looked up on later calls; a lookup hits
// only when both groupSize and slmArgsTotalSize match the current request.
struct SuggestGroupSizeCacheEntry {
|
||||||
SuggestGroupSizeCacheT suggestGroupSizeCache;
|
// Lookup key, part 1: the per-dimension work-item counts the suggestion was
// computed for (compared against workItems in suggestGroupSize).
Vec3<size_t> groupSize;
|
||||||
|
// Lookup key, part 2: value of getSlmTotalSize() at the time the entry was
// stored; the cached suggestion is reused only while this value is unchanged.
uint32_t slmArgsTotalSize = 0u;
|
||||||
|
|
||||||
|
// Cached answer: the x/y/z group size handed back to the caller on a hit.
Vec3<size_t> suggestedGroupSize;
|
||||||
|
// Builds an entry from raw 3-element arrays (as passed to emplace_back in
// suggestGroupSize). The parameters deliberately share the members' names:
// inside the mem-initializer parentheses the name refers to the parameter.
// NOTE(review): the ';' after the constructor body is a redundant empty
// declaration and can be dropped.
SuggestGroupSizeCacheEntry(size_t groupSize[3], uint32_t slmArgsTotalSize, size_t suggestedGroupSize[3]) : groupSize(groupSize), slmArgsTotalSize(slmArgsTotalSize), suggestedGroupSize(suggestedGroupSize){};
|
||||||
|
};
|
||||||
|
std::vector<SuggestGroupSizeCacheEntry> suggestGroupSizeCache;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|
|
@ -64,6 +64,7 @@ struct WhiteBox<::L0::Kernel> : public ::L0::KernelImp {
|
||||||
using ::L0::KernelImp::requiredWorkgroupOrder;
|
using ::L0::KernelImp::requiredWorkgroupOrder;
|
||||||
using ::L0::KernelImp::residencyContainer;
|
using ::L0::KernelImp::residencyContainer;
|
||||||
using ::L0::KernelImp::setAssertBuffer;
|
using ::L0::KernelImp::setAssertBuffer;
|
||||||
|
using ::L0::KernelImp::slmArgsTotalSize;
|
||||||
using ::L0::KernelImp::suggestGroupSizeCache;
|
using ::L0::KernelImp::suggestGroupSizeCache;
|
||||||
using ::L0::KernelImp::surfaceStateHeapData;
|
using ::L0::KernelImp::surfaceStateHeapData;
|
||||||
using ::L0::KernelImp::surfaceStateHeapDataSize;
|
using ::L0::KernelImp::surfaceStateHeapDataSize;
|
||||||
|
|
|
@ -129,52 +129,86 @@ TEST_F(KernelImp, WhenSuggestingGroupSizeThenCacheValues) {
|
||||||
kernel.module = &module;
|
kernel.module = &module;
|
||||||
|
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache.size(), 0u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache.size(), 0u);
|
||||||
|
EXPECT_EQ(kernel.getSlmTotalSize(), 0u);
|
||||||
|
|
||||||
uint32_t groupSize[3];
|
uint32_t groupSize[3];
|
||||||
kernel.KernelImp::suggestGroupSize(256, 1, 1, groupSize, groupSize + 1, groupSize + 2);
|
kernel.KernelImp::suggestGroupSize(256, 1, 1, groupSize, groupSize + 1, groupSize + 2);
|
||||||
|
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache.size(), 1u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache.size(), 1u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].first[0], 256u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].slmArgsTotalSize, 0u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].first[1], 1u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[0], 256u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].first[2], 1u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[1], 1u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[0], 8u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[2], 1u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[1], 1u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[0], 8u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[2], 1u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[1], 1u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[0], groupSize[0]);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[2], 1u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[1], groupSize[1]);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[0], groupSize[0]);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[2], groupSize[2]);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[1], groupSize[1]);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[2], groupSize[2]);
|
||||||
|
|
||||||
kernel.KernelImp::suggestGroupSize(256, 1, 1, groupSize, groupSize + 1, groupSize + 2);
|
kernel.KernelImp::suggestGroupSize(256, 1, 1, groupSize, groupSize + 1, groupSize + 2);
|
||||||
|
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache.size(), 1u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache.size(), 1u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].first[0], 256u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].slmArgsTotalSize, 0u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].first[1], 1u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[0], 256u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].first[2], 1u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[1], 1u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[0], 8u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[2], 1u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[1], 1u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[0], 8u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[2], 1u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[1], 1u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[0], groupSize[0]);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[2], 1u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[1], groupSize[1]);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[0], groupSize[0]);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[2], groupSize[2]);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[1], groupSize[1]);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[2], groupSize[2]);
|
||||||
|
|
||||||
kernel.KernelImp::suggestGroupSize(2048, 1, 1, groupSize, groupSize + 1, groupSize + 2);
|
kernel.KernelImp::suggestGroupSize(2048, 1, 1, groupSize, groupSize + 1, groupSize + 2);
|
||||||
|
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache.size(), 2u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache.size(), 2u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].first[0], 256u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].slmArgsTotalSize, 0u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].first[1], 1u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[0], 256u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].first[2], 1u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[1], 1u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[0], 8u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[2], 1u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[1], 1u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[0], 8u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[2], 1u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[1], 1u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[1].first[0], 2048u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[2], 1u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[1].first[1], 1u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[1].slmArgsTotalSize, 0u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[1].first[2], 1u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[1].groupSize[0], 2048u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[1].second[0], 8u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[1].groupSize[1], 1u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[1].second[1], 1u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[1].groupSize[2], 1u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[1].second[2], 1u);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[1].suggestedGroupSize[0], 8u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[0], groupSize[0]);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[1].suggestedGroupSize[1], 1u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[1], groupSize[1]);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[1].suggestedGroupSize[2], 1u);
|
||||||
EXPECT_EQ(kernel.suggestGroupSizeCache[0].second[2], groupSize[2]);
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[0], groupSize[0]);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[1], groupSize[1]);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[2], groupSize[2]);
|
||||||
|
|
||||||
|
kernel.slmArgsTotalSize = 1;
|
||||||
|
kernel.KernelImp::suggestGroupSize(2048, 1, 1, groupSize, groupSize + 1, groupSize + 2);
|
||||||
|
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache.size(), 3u);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].slmArgsTotalSize, 0u);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[0], 256u);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[1], 1u);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].groupSize[2], 1u);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[0], 8u);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[1], 1u);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[2], 1u);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[1].slmArgsTotalSize, 0u);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[1].groupSize[0], 2048u);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[1].groupSize[1], 1u);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[1].groupSize[2], 1u);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[1].suggestedGroupSize[0], 8u);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[1].suggestedGroupSize[1], 1u);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[1].suggestedGroupSize[2], 1u);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[2].slmArgsTotalSize, 1u);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[2].groupSize[0], 2048u);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[2].groupSize[1], 1u);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[2].groupSize[2], 1u);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[2].suggestedGroupSize[0], 8u);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[2].suggestedGroupSize[1], 1u);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[2].suggestedGroupSize[2], 1u);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[0], groupSize[0]);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[1], groupSize[1]);
|
||||||
|
EXPECT_EQ(kernel.suggestGroupSizeCache[0].suggestedGroupSize[2], groupSize[2]);
|
||||||
}
|
}
|
||||||
|
|
||||||
class KernelImpSuggestGroupSize : public DeviceFixture, public ::testing::TestWithParam<uint32_t> {
|
class KernelImpSuggestGroupSize : public DeviceFixture, public ::testing::TestWithParam<uint32_t> {
|
||||||
|
|
Loading…
Reference in New Issue