From 959333d9bf415a82b3c33810250b09c2151cdb50 Mon Sep 17 00:00:00 2001 From: Mateusz Jablonski Date: Tue, 23 Apr 2024 14:59:24 +0000 Subject: [PATCH] fix: respect number of tiles when getting total private memory size of module Related-To: GSD-8374 Signed-off-by: Mateusz Jablonski --- level_zero/core/source/module/module_imp.cpp | 9 +++++--- .../unit_tests/fixtures/module_fixture.cpp | 6 ++---- .../test/unit_tests/fixtures/module_fixture.h | 2 +- .../unit_tests/sources/module/test_module.cpp | 21 +++++++++++++++++++ 4 files changed, 30 insertions(+), 8 deletions(-) diff --git a/level_zero/core/source/module/module_imp.cpp b/level_zero/core/source/module/module_imp.cpp index b00856b24d..0322a167c4 100644 --- a/level_zero/core/source/module/module_imp.cpp +++ b/level_zero/core/source/module/module_imp.cpp @@ -1286,19 +1286,22 @@ void ModuleImp::verifyDebugCapabilities() { void ModuleImp::checkIfPrivateMemoryPerDispatchIsNeeded() { size_t modulePrivateMemorySize = 0; + auto neoDevice = this->device->getNEODevice(); for (auto &kernelImmData : this->kernelImmDatas) { if (0 == kernelImmData->getDescriptor().kernelAttributes.perHwThreadPrivateMemorySize) { continue; } auto kernelPrivateMemorySize = NEO::KernelHelper::getPrivateSurfaceSize(kernelImmData->getDescriptor().kernelAttributes.perHwThreadPrivateMemorySize, - this->device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch); + neoDevice->getDeviceInfo().computeUnitsUsedForScratch); modulePrivateMemorySize += kernelPrivateMemorySize; } this->allocatePrivateMemoryPerDispatch = false; if (modulePrivateMemorySize > 0U) { - auto globalMemorySize = device->getNEODevice()->getRootDevice()->getGlobalMemorySize(static_cast(device->getNEODevice()->getDeviceBitfield().to_ulong())); - this->allocatePrivateMemoryPerDispatch = modulePrivateMemorySize > globalMemorySize; + auto deviceBitfield = neoDevice->getDeviceBitfield(); + auto globalMemorySize = neoDevice->getRootDevice()->getGlobalMemorySize(static_cast(deviceBitfield.to_ulong())); + auto numSubDevices = deviceBitfield.count(); + this->allocatePrivateMemoryPerDispatch = modulePrivateMemorySize * numSubDevices > globalMemorySize; } } diff --git a/level_zero/core/test/unit_tests/fixtures/module_fixture.cpp b/level_zero/core/test/unit_tests/fixtures/module_fixture.cpp index ea22641985..323cc19656 100644 --- a/level_zero/core/test/unit_tests/fixtures/module_fixture.cpp +++ b/level_zero/core/test/unit_tests/fixtures/module_fixture.cpp @@ -200,11 +200,9 @@ void MultiDeviceModuleFixture::createModuleFromMockBinary(uint32_t rootDeviceInd moduleDesc.inputSize = src.size(); ModuleBuildLog *moduleBuildLog = nullptr; - ze_result_t result = ZE_RESULT_SUCCESS; - modules[rootDeviceIndex].reset(Module::create(device, - &moduleDesc, - moduleBuildLog, ModuleType::user, &result)); + modules[rootDeviceIndex].reset(new WhiteBox<::L0::Module>{device, moduleBuildLog, ModuleType::user}); + modules[rootDeviceIndex]->initialize(&moduleDesc, device->getNEODevice()); } void MultiDeviceModuleFixture::createKernel(uint32_t rootDeviceIndex) { diff --git a/level_zero/core/test/unit_tests/fixtures/module_fixture.h b/level_zero/core/test/unit_tests/fixtures/module_fixture.h index 2f0f7bf8e6..6bcaa2149c 100644 --- a/level_zero/core/test/unit_tests/fixtures/module_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/module_fixture.h @@ -156,7 +156,7 @@ struct MultiDeviceModuleFixture : public MultiDeviceFixture { const std::string kernelName = "test"; const uint32_t numKernelArguments = 6; - std::vector> modules; + std::vector>> modules; std::unique_ptr> kernel; std::unique_ptr zebinData; }; diff --git a/level_zero/core/test/unit_tests/sources/module/test_module.cpp b/level_zero/core/test/unit_tests/sources/module/test_module.cpp index 87cd3a5a94..7b8b053148 100644 --- a/level_zero/core/test/unit_tests/sources/module/test_module.cpp +++ b/level_zero/core/test/unit_tests/sources/module/test_module.cpp @@ -4731,5 +4731,26 @@ TEST_F(ModuleKernelImmDatasTest, givenDeviceOOMWhenMemoryManagerFailsToAllocateM auto result = module->initialize(&moduleDesc, neoDevice); EXPECT_EQ(result, ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY); }; + +using MultiTileModuleTest = Test; +HWTEST2_F(MultiTileModuleTest, givenTwoKernelPrivateAllocsWhichExceedGlobalMemSizeOfSingleTileButNotEntireGlobalMemSizeThenPrivateMemoryShouldBeAllocatedPerDispatch, IsAtLeastSkl) { + auto devInfo = device->getNEODevice()->getDeviceInfo(); + auto kernelsNb = 2u; + uint32_t margin128KB = 131072u; + auto underAllocSize = static_cast(devInfo.globalMemSize / kernelsNb / devInfo.computeUnitsUsedForScratch) - margin128KB; + auto kernelNames = std::array{"test1", "test2"}; + + auto &kernelImmDatas = this->modules[0]->kernelImmDatas; + for (size_t i = 0; i < kernelsNb; i++) { + auto &kernelDesc = const_cast(kernelImmDatas[i]->getDescriptor()); + kernelDesc.kernelAttributes.perHwThreadPrivateMemorySize = underAllocSize; + kernelDesc.kernelAttributes.flags.usesPrintf = false; + kernelDesc.kernelMetadata.kernelName = kernelNames[i]; + } + + EXPECT_FALSE(this->modules[0]->shouldAllocatePrivateMemoryPerDispatch()); + this->modules[0]->checkIfPrivateMemoryPerDispatchIsNeeded(); + EXPECT_TRUE(this->modules[0]->shouldAllocatePrivateMemoryPerDispatch()); +} } // namespace ult } // namespace L0