mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
fix: count active modules for enabling per-dispatch private memory
Related-To: NEO-13086
Signed-off-by: Wenbin Lu <wenbin.lu@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
fece6956c6
commit
a483b361f9
@@ -12,7 +12,6 @@
|
||||
#include "shared/source/compiler_interface/compiler_options_extra.h"
|
||||
#include "shared/source/compiler_interface/compiler_warnings/compiler_warnings.h"
|
||||
#include "shared/source/compiler_interface/external_functions.h"
|
||||
#include "shared/source/compiler_interface/intermediate_representations.h"
|
||||
#include "shared/source/compiler_interface/linker.h"
|
||||
#include "shared/source/debugger/debugger_l0.h"
|
||||
#include "shared/source/device/device.h"
|
||||
@@ -53,9 +52,9 @@
|
||||
#include "program_debug_data.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace L0 {
|
||||
|
||||
namespace BuildOptions {
|
||||
@@ -1280,7 +1279,6 @@ ze_result_t ModuleImp::getKernelNames(uint32_t *pCount, const char **pNames) {
|
||||
}
|
||||
|
||||
void ModuleImp::checkIfPrivateMemoryPerDispatchIsNeeded() {
|
||||
size_t modulePrivateMemorySize = 0;
|
||||
auto neoDevice = this->device->getNEODevice();
|
||||
for (auto &kernelImmData : this->kernelImmDatas) {
|
||||
if (0 == kernelImmData->getDescriptor().kernelAttributes.perHwThreadPrivateMemorySize) {
|
||||
@@ -1288,17 +1286,26 @@ void ModuleImp::checkIfPrivateMemoryPerDispatchIsNeeded() {
|
||||
}
|
||||
auto kernelPrivateMemorySize = NEO::KernelHelper::getPrivateSurfaceSize(kernelImmData->getDescriptor().kernelAttributes.perHwThreadPrivateMemorySize,
|
||||
neoDevice->getDeviceInfo().computeUnitsUsedForScratch);
|
||||
modulePrivateMemorySize += kernelPrivateMemorySize;
|
||||
this->privateMemorySize += kernelPrivateMemorySize;
|
||||
}
|
||||
|
||||
this->allocatePrivateMemoryPerDispatch = false;
|
||||
if (modulePrivateMemorySize > 0U) {
|
||||
if (this->privateMemorySize > 0U) {
|
||||
auto deviceBitfield = neoDevice->getDeviceBitfield();
|
||||
auto globalMemorySize = neoDevice->getRootDevice()->getGlobalMemorySize(static_cast<uint32_t>(deviceBitfield.to_ulong()));
|
||||
auto numSubDevices = deviceBitfield.count();
|
||||
this->allocatePrivateMemoryPerDispatch = modulePrivateMemorySize * numSubDevices > globalMemorySize;
|
||||
auto allSubDevicePrivateMemorySize = this->privateMemorySize * numSubDevices;
|
||||
float maxPercentage = 0.25f;
|
||||
if (NEO::debugManager.flags.MaxKernelManagedPrivateMemoryPercent.get() > 0) {
|
||||
maxPercentage = NEO::debugManager.flags.MaxKernelManagedPrivateMemoryPercent.get() / 100.0f;
|
||||
}
|
||||
auto privateMemorySizeLock = neoDevice->getMemoryManager()->lockKernelManagedPrivateMemorySize();
|
||||
this->allocatePrivateMemoryPerDispatch = (neoDevice->getMemoryManager()->getKernelManagedPrivateMemorySize() + allSubDevicePrivateMemorySize) > static_cast<size_t>(globalMemorySize * maxPercentage);
|
||||
if (!this->allocatePrivateMemoryPerDispatch) {
|
||||
neoDevice->getMemoryManager()->registerKernelManagedPrivateMemorySize(allSubDevicePrivateMemorySize);
|
||||
}
|
||||
PRINT_DEBUG_STRING(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Private Memory Per Dispatch %d for modulePrivateMemorySize %zu subDevices %zu globalMemorySize %" PRIu64 "\n",
|
||||
this->allocatePrivateMemoryPerDispatch, modulePrivateMemorySize, numSubDevices, globalMemorySize);
|
||||
this->allocatePrivateMemoryPerDispatch, this->privateMemorySize, numSubDevices, globalMemorySize);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1570,6 +1577,13 @@ ze_result_t ModuleImp::destroy() {
|
||||
}
|
||||
}
|
||||
|
||||
if (!this->allocatePrivateMemoryPerDispatch) {
|
||||
auto neoDevice = this->device->getNEODevice();
|
||||
auto allSubDevicePrivateMemorySize = neoDevice->getDeviceBitfield().count() * this->privateMemorySize;
|
||||
auto privateMemorySizeLock = neoDevice->getMemoryManager()->lockKernelManagedPrivateMemorySize();
|
||||
neoDevice->getMemoryManager()->unregisterKernelManagedPrivateMemorySize(allSubDevicePrivateMemorySize);
|
||||
}
|
||||
|
||||
delete this;
|
||||
|
||||
if (tempDevice->getL0Debugger() && tempHandle != 0) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
* Copyright (C) 2020-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -16,7 +16,6 @@
|
||||
|
||||
#include "igfxfmid.h"
|
||||
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
@@ -216,6 +215,7 @@ struct ModuleImp : public Module {
|
||||
uint32_t profileFlags = 0;
|
||||
uint64_t moduleLoadAddress = std::numeric_limits<uint64_t>::max();
|
||||
size_t isaAllocationPageSize = 0;
|
||||
size_t privateMemorySize = 0;
|
||||
|
||||
NEO::Linker::PatchableSegments isaSegmentsForPatching;
|
||||
std::vector<std::vector<char>> patchedIsaTempStorage;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
* Copyright (C) 2020-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -16,7 +16,6 @@
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/mocks/mock_bindless_heaps_helper.h"
|
||||
#include "shared/test/common/mocks/mock_compilers.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
|
||||
@@ -1305,12 +1304,14 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTwoKernelPrivateAllocsWhichTogethe
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListAppendLaunchKernel, givenTwoKernelPrivateAllocsWhichDontExceedGlobalMemSizeWhenAppendLaunchKernelWithParamsIsCalledThenNoAllocationIsDone, MatchAny) {
|
||||
HWTEST2_F(CommandListAppendLaunchKernel, givenTwoKernelPrivateAllocsWhichDontExceedGlobalMemSizePercentWhenAppendLaunchKernelWithParamsIsCalledThenNoAllocationIsDone, MatchAny) {
|
||||
|
||||
debugManager.flags.MaxKernelManagedPrivateMemoryPercent.set(33);
|
||||
auto devInfo = device->getNEODevice()->getDeviceInfo();
|
||||
auto kernelsNb = 2u;
|
||||
uint32_t margin128KB = 131072u;
|
||||
auto underAllocSize = static_cast<uint32_t>(devInfo.globalMemSize / kernelsNb / devInfo.computeUnitsUsedForScratch) - margin128KB;
|
||||
auto maxModulePrivateMemorySize = static_cast<uint64_t>(devInfo.globalMemSize * (debugManager.flags.MaxKernelManagedPrivateMemoryPercent.get() / 100.0f));
|
||||
auto underAllocSize = static_cast<uint32_t>(maxModulePrivateMemorySize / kernelsNb / devInfo.computeUnitsUsedForScratch) - margin128KB;
|
||||
auto kernelNames = std::array<std::string, 2u>{"test1", "test2"};
|
||||
|
||||
auto &kernelImmDatas = this->module->kernelImmDatas;
|
||||
@@ -1340,6 +1341,37 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTwoKernelPrivateAllocsWhichDontExc
|
||||
EXPECT_EQ(pCommandList->getOwnedPrivateAllocationsSize(), 0u);
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListAppendLaunchKernel, givenKernelPrivateAllocWhichExceedGlobalMemSizePercentWhenAppendLaunchKernelWithParamsIsCalledThenAllocationIsDone, MatchAny) {
|
||||
|
||||
debugManager.flags.MaxKernelManagedPrivateMemoryPercent.set(80);
|
||||
auto devInfo = device->getNEODevice()->getDeviceInfo();
|
||||
auto kernelPrivateMemorySize = static_cast<uint64_t>(devInfo.globalMemSize * ((debugManager.flags.MaxKernelManagedPrivateMemoryPercent.get() + 1) / 100.0f));
|
||||
auto underAllocSize = static_cast<uint32_t>(kernelPrivateMemorySize / devInfo.computeUnitsUsedForScratch);
|
||||
|
||||
auto &kernelImmDatas = this->module->kernelImmDatas;
|
||||
auto &kernelDesc = const_cast<KernelDescriptor &>(kernelImmDatas[0]->getDescriptor());
|
||||
kernelDesc.kernelAttributes.perHwThreadPrivateMemorySize = underAllocSize;
|
||||
kernelDesc.kernelAttributes.flags.usesPrintf = false;
|
||||
kernelDesc.kernelMetadata.kernelName = "test1";
|
||||
|
||||
EXPECT_FALSE(this->module->shouldAllocatePrivateMemoryPerDispatch());
|
||||
this->module->checkIfPrivateMemoryPerDispatchIsNeeded();
|
||||
EXPECT_TRUE(this->module->shouldAllocatePrivateMemoryPerDispatch());
|
||||
|
||||
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
pCommandList->device = this->module->getDevice();
|
||||
auto memoryMgr = static_cast<OsAgnosticMemoryManager *>(pCommandList->device->getNEODevice()->getExecutionEnvironment()->memoryManager.get());
|
||||
memoryMgr->turnOnFakingBigAllocations();
|
||||
|
||||
auto kernels = std::vector<std::unique_ptr<WhiteBox<::L0::KernelImp>>>();
|
||||
EXPECT_EQ(pCommandList->getOwnedPrivateAllocationsSize(), 0u);
|
||||
kernels.push_back(this->createKernelWithName("test1"));
|
||||
pCommandList->allocateOrReuseKernelPrivateMemoryIfNeeded(kernels[0].get(),
|
||||
kernels[0]->getKernelDescriptor().kernelAttributes.perHwThreadPrivateMemorySize);
|
||||
EXPECT_EQ(pCommandList->getOwnedPrivateAllocationsSize(), 1u);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListAppendLaunchKernel, GivenDebugToggleSetWhenUpdateStreamPropertiesIsCalledThenCorrectThreadArbitrationPolicyIsSet, MatchAny) {
|
||||
DebugManagerStateRestore restorer;
|
||||
debugManager.flags.ForceThreadArbitrationPolicyProgrammingWithScm.set(1);
|
||||
|
||||
Reference in New Issue
Block a user