mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
Revert "fix: count active modules for enabling per-dispatch private memory"
This reverts commit a483b361f9.
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
621ceaf9ec
commit
2dd9940f60
@@ -12,6 +12,7 @@
|
||||
#include "shared/source/compiler_interface/compiler_options_extra.h"
|
||||
#include "shared/source/compiler_interface/compiler_warnings/compiler_warnings.h"
|
||||
#include "shared/source/compiler_interface/external_functions.h"
|
||||
#include "shared/source/compiler_interface/intermediate_representations.h"
|
||||
#include "shared/source/compiler_interface/linker.h"
|
||||
#include "shared/source/debugger/debugger_l0.h"
|
||||
#include "shared/source/device/device.h"
|
||||
@@ -52,9 +53,9 @@
|
||||
#include "program_debug_data.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace L0 {
|
||||
|
||||
namespace BuildOptions {
|
||||
@@ -1279,6 +1280,7 @@ ze_result_t ModuleImp::getKernelNames(uint32_t *pCount, const char **pNames) {
|
||||
}
|
||||
|
||||
void ModuleImp::checkIfPrivateMemoryPerDispatchIsNeeded() {
|
||||
size_t modulePrivateMemorySize = 0;
|
||||
auto neoDevice = this->device->getNEODevice();
|
||||
for (auto &kernelImmData : this->kernelImmDatas) {
|
||||
if (0 == kernelImmData->getDescriptor().kernelAttributes.perHwThreadPrivateMemorySize) {
|
||||
@@ -1286,26 +1288,17 @@ void ModuleImp::checkIfPrivateMemoryPerDispatchIsNeeded() {
|
||||
}
|
||||
auto kernelPrivateMemorySize = NEO::KernelHelper::getPrivateSurfaceSize(kernelImmData->getDescriptor().kernelAttributes.perHwThreadPrivateMemorySize,
|
||||
neoDevice->getDeviceInfo().computeUnitsUsedForScratch);
|
||||
this->privateMemorySize += kernelPrivateMemorySize;
|
||||
modulePrivateMemorySize += kernelPrivateMemorySize;
|
||||
}
|
||||
|
||||
this->allocatePrivateMemoryPerDispatch = false;
|
||||
if (this->privateMemorySize > 0U) {
|
||||
if (modulePrivateMemorySize > 0U) {
|
||||
auto deviceBitfield = neoDevice->getDeviceBitfield();
|
||||
auto globalMemorySize = neoDevice->getRootDevice()->getGlobalMemorySize(static_cast<uint32_t>(deviceBitfield.to_ulong()));
|
||||
auto numSubDevices = deviceBitfield.count();
|
||||
auto allSubDevicePrivateMemorySize = this->privateMemorySize * numSubDevices;
|
||||
float maxPercentage = 0.25f;
|
||||
if (NEO::debugManager.flags.MaxKernelManagedPrivateMemoryPercent.get() > 0) {
|
||||
maxPercentage = NEO::debugManager.flags.MaxKernelManagedPrivateMemoryPercent.get() / 100.0f;
|
||||
}
|
||||
auto privateMemorySizeLock = neoDevice->getMemoryManager()->lockKernelManagedPrivateMemorySize();
|
||||
this->allocatePrivateMemoryPerDispatch = (neoDevice->getMemoryManager()->getKernelManagedPrivateMemorySize() + allSubDevicePrivateMemorySize) > static_cast<size_t>(globalMemorySize * maxPercentage);
|
||||
if (!this->allocatePrivateMemoryPerDispatch) {
|
||||
neoDevice->getMemoryManager()->registerKernelManagedPrivateMemorySize(allSubDevicePrivateMemorySize);
|
||||
}
|
||||
this->allocatePrivateMemoryPerDispatch = modulePrivateMemorySize * numSubDevices > globalMemorySize;
|
||||
PRINT_DEBUG_STRING(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Private Memory Per Dispatch %d for modulePrivateMemorySize %zu subDevices %zu globalMemorySize %" PRIu64 "\n",
|
||||
this->allocatePrivateMemoryPerDispatch, this->privateMemorySize, numSubDevices, globalMemorySize);
|
||||
this->allocatePrivateMemoryPerDispatch, modulePrivateMemorySize, numSubDevices, globalMemorySize);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1577,13 +1570,6 @@ ze_result_t ModuleImp::destroy() {
|
||||
}
|
||||
}
|
||||
|
||||
if (!this->allocatePrivateMemoryPerDispatch) {
|
||||
auto neoDevice = this->device->getNEODevice();
|
||||
auto allSubDevicePrivateMemorySize = neoDevice->getDeviceBitfield().count() * this->privateMemorySize;
|
||||
auto privateMemorySizeLock = neoDevice->getMemoryManager()->lockKernelManagedPrivateMemorySize();
|
||||
neoDevice->getMemoryManager()->unregisterKernelManagedPrivateMemorySize(allSubDevicePrivateMemorySize);
|
||||
}
|
||||
|
||||
delete this;
|
||||
|
||||
if (tempDevice->getL0Debugger() && tempHandle != 0) {
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
|
||||
#include "igfxfmid.h"
|
||||
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
@@ -215,7 +216,6 @@ struct ModuleImp : public Module {
|
||||
uint32_t profileFlags = 0;
|
||||
uint64_t moduleLoadAddress = std::numeric_limits<uint64_t>::max();
|
||||
size_t isaAllocationPageSize = 0;
|
||||
size_t privateMemorySize = 0;
|
||||
|
||||
NEO::Linker::PatchableSegments isaSegmentsForPatching;
|
||||
std::vector<std::vector<char>> patchedIsaTempStorage;
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/mocks/mock_bindless_heaps_helper.h"
|
||||
#include "shared/test/common/mocks/mock_compilers.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
|
||||
@@ -1304,14 +1305,12 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTwoKernelPrivateAllocsWhichTogethe
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListAppendLaunchKernel, givenTwoKernelPrivateAllocsWhichDontExceedGlobalMemSizePercentWhenAppendLaunchKernelWithParamsIsCalledThenNoAllocationIsDone, MatchAny) {
|
||||
HWTEST2_F(CommandListAppendLaunchKernel, givenTwoKernelPrivateAllocsWhichDontExceedGlobalMemSizeWhenAppendLaunchKernelWithParamsIsCalledThenNoAllocationIsDone, MatchAny) {
|
||||
|
||||
debugManager.flags.MaxKernelManagedPrivateMemoryPercent.set(33);
|
||||
auto devInfo = device->getNEODevice()->getDeviceInfo();
|
||||
auto kernelsNb = 2u;
|
||||
uint32_t margin128KB = 131072u;
|
||||
auto maxModulePrivateMemorySize = static_cast<uint64_t>(devInfo.globalMemSize * (debugManager.flags.MaxKernelManagedPrivateMemoryPercent.get() / 100.0f));
|
||||
auto underAllocSize = static_cast<uint32_t>(maxModulePrivateMemorySize / kernelsNb / devInfo.computeUnitsUsedForScratch) - margin128KB;
|
||||
auto underAllocSize = static_cast<uint32_t>(devInfo.globalMemSize / kernelsNb / devInfo.computeUnitsUsedForScratch) - margin128KB;
|
||||
auto kernelNames = std::array<std::string, 2u>{"test1", "test2"};
|
||||
|
||||
auto &kernelImmDatas = this->module->kernelImmDatas;
|
||||
@@ -1341,37 +1340,6 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenTwoKernelPrivateAllocsWhichDontExc
|
||||
EXPECT_EQ(pCommandList->getOwnedPrivateAllocationsSize(), 0u);
|
||||
}
|
||||
}
|
||||
|
||||
// Checks that one kernel whose private-memory request is sized just above the
// MaxKernelManagedPrivateMemoryPercent share of global memory flips the module to
// per-dispatch private memory, and that the command list then owns exactly one
// private allocation for that kernel.
// NOTE(review): no DebugManagerStateRestore is declared in this body even though the
// debug flag is modified; presumably the fixture restores it - confirm.
HWTEST2_F(CommandListAppendLaunchKernel, givenKernelPrivateAllocWhichExceedGlobalMemSizePercentWhenAppendLaunchKernelWithParamsIsCalledThenAllocationIsDone, MatchAny) {
|
||||
|
||||
// Cap kernel-managed private memory at 80% of global memory for this test.
debugManager.flags.MaxKernelManagedPrivateMemoryPercent.set(80);
|
||||
auto devInfo = device->getNEODevice()->getDeviceInfo();
|
||||
// Target footprint is (cap + 1)% of global memory, i.e. one percentage point over the cap.
auto kernelPrivateMemorySize = static_cast<uint64_t>(devInfo.globalMemSize * ((debugManager.flags.MaxKernelManagedPrivateMemoryPercent.get() + 1) / 100.0f));
|
||||
// Per-HW-thread size obtained by dividing the target footprint by computeUnitsUsedForScratch.
// Despite the name, this value is derived from the over-the-cap footprint above.
auto underAllocSize = static_cast<uint32_t>(kernelPrivateMemorySize / devInfo.computeUnitsUsedForScratch);
|
||||
|
||||
auto &kernelImmDatas = this->module->kernelImmDatas;
|
||||
// The descriptor is exposed as const; cast it away so the test can patch kernel attributes in place.
auto &kernelDesc = const_cast<KernelDescriptor &>(kernelImmDatas[0]->getDescriptor());
|
||||
kernelDesc.kernelAttributes.perHwThreadPrivateMemorySize = underAllocSize;
|
||||
kernelDesc.kernelAttributes.flags.usesPrintf = false;
|
||||
kernelDesc.kernelMetadata.kernelName = "test1";
|
||||
|
||||
// Before re-evaluation the module reports module-managed private memory ...
EXPECT_FALSE(this->module->shouldAllocatePrivateMemoryPerDispatch());
|
||||
this->module->checkIfPrivateMemoryPerDispatchIsNeeded();
|
||||
// ... and after re-evaluation with the patched descriptor it reports per-dispatch allocation.
EXPECT_TRUE(this->module->shouldAllocatePrivateMemoryPerDispatch());
|
||||
|
||||
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
pCommandList->device = this->module->getDevice();
|
||||
auto memoryMgr = static_cast<OsAgnosticMemoryManager *>(pCommandList->device->getNEODevice()->getExecutionEnvironment()->memoryManager.get());
|
||||
// NOTE(review): presumably lets the memory manager satisfy the oversized request
// without real backing memory - confirm against OsAgnosticMemoryManager.
memoryMgr->turnOnFakingBigAllocations();
|
||||
|
||||
auto kernels = std::vector<std::unique_ptr<WhiteBox<::L0::KernelImp>>>();
|
||||
// The command list starts with no owned private allocations.
EXPECT_EQ(pCommandList->getOwnedPrivateAllocationsSize(), 0u);
|
||||
kernels.push_back(this->createKernelWithName("test1"));
|
||||
pCommandList->allocateOrReuseKernelPrivateMemoryIfNeeded(kernels[0].get(),
|
||||
kernels[0]->getKernelDescriptor().kernelAttributes.perHwThreadPrivateMemorySize);
|
||||
// Exactly one private allocation is now owned by the command list.
EXPECT_EQ(pCommandList->getOwnedPrivateAllocationsSize(), 1u);
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListAppendLaunchKernel, GivenDebugToggleSetWhenUpdateStreamPropertiesIsCalledThenCorrectThreadArbitrationPolicyIsSet, MatchAny) {
|
||||
DebugManagerStateRestore restorer;
|
||||
debugManager.flags.ForceThreadArbitrationPolicyProgrammingWithScm.set(1);
|
||||
|
||||
@@ -385,7 +385,6 @@ DECLARE_DEBUG_VARIABLE(int32_t, AllowZeroCopyWithoutCoherency, -1, "Use cachelin
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableHostPtrTracking, -1, "Enable host ptr tracking: -1 - default platform setting, 0 - disabled, 1 - enabled")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, MaxHwThreadsPercent, 0, "If not zero then maximum number of used HW threads is capped to max * MaxHwThreadsPercent / 100")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, MinHwThreadsUnoccupied, 0, "If not zero then maximum number of used HW threads is reduced by MinHwThreadsUnoccupied")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, MaxKernelManagedPrivateMemoryPercent, 0, "If not zero then maximum amount of kernel-managed private memory is capped to MaxGlobalMemory * MaxKernelManagedPrivateMemoryPercent / 100")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, PerformImplicitFlushEveryEnqueueCount, -1, "If greater than 0, driver performs implicit flush every N submissions.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, PerformImplicitFlushForNewResource, -1, "-1: platform specific, 0: force disable, 1: force enable")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, PerformImplicitFlushForIdleGpu, -1, "-1: platform specific, 0: force disable, 1: force enable")
|
||||
|
||||
@@ -64,7 +64,6 @@ MemoryManager::MemoryManager(ExecutionEnvironment &executionEnvironment) : execu
|
||||
secondaryEngines.resize(rootEnvCount + 1);
|
||||
localMemAllocsSize = std::make_unique<std::atomic<size_t>[]>(rootEnvCount);
|
||||
sysMemAllocsSize.store(0u);
|
||||
kernelManagedPrivateMemorySize = 0u;
|
||||
|
||||
for (uint32_t rootDeviceIndex = 0; rootDeviceIndex < rootEnvCount; ++rootDeviceIndex) {
|
||||
auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[rootDeviceIndex];
|
||||
|
||||
@@ -272,9 +272,6 @@ class MemoryManager {
|
||||
virtual AllocationStatus registerSysMemAlloc(GraphicsAllocation *allocation);
|
||||
virtual AllocationStatus registerLocalMemAlloc(GraphicsAllocation *allocation, uint32_t rootDeviceIndex);
|
||||
|
||||
// Adds `size` to the running kernelManagedPrivateMemorySize total.
// Performs no locking itself; the callers visible in this change take
// lockKernelManagedPrivateMemorySize() before calling it.
void registerKernelManagedPrivateMemorySize(size_t size) { this->kernelManagedPrivateMemorySize += size; };
|
||||
// Subtracts `size` from the running kernelManagedPrivateMemorySize total.
// There is no underflow check: the counter is an unsigned size_t, so passing more
// than was registered wraps around. Performs no locking itself.
void unregisterKernelManagedPrivateMemorySize(size_t size) { this->kernelManagedPrivateMemorySize -= size; };
|
||||
|
||||
// Default implementation: ignores all arguments and returns true. Virtual, so derived
// memory managers can override it.
virtual bool setMemAdvise(GraphicsAllocation *gfxAllocation, MemAdviseFlags flags, uint32_t rootDeviceIndex) { return true; }
|
||||
// Default implementation: ignores all arguments and returns true. Virtual, so derived
// memory managers can override it.
virtual bool setMemPrefetch(GraphicsAllocation *gfxAllocation, SubDeviceIdsVec &subDeviceIds, uint32_t rootDeviceIndex) { return true; }
|
||||
// Default implementation: ignores all arguments and returns true. Virtual, so derived
// memory managers can override it.
virtual bool setAtomicAccess(GraphicsAllocation *gfxAllocation, size_t size, AtomicAccessMode mode, uint32_t rootDeviceIndex) { return true; }
|
||||
@@ -335,8 +332,6 @@ class MemoryManager {
|
||||
|
||||
// Returns the current value of the localMemAllocsSize counter for the given root device.
// The index is used directly with no bounds check.
size_t getUsedLocalMemorySize(uint32_t rootDeviceIndex) const { return localMemAllocsSize[rootDeviceIndex]; }
|
||||
// Returns the current value of the sysMemAllocsSize counter.
size_t getUsedSystemMemorySize() const { return sysMemAllocsSize; }
|
||||
// Returns the current kernelManagedPrivateMemorySize total. Performs no locking itself.
size_t getKernelManagedPrivateMemorySize() const { return kernelManagedPrivateMemorySize; }
|
||||
// Locks kernelManagedPrivateMemorySizeMutex and returns the owning std::unique_lock;
// the mutex is released when the returned object is destroyed. [[nodiscard]] makes the
// compiler warn if the result is dropped, which would unlock immediately.
[[nodiscard]] std::unique_lock<std::mutex> lockKernelManagedPrivateMemorySize() { return std::unique_lock<std::mutex>(this->kernelManagedPrivateMemorySizeMutex); };
|
||||
uint32_t getFirstContextIdForRootDevice(uint32_t rootDeviceIndex);
|
||||
|
||||
// Default implementation is a no-op: neither `moduleId` nor `serverType` is written.
// Virtual, so derived memory managers can override it.
virtual void getExtraDeviceProperties(uint32_t rootDeviceIndex, uint32_t *moduleId, uint16_t *serverType) { return; }
|
||||
@@ -433,8 +428,6 @@ class MemoryManager {
|
||||
std::mutex physicalMemoryAllocationMapMutex;
|
||||
std::unique_ptr<std::atomic<size_t>[]> localMemAllocsSize;
|
||||
std::atomic<size_t> sysMemAllocsSize;
|
||||
size_t kernelManagedPrivateMemorySize;
|
||||
std::mutex kernelManagedPrivateMemorySizeMutex;
|
||||
size_t hostAllocationsSavedForReuseSize = 0u;
|
||||
mutable std::mutex hostAllocationsReuseMtx;
|
||||
std::map<std::pair<AllocationType, bool>, CustomHeapAllocatorConfig> customHeapAllocators;
|
||||
|
||||
@@ -221,7 +221,6 @@ ReturnRawGpuTimestamps = 0
|
||||
EnableDeviceBasedTimestamps = 1
|
||||
MaxHwThreadsPercent = 0
|
||||
MinHwThreadsUnoccupied = 0
|
||||
MaxKernelManagedPrivateMemoryPercent = 0
|
||||
LimitBlitterMaxWidth = -1
|
||||
LimitBlitterMaxHeight = -1
|
||||
PostBlitCommand = -1
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/compiler_interface/external_functions.h"
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/helpers/blit_helper.h"
|
||||
#include "shared/source/helpers/surface_format_info.h"
|
||||
@@ -228,18 +229,6 @@ TEST(MemoryManagerTest, givenFailureOnRegisterLocalMemoryAllocationWhenAllocatin
|
||||
EXPECT_EQ(nullptr, memoryManager.allocateGraphicsMemoryWithProperties(properties));
|
||||
}
|
||||
|
||||
// Verifies that the kernel-managed private memory counter starts at zero and follows
// register/unregister calls exactly: +1234 -> 1234, -1000 -> 234, -234 -> 0.
TEST(MemoryManagerTest, givenDifferentSizesWhenRegisteringAndUnregisteringModulePrivateMemorySizesThenCorrectValuesAreReturned) {
|
||||
MockMemoryManager memoryManager(true, true);
|
||||
// Hold the counter's mutex for the rest of the test body.
auto privateMemorySizeLock = memoryManager.lockKernelManagedPrivateMemorySize();
|
||||
// A freshly constructed manager reports no kernel-managed private memory.
EXPECT_EQ(0u, memoryManager.getKernelManagedPrivateMemorySize());
|
||||
memoryManager.registerKernelManagedPrivateMemorySize(1234u);
|
||||
EXPECT_EQ(1234u, memoryManager.getKernelManagedPrivateMemorySize());
|
||||
// Partial unregister leaves the remainder (1234 - 1000).
memoryManager.unregisterKernelManagedPrivateMemorySize(1000u);
|
||||
EXPECT_EQ(234u, memoryManager.getKernelManagedPrivateMemorySize());
|
||||
// Unregistering the remainder returns the counter to zero.
memoryManager.unregisterKernelManagedPrivateMemorySize(234u);
|
||||
EXPECT_EQ(0u, memoryManager.getKernelManagedPrivateMemorySize());
|
||||
}
|
||||
|
||||
using MemoryhManagerMultiContextResourceTests = ::testing::Test;
|
||||
HWTEST_F(MemoryhManagerMultiContextResourceTests, givenAllocationUsedByManyOsContextsWhenCheckingUsageBeforeDestroyThenMultiContextDestructorIsUsedForWaitingForAllOsContexts) {
|
||||
auto executionEnvironment = new MockExecutionEnvironment(defaultHwInfo.get(), true, 2);
|
||||
|
||||
Reference in New Issue
Block a user