diff --git a/shared/source/memory_manager/unified_memory_manager.cpp b/shared/source/memory_manager/unified_memory_manager.cpp index f08ee8247e..104a991464 100644 --- a/shared/source/memory_manager/unified_memory_manager.cpp +++ b/shared/source/memory_manager/unified_memory_manager.cpp @@ -297,7 +297,8 @@ void *SVMAllocsManager::createUnifiedMemoryAllocation(size_t size, if (memoryProperties.memoryType == InternalMemoryType::deviceUnifiedMemory) { unifiedMemoryProperties.flags.isUSMDeviceAllocation = true; - if (this->usmDeviceAllocationsCacheEnabled) { + if (this->usmDeviceAllocationsCacheEnabled && + false == memoryProperties.needZeroedOutAllocation) { void *allocationFromCache = this->usmDeviceAllocationsCache.get(size, memoryProperties, this); if (allocationFromCache) { return allocationFromCache; diff --git a/shared/source/memory_manager/unified_memory_manager.h b/shared/source/memory_manager/unified_memory_manager.h index 3ba83d4ca1..c8de902c15 100644 --- a/shared/source/memory_manager/unified_memory_manager.h +++ b/shared/source/memory_manager/unified_memory_manager.h @@ -138,6 +138,7 @@ class SVMAllocsManager { const RootDeviceIndicesContainer &rootDeviceIndices; const std::map &subdeviceBitfields; AllocationType requestedAllocationType = AllocationType::unknown; + bool needZeroedOutAllocation = false; }; struct SvmCacheAllocationInfo { diff --git a/shared/source/program/program_initialization.cpp b/shared/source/program/program_initialization.cpp index 1e80a20125..b42f2624a8 100644 --- a/shared/source/program/program_initialization.cpp +++ b/shared/source/program/program_initialization.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -38,6 +38,7 @@ GraphicsAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAlloc NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, rootDeviceIndices, subDeviceBitfields); unifiedMemoryProperties.device = &device; unifiedMemoryProperties.requestedAllocationType = allocationType; + unifiedMemoryProperties.needZeroedOutAllocation = true; auto ptr = svmAllocManager->createUnifiedMemoryAllocation(totalSize, unifiedMemoryProperties); DEBUG_BREAK_IF(ptr == nullptr); if (ptr == nullptr) { diff --git a/shared/test/common/mocks/mock_svm_manager.h b/shared/test/common/mocks/mock_svm_manager.h index c7177b0ac1..3ee522d43e 100644 --- a/shared/test/common/mocks/mock_svm_manager.h +++ b/shared/test/common/mocks/mock_svm_manager.h @@ -32,6 +32,12 @@ struct MockSVMAllocsManager : public SVMAllocsManager { prefetchMemoryCalled = true; } bool prefetchMemoryCalled = false; + + void *createUnifiedMemoryAllocation(size_t size, const UnifiedMemoryProperties &memoryProperties) override { + requestedZeroedOutAllocation = memoryProperties.needZeroedOutAllocation; + return SVMAllocsManager::createUnifiedMemoryAllocation(size, memoryProperties); + } + bool requestedZeroedOutAllocation = false; }; template diff --git a/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp b/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp index 204e215364..8885ba0f13 100644 --- a/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp +++ b/shared/test/unit_test/memory_manager/unified_memory_manager_cache_tests.cpp @@ -519,6 +519,36 @@ TEST_F(SvmDeviceAllocationCacheTest, givenDeviceOutOfMemoryWhenAllocatingThenCac ASSERT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 0u); } +TEST_F(SvmDeviceAllocationCacheTest, givenAllocationWithNeedZeroedOutAllocationWhenAllocatingAfterFreeThenDoNotReuseAllocation) { + std::unique_ptr deviceFactory(new UltDeviceFactory(1, 1)); + RootDeviceIndicesContainer rootDeviceIndices = {mockRootDeviceIndex}; + std::map deviceBitfields{{mockRootDeviceIndex, mockDeviceBitfield}}; + DebugManagerStateRestore restore; + debugManager.flags.ExperimentalEnableDeviceAllocationCache.set(1); + auto device = deviceFactory->rootDevices[0]; + auto svmManager = std::make_unique(device->getMemoryManager(), false); + svmManager->initUsmAllocationsCaches(*device); + EXPECT_TRUE(svmManager->usmDeviceAllocationsCacheEnabled); + svmManager->usmDeviceAllocationsCache.maxSize = 1 * MemoryConstants::gigaByte; + + SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::deviceUnifiedMemory, 1, rootDeviceIndices, deviceBitfields); + unifiedMemoryProperties.device = device; + auto allocation = svmManager->createUnifiedMemoryAllocation(10u, unifiedMemoryProperties); + EXPECT_NE(allocation, nullptr); + svmManager->freeSVMAlloc(allocation); + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 1u); + + unifiedMemoryProperties.needZeroedOutAllocation = true; + auto testedAllocation = svmManager->createUnifiedMemoryAllocation(10u, unifiedMemoryProperties); + EXPECT_EQ(svmManager->usmDeviceAllocationsCache.allocations.size(), 1u); + auto svmData = svmManager->getSVMAlloc(testedAllocation); + EXPECT_NE(nullptr, svmData); + + svmManager->freeSVMAlloc(testedAllocation); + + svmManager->trimUSMDeviceAllocCache(); +} + using SvmHostAllocationCacheTest = Test; TEST_F(SvmHostAllocationCacheTest, givenAllocationCacheDefaultWhenCheckingIfEnabledThenItIsDisabled) { diff --git a/shared/test/unit_test/program/program_initialization_tests.cpp b/shared/test/unit_test/program/program_initialization_tests.cpp index 463eea249b..cd3bc49b5f 100644 --- a/shared/test/unit_test/program/program_initialization_tests.cpp +++ b/shared/test/unit_test/program/program_initialization_tests.cpp @@ -93,6 +93,7 @@ TEST(AllocateGlobalSurfaceTest, GivenSvmAllocsManagerWhenGlobalsAreExportedThenM EXPECT_EQ(InternalMemoryType::deviceUnifiedMemory, svmAllocsManager.getSVMAlloc(reinterpret_cast(alloc->getGpuAddress()))->memoryType); EXPECT_EQ(AllocationType::constantSurface, alloc->getAllocationType()); EXPECT_FALSE(alloc->getDefaultGmm()->resourceParams.Flags.Info.NotLockable); + EXPECT_TRUE(svmAllocsManager.requestedZeroedOutAllocation); svmAllocsManager.freeSVMAlloc(reinterpret_cast(static_cast(alloc->getGpuAddress()))); alloc = allocateGlobalsSurface(&svmAllocsManager, device, initData.size(), 0u, true /* constant */, &linkerInputExportGlobalVariables, initData.data());