performance: enable l0 usm reuse

Do not reuse internal or imported allocations.
Mark allocations as AUB writable when they are retrieved from the reuse cache.

Related-To: NEO-6893

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2025-06-25 11:17:23 +00:00
committed by Compute-Runtime-Automation
parent 3e024eccb4
commit 5c7fd72d5f
7 changed files with 47 additions and 12 deletions

View File

@@ -52,11 +52,11 @@ bool ApiSpecificConfig::getBindlessMode(const Device &device) {
}
bool ApiSpecificConfig::isDeviceAllocationCacheEnabled() {
return false;
return true;
}
bool ApiSpecificConfig::isHostAllocationCacheEnabled() {
return false;
return true;
}
bool ApiSpecificConfig::isDeviceUsmPoolingEnabled() {

View File

@@ -17,6 +17,7 @@
#include "level_zero/core/source/context/context_imp.h"
#include "level_zero/core/source/device/device_imp.h"
#include "level_zero/core/source/driver/driver_handle_imp.h"
#include "level_zero/core/test/common/ult_helpers_l0.h"
#include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
@@ -197,12 +198,7 @@ void MemoryExportImportWSLTest::SetUp() {
}
void MemoryExportImportWSLTest::TearDown() {
// cleanup pools before restoring memory manager
for (auto device : driverHandle->devices) {
device->getNEODevice()->cleanupUsmAllocationPool();
device->getNEODevice()->resetUsmAllocationPool(nullptr);
}
driverHandle->usmHostMemAllocPool.cleanup();
L0UltHelper::cleanupUsmAllocPoolsAndReuse(driverHandle.get());
driverHandle->setMemoryManager(prevMemoryManager);
delete currMemoryManager;
}

View File

@@ -461,6 +461,8 @@ TEST_F(ContextGetStatusTest, givenCallToContextGetStatusThenCorrectErrorCodeIsRe
res = context->getStatus();
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
driverHandle->getSvmAllocsManager()->cleanupUSMAllocCaches();
for (auto device : driverHandle->devices) {
L0::DeviceImp *deviceImp = static_cast<DeviceImp *>(device);
deviceImp->releaseResources();

View File

@@ -2824,6 +2824,7 @@ TEST_F(DeviceGetStatusTest, givenCallToDeviceGetStatusThenCorrectErrorCodeIsRetu
ze_result_t res = device->getStatus();
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
driverHandle->getSvmAllocsManager()->cleanupUSMAllocCaches();
deviceImp->releaseResources();
res = device->getStatus();
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, res);
@@ -4381,6 +4382,7 @@ TEST_F(DeviceTest, givenValidDeviceWhenCallingReleaseResourcesThenResourcesRelea
auto deviceImp = static_cast<DeviceImp *>(device);
EXPECT_FALSE(deviceImp->resourcesReleased);
EXPECT_FALSE(nullptr == deviceImp->getNEODevice());
driverHandle->getSvmAllocsManager()->cleanupUSMAllocCaches();
deviceImp->releaseResources();
EXPECT_TRUE(deviceImp->resourcesReleased);
EXPECT_TRUE(nullptr == deviceImp->getNEODevice());

View File

@@ -44,8 +44,8 @@ TEST(ApiSpecificConfigL0Tests, WhenGettingRegistryPathThenL0RegistryPathIsReturn
}
TEST(ApiSpecificConfigL0Tests, WhenCheckingIfHostDeviceAllocationCacheIsEnabledThenReturnFalse) {
EXPECT_FALSE(ApiSpecificConfig::isHostAllocationCacheEnabled());
EXPECT_FALSE(ApiSpecificConfig::isDeviceAllocationCacheEnabled());
EXPECT_TRUE(ApiSpecificConfig::isHostAllocationCacheEnabled());
EXPECT_TRUE(ApiSpecificConfig::isDeviceAllocationCacheEnabled());
}
TEST(ApiSpecificConfigL0Tests, WhenCheckingIfUsmAllocPoolingIsEnabledThenReturnCorrectValue) {

View File

@@ -61,7 +61,9 @@ SVMAllocsManager::SvmAllocationCache::SvmAllocationCache() {
}
bool SVMAllocsManager::SvmAllocationCache::insert(size_t size, void *ptr, SvmAllocationData *svmData) {
if (false == sizeAllowed(size)) {
if (false == sizeAllowed(size) ||
svmData->isInternalAllocation ||
svmData->isImportedAllocation) {
return false;
}
if (svmData->device ? svmData->device->shouldLimitAllocationsReuse() : memoryManager->shouldLimitAllocationsReuse()) {
@@ -159,6 +161,7 @@ void *SVMAllocsManager::SvmAllocationCache::get(size_t size, const UnifiedMemory
}
allocationIter->svmData->size = size;
allocationIter->svmData->isSavedForReuse = false;
allocationIter->svmData->gpuAllocations.getDefaultGraphicsAllocation()->setAubWritable(true, std::numeric_limits<uint32_t>::max());
allocations.erase(allocationIter);
return allocationPtr;
}
@@ -645,7 +648,6 @@ bool SVMAllocsManager::freeSVMAlloc(void *ptr, bool blocking) {
SvmAllocationData *svmData = getSVMAlloc(ptr);
if (svmData) {
if (InternalMemoryType::deviceUnifiedMemory == svmData->memoryType &&
false == svmData->isInternalAllocation &&
this->usmDeviceAllocationsCache) {
if (this->usmDeviceAllocationsCache->insert(svmData->gpuAllocations.getDefaultGraphicsAllocation()->getUnderlyingBufferSize(), ptr, svmData)) {
return true;

View File

@@ -155,6 +155,39 @@ TEST(SvmAllocationCacheSimpleTest, givenAllocationsWhenCheckingIsInUseThenReturn
}
}
TEST(SvmAllocationCacheSimpleTest, givenAllocationsWhenInsertingAllocationThenDoNotInsertImportedNorInternal) {
    // Stand-alone cache wired to mock managers; reuse budget is set to 1 GB with no limit threshold.
    SVMAllocsManager::SvmAllocationCache allocationCache;
    MockMemoryManager memoryManager;
    MockSVMAllocsManager svmAllocsManager(&memoryManager);
    allocationCache.memoryManager = &memoryManager;
    allocationCache.svmAllocsManager = &svmAllocsManager;
    memoryManager.usmReuseInfo.init(1 * MemoryConstants::gigaByte, UsmReuseInfo::notLimited);

    void *fakePtr = addrToPtr(0xFULL);
    MockGraphicsAllocation gfxAllocation;
    SvmAllocationData allocData(mockRootDeviceIndex);
    allocData.gpuAllocations.addAllocation(&gfxAllocation);

    // Each row: flags set on the allocation data and whether insert() is expected to accept it.
    struct InsertCase {
        bool imported;
        bool internal;
        bool expectInserted;
    };
    const InsertCase insertCases[] = {
        {false, false, true}, // regular allocation is accepted
        {true, false, false}, // imported allocation is rejected
        {false, true, false}, // internal allocation is rejected
    };

    for (const auto &insertCase : insertCases) {
        allocData.isImportedAllocation = insertCase.imported;
        allocData.isInternalAllocation = insertCase.internal;
        EXPECT_EQ(insertCase.expectInserted, allocationCache.insert(1u, fakePtr, &allocData));
        // Drop whatever was cached so the next case starts from an empty cache.
        allocationCache.allocations.clear();
    }
}
struct SvmAllocationCacheTestFixture {
SvmAllocationCacheTestFixture() : executionEnvironment(defaultHwInfo.get()) {}
void setUp() {