performance(usm): l0, add usm host memory pooling

Disabled by default.

Related-To: NEO-11356

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2024-05-15 13:30:37 +00:00
committed by Compute-Runtime-Automation
parent ae8c7589dc
commit b4d839fe29
20 changed files with 345 additions and 15 deletions

View File

@@ -12,6 +12,7 @@
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/memory_operations_handler.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
@@ -132,6 +133,13 @@ ze_result_t ContextImp::allocHostMem(const ze_host_mem_alloc_desc_t *hostDesc,
unifiedMemoryProperties.allocationFlags.hostptr = reinterpret_cast<uintptr_t>(*ptr);
}
if (false == lookupTable.exportMemory) {
if (auto usmPtrFromPool = this->driverHandle->usmHostMemAllocPool.createUnifiedMemoryAllocation(size, unifiedMemoryProperties)) {
*ptr = usmPtrFromPool;
return ZE_RESULT_SUCCESS;
}
}
auto usmPtr = this->driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(size,
unifiedMemoryProperties);
if (usmPtr == nullptr) {
@@ -424,6 +432,10 @@ ze_result_t ContextImp::freeMem(const void *ptr, bool blocking) {
for (auto &pairDevice : this->devices) {
this->freePeerAllocations(ptr, blocking, Device::fromHandle(pairDevice.second));
}
if (this->driverHandle->usmHostMemAllocPool.freeSVMAlloc(ptr, blocking)) {
return ZE_RESULT_SUCCESS;
}
this->driverHandle->svmAllocsManager->freeSVMAlloc(const_cast<void *>(ptr), blocking);
return ZE_RESULT_SUCCESS;
@@ -588,6 +600,10 @@ void ContextImp::setIPCHandleData(NEO::GraphicsAllocation *graphicsAllocation, u
ipcData.handle = handle;
ipcData.type = type;
if (this->driverHandle->usmHostMemAllocPool.isInPool(addrToPtr(ptrAddress))) {
ipcData.poolOffset = this->driverHandle->usmHostMemAllocPool.getOffsetInPool(addrToPtr(ptrAddress));
}
auto lock = this->driverHandle->lockIPCHandleMap();
ipcHandleIterator = this->driverHandle->getIPCHandleMap().find(handle);
if (ipcHandleIterator != this->driverHandle->getIPCHandleMap().end()) {
@@ -711,6 +727,8 @@ ze_result_t ContextImp::openIpcMemHandle(ze_device_handle_t hDevice,
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
*ptr = ptrOffset(*ptr, ipcData.poolOffset);
return ZE_RESULT_SUCCESS;
}

View File

@@ -13,6 +13,7 @@
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/device_bitfield.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/string.h"
@@ -182,6 +183,7 @@ DriverHandleImp::~DriverHandleImp() {
memoryManager->peekExecutionEnvironment().prepareForCleanup();
if (this->svmAllocsManager) {
this->svmAllocsManager->trimUSMDeviceAllocCache();
this->usmHostMemAllocPool.cleanup();
}
}
@@ -278,6 +280,7 @@ ze_result_t DriverHandleImp::initialize(std::vector<std::unique_ptr<NEO::Device>
return ZE_RESULT_ERROR_OUT_OF_HOST_MEMORY;
}
this->svmAllocsManager->initUsmAllocationsCaches(*this->devices[0]->getNEODevice());
this->initHostUsmAllocPool();
this->numDevices = static_cast<uint32_t>(this->devices.size());
@@ -402,6 +405,20 @@ ze_result_t DriverHandleImp::parseAffinityMaskCombined(uint32_t *pCount, ze_devi
return ZE_RESULT_SUCCESS;
}
// Initializes the driver-level host USM allocation pool, if pooling is enabled.
//
// Pooling starts from the API-specific default (ApiSpecificConfig::isHostUsmPoolingEnabled)
// with a pool size of 2 * MemoryConstants::megaByte. Any EnableHostUsmAllocationPool
// debug flag value other than -1 overrides both settings: pooling is enabled only for a
// positive value, and the pool size becomes that value multiplied by MemoryConstants::megaByte.
// When pooling ends up disabled, the pool is left untouched.
void DriverHandleImp::initHostUsmAllocPool() {
    bool poolingEnabled = NEO::ApiSpecificConfig::isHostUsmPoolingEnabled();
    auto poolSize = 2 * MemoryConstants::megaByte;

    // Read the debug flag once; -1 means "not set, keep the defaults".
    const auto poolFlagValue = NEO::debugManager.flags.EnableHostUsmAllocationPool.get();
    if (poolFlagValue != -1) {
        poolingEnabled = poolFlagValue > 0;
        poolSize = poolFlagValue * MemoryConstants::megaByte;
    }

    if (!poolingEnabled) {
        return;
    }

    // Host unified memory properties covering the driver's root device indices and bitfields.
    NEO::SVMAllocsManager::UnifiedMemoryProperties memoryProperties(InternalMemoryType::hostUnifiedMemory,
                                                                    MemoryConstants::pageSize2M,
                                                                    rootDeviceIndices,
                                                                    deviceBitfields);
    usmHostMemAllocPool.initialize(svmAllocsManager, memoryProperties, poolSize);
}
ze_result_t DriverHandleImp::getDevice(uint32_t *pCount, ze_device_handle_t *phDevices) {
bool exposeSubDevices = false;

View File

@@ -9,6 +9,7 @@
#include "shared/source/debugger/debugger.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/unified_memory_pooling.h"
#include "shared/source/os_interface/os_library.h"
#include "level_zero/api/extensions/public/ze_exp_ext.h"
@@ -33,6 +34,7 @@ enum L0DeviceHierarchyMode {
#pragma pack(1)
// Payload describing a memory allocation shared through an IPC handle.
// Packed to 1-byte alignment; field order and widths define the layout, so they must not change.
struct IpcMemoryData {
    // Handle value identifying the shared allocation.
    uint64_t handle{0};
    // Byte offset of the pointer inside the USM host pool; stays 0 unless the pointer is in the pool.
    uint64_t poolOffset{0};
    // Type tag stored alongside the handle.
    uint8_t type{0};
};
#pragma pack()
@@ -122,6 +124,7 @@ struct DriverHandleImp : public DriverHandle {
ze_result_t parseAffinityMaskCombined(uint32_t *pCount, ze_device_handle_t *phDevices);
std::map<uint64_t, IpcHandleTracking *> &getIPCHandleMap() { return this->ipcHandles; };
[[nodiscard]] std::unique_lock<std::mutex> lockIPCHandleMap() { return std::unique_lock<std::mutex>(this->ipcHandleMapMutex); };
void initHostUsmAllocPool();
std::unique_ptr<HostPointerManager> hostPointerManager;
@@ -142,6 +145,7 @@ struct DriverHandleImp : public DriverHandle {
NEO::MemoryManager *memoryManager = nullptr;
NEO::SVMAllocsManager *svmAllocsManager = nullptr;
NEO::UsmMemAllocPool usmHostMemAllocPool;
std::unique_ptr<NEO::OsLibrary> rtasLibraryHandle;
bool rtasLibraryUnavailable = false;

View File

@@ -44,6 +44,14 @@ bool ApiSpecificConfig::isHostAllocationCacheEnabled() {
return false;
}
// Reports whether device USM pooling is enabled for this API configuration: always off here.
bool ApiSpecificConfig::isDeviceUsmPoolingEnabled() {
    constexpr bool deviceUsmPoolingEnabled = false;
    return deviceUsmPoolingEnabled;
}
// Reports the default for host USM pooling in this API configuration: off.
// The EnableHostUsmAllocationPool debug flag can override this default where the pool is initialized.
bool ApiSpecificConfig::isHostUsmPoolingEnabled() {
    constexpr bool hostUsmPoolingEnabled = false;
    return hostUsmPoolingEnabled;
}
// Identifies the API this configuration belongs to: always the L0 enumerator.
ApiSpecificConfig::ApiType ApiSpecificConfig::getApiType() {
    const ApiSpecificConfig::ApiType apiType = ApiSpecificConfig::L0;
    return apiType;
}