feature: add pooling of USM global/constant surface

Related-To: NEO-12287
Signed-off-by: Fabian Zwoliński <fabian.zwolinski@intel.com>
This commit is contained in:
Fabian Zwoliński
2025-09-19 14:53:48 +00:00
committed by Compute-Runtime-Automation
parent 0b6b0e3954
commit a1c5fa1a13
15 changed files with 635 additions and 42 deletions

View File

@@ -77,6 +77,13 @@ Device::~Device() {
if (deviceUsmMemAllocPoolsManager) {
deviceUsmMemAllocPoolsManager->cleanup();
}
if (usmConstantSurfaceAllocPool) {
usmConstantSurfaceAllocPool->cleanup();
}
if (usmGlobalSurfaceAllocPool) {
usmGlobalSurfaceAllocPool->cleanup();
}
secondaryCsrs.clear();
executionEnvironment->memoryManager->releaseSecondaryOsContexts(this->getRootDeviceIndex());
commandStreamReceivers.clear();
@@ -222,6 +229,10 @@ bool Device::initializeCommonResources() {
deviceBitfields.emplace(getRootDeviceIndex(), getDeviceBitfield());
deviceUsmMemAllocPoolsManager.reset(new UsmMemAllocPoolsManager(getMemoryManager(), rootDeviceIndices, deviceBitfields, this, InternalMemoryType::deviceUnifiedMemory));
}
this->resetUsmConstantSurfaceAllocPool(new UsmMemAllocPool);
this->resetUsmGlobalSurfaceAllocPool(new UsmMemAllocPool);
return true;
}
@@ -267,6 +278,14 @@ void Device::cleanupUsmAllocationPool() {
}
}
// Replaces the pool held in usmConstantSurfaceAllocPool with usmMemAllocPool.
// The device takes ownership of the passed pointer; the previously held pool
// object (if any) is deleted by the owning unique_ptr. Passing nullptr leaves
// the device without a constant-surface pool.
// Note: this function does not itself call cleanup() on the outgoing pool.
void Device::resetUsmConstantSurfaceAllocPool(UsmMemAllocPool *usmMemAllocPool) {
    usmConstantSurfaceAllocPool.reset(usmMemAllocPool);
}
// Replaces the pool held in usmGlobalSurfaceAllocPool with usmMemAllocPool.
// The device takes ownership of the passed pointer; the previously held pool
// object (if any) is deleted by the owning unique_ptr. Passing nullptr leaves
// the device without a global-surface pool.
// Note: this function does not itself call cleanup() on the outgoing pool.
void Device::resetUsmGlobalSurfaceAllocPool(UsmMemAllocPool *usmMemAllocPool) {
    usmGlobalSurfaceAllocPool.reset(usmMemAllocPool);
}
bool Device::initDeviceFully() {
if (!getRootDeviceEnvironment().isExposeSingleDeviceMode()) {

View File

@@ -213,6 +213,12 @@ class Device : public ReferenceTrackedObject<Device>, NEO::NonCopyableAndNonMova
// Returns a non-owning pointer to the pool held by usmMemAllocPool.
// The result is nullptr when no pool is currently set; the device keeps ownership.
UsmMemAllocPool *getUsmMemAllocPool() {
return usmMemAllocPool.get();
}
// Returns a non-owning pointer to the pool held by usmConstantSurfaceAllocPool.
// The result is nullptr when no pool is currently set; the device keeps ownership,
// so callers must not delete the returned pointer.
UsmMemAllocPool *getUsmConstantSurfaceAllocPool() {
return usmConstantSurfaceAllocPool.get();
}
// Returns a non-owning pointer to the pool held by usmGlobalSurfaceAllocPool.
// The result is nullptr when no pool is currently set; the device keeps ownership,
// so callers must not delete the returned pointer.
UsmMemAllocPool *getUsmGlobalSurfaceAllocPool() {
return usmGlobalSurfaceAllocPool.get();
}
MOCKABLE_VIRTUAL void stopDirectSubmissionAndWaitForCompletion();
MOCKABLE_VIRTUAL void pollForCompletion();
bool isAnyDirectSubmissionEnabled() const;
@@ -262,6 +268,9 @@ class Device : public ReferenceTrackedObject<Device>, NEO::NonCopyableAndNonMova
void resetUsmAllocationPool(UsmMemAllocPool *usmMemAllocPool);
void cleanupUsmAllocationPool();
// Replace the constant-surface / global-surface USM pool. The device takes
// ownership of the raw pointer passed in (it is stored in a std::unique_ptr),
// and the previously held pool object is destroyed.
void resetUsmConstantSurfaceAllocPool(UsmMemAllocPool *usmMemAllocPool);
void resetUsmGlobalSurfaceAllocPool(UsmMemAllocPool *usmMemAllocPool);
std::unordered_map<uint32_t, bool> crossAccessEnabledDevices;
bool canAccessPeer(QueryPeerAccessFunc queryPeerAccess, Device *peerDevice, bool &canAccess);
static void initializePeerAccessForDevices(QueryPeerAccessFunc queryPeerAccess, const std::vector<NEO::Device *> &devices);
@@ -353,6 +362,8 @@ class Device : public ReferenceTrackedObject<Device>, NEO::NonCopyableAndNonMova
TimestampPoolAllocator deviceTimestampPoolAllocator;
std::unique_ptr<UsmMemAllocPoolsManager> deviceUsmMemAllocPoolsManager;
std::unique_ptr<UsmMemAllocPool> usmMemAllocPool;
// Device-owned pools for USM constant and global surfaces. Both are created in
// initializeCommonResources() and have cleanup() called on them in ~Device().
std::unique_ptr<UsmMemAllocPool> usmConstantSurfaceAllocPool;
std::unique_ptr<UsmMemAllocPool> usmGlobalSurfaceAllocPool;
std::atomic_uint32_t bufferPoolCount = 0u;
uint32_t maxBufferPoolCount = 0u;