mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-27 07:44:16 +08:00
feature: add pooling of USM global/constant surface
Related-To: NEO-12287
Signed-off-by: Fabian Zwoliński <fabian.zwolinski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
0b6b0e3954
commit
a1c5fa1a13
@@ -77,6 +77,13 @@ Device::~Device() {
|
||||
if (deviceUsmMemAllocPoolsManager) {
|
||||
deviceUsmMemAllocPoolsManager->cleanup();
|
||||
}
|
||||
if (usmConstantSurfaceAllocPool) {
|
||||
usmConstantSurfaceAllocPool->cleanup();
|
||||
}
|
||||
if (usmGlobalSurfaceAllocPool) {
|
||||
usmGlobalSurfaceAllocPool->cleanup();
|
||||
}
|
||||
|
||||
secondaryCsrs.clear();
|
||||
executionEnvironment->memoryManager->releaseSecondaryOsContexts(this->getRootDeviceIndex());
|
||||
commandStreamReceivers.clear();
|
||||
@@ -222,6 +229,10 @@ bool Device::initializeCommonResources() {
|
||||
deviceBitfields.emplace(getRootDeviceIndex(), getDeviceBitfield());
|
||||
deviceUsmMemAllocPoolsManager.reset(new UsmMemAllocPoolsManager(getMemoryManager(), rootDeviceIndices, deviceBitfields, this, InternalMemoryType::deviceUnifiedMemory));
|
||||
}
|
||||
|
||||
this->resetUsmConstantSurfaceAllocPool(new UsmMemAllocPool);
|
||||
this->resetUsmGlobalSurfaceAllocPool(new UsmMemAllocPool);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -267,6 +278,14 @@ void Device::cleanupUsmAllocationPool() {
|
||||
}
|
||||
}
|
||||
|
||||
// Replaces the pool backing USM constant-surface allocations.
// Takes ownership of the raw pointer; the previously held pool (if any)
// is destroyed. Passing nullptr clears the pool.
void Device::resetUsmConstantSurfaceAllocPool(UsmMemAllocPool *usmMemAllocPool) {
    usmConstantSurfaceAllocPool.reset(usmMemAllocPool);
}
|
||||
|
||||
// Replaces the pool backing USM global-surface allocations.
// Takes ownership of the raw pointer; the previously held pool (if any)
// is destroyed. Passing nullptr clears the pool.
void Device::resetUsmGlobalSurfaceAllocPool(UsmMemAllocPool *usmMemAllocPool) {
    usmGlobalSurfaceAllocPool.reset(usmMemAllocPool);
}
|
||||
|
||||
bool Device::initDeviceFully() {
|
||||
|
||||
if (!getRootDeviceEnvironment().isExposeSingleDeviceMode()) {
|
||||
|
||||
@@ -213,6 +213,12 @@ class Device : public ReferenceTrackedObject<Device>, NEO::NonCopyableAndNonMova
|
||||
// Non-owning accessor for the device's general USM allocation pool;
// may return nullptr if the pool has not been created.
UsmMemAllocPool *getUsmMemAllocPool() {
    return this->usmMemAllocPool.get();
}
|
||||
// Non-owning accessor for the USM constant-surface allocation pool;
// may return nullptr if the pool has not been created.
UsmMemAllocPool *getUsmConstantSurfaceAllocPool() {
    return this->usmConstantSurfaceAllocPool.get();
}
|
||||
// Non-owning accessor for the USM global-surface allocation pool;
// may return nullptr if the pool has not been created.
UsmMemAllocPool *getUsmGlobalSurfaceAllocPool() {
    return this->usmGlobalSurfaceAllocPool.get();
}
|
||||
MOCKABLE_VIRTUAL void stopDirectSubmissionAndWaitForCompletion();
|
||||
MOCKABLE_VIRTUAL void pollForCompletion();
|
||||
bool isAnyDirectSubmissionEnabled() const;
|
||||
@@ -262,6 +268,9 @@ class Device : public ReferenceTrackedObject<Device>, NEO::NonCopyableAndNonMova
|
||||
void resetUsmAllocationPool(UsmMemAllocPool *usmMemAllocPool);
|
||||
void cleanupUsmAllocationPool();
|
||||
|
||||
void resetUsmConstantSurfaceAllocPool(UsmMemAllocPool *usmMemAllocPool);
|
||||
void resetUsmGlobalSurfaceAllocPool(UsmMemAllocPool *usmMemAllocPool);
|
||||
|
||||
std::unordered_map<uint32_t, bool> crossAccessEnabledDevices;
|
||||
bool canAccessPeer(QueryPeerAccessFunc queryPeerAccess, Device *peerDevice, bool &canAccess);
|
||||
static void initializePeerAccessForDevices(QueryPeerAccessFunc queryPeerAccess, const std::vector<NEO::Device *> &devices);
|
||||
@@ -353,6 +362,8 @@ class Device : public ReferenceTrackedObject<Device>, NEO::NonCopyableAndNonMova
|
||||
TimestampPoolAllocator deviceTimestampPoolAllocator;
|
||||
std::unique_ptr<UsmMemAllocPoolsManager> deviceUsmMemAllocPoolsManager;
|
||||
std::unique_ptr<UsmMemAllocPool> usmMemAllocPool;
|
||||
std::unique_ptr<UsmMemAllocPool> usmConstantSurfaceAllocPool;
|
||||
std::unique_ptr<UsmMemAllocPool> usmGlobalSurfaceAllocPool;
|
||||
|
||||
std::atomic_uint32_t bufferPoolCount = 0u;
|
||||
uint32_t maxBufferPoolCount = 0u;
|
||||
|
||||
@@ -27,7 +27,7 @@ class UsmMemAllocPool {
|
||||
|
||||
UsmMemAllocPool() = default;
|
||||
virtual ~UsmMemAllocPool() = default;
|
||||
bool initialize(SVMAllocsManager *svmMemoryManager, const UnifiedMemoryProperties &memoryProperties, size_t poolSize, size_t minServicedSize, size_t maxServicedSize);
|
||||
MOCKABLE_VIRTUAL bool initialize(SVMAllocsManager *svmMemoryManager, const UnifiedMemoryProperties &memoryProperties, size_t poolSize, size_t minServicedSize, size_t maxServicedSize);
|
||||
bool initialize(SVMAllocsManager *svmMemoryManager, void *ptr, SvmAllocationData *svmData, size_t minServicedSize, size_t maxServicedSize);
|
||||
bool isInitialized() const;
|
||||
size_t getPoolSize() const;
|
||||
@@ -37,14 +37,15 @@ class UsmMemAllocPool {
|
||||
static double getPercentOfFreeMemoryForRecycling(InternalMemoryType memoryType);
|
||||
bool sizeIsAllowed(size_t size);
|
||||
bool canBePooled(size_t size, const UnifiedMemoryProperties &memoryProperties);
|
||||
void *createUnifiedMemoryAllocation(size_t size, const UnifiedMemoryProperties &memoryProperties);
|
||||
MOCKABLE_VIRTUAL void *createUnifiedMemoryAllocation(size_t size, const UnifiedMemoryProperties &memoryProperties);
|
||||
bool isInPool(const void *ptr) const;
|
||||
bool isEmpty();
|
||||
bool freeSVMAlloc(const void *ptr, bool blocking);
|
||||
MOCKABLE_VIRTUAL bool freeSVMAlloc(const void *ptr, bool blocking);
|
||||
size_t getPooledAllocationSize(const void *ptr);
|
||||
void *getPooledAllocationBasePtr(const void *ptr);
|
||||
size_t getOffsetInPool(const void *ptr) const;
|
||||
uint64_t getPoolAddress() const;
|
||||
std::mutex &getMutex() noexcept { return mtx; }
|
||||
|
||||
static constexpr auto chunkAlignment = 512u;
|
||||
static constexpr auto poolAlignment = MemoryConstants::pageSize2M;
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
|
||||
#include "program_initialization.h"
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/compiler_interface/linker.h"
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/helpers/blit_commands_helper.h"
|
||||
@@ -18,6 +19,8 @@
|
||||
#include "shared/source/memory_manager/unified_memory_pooling.h"
|
||||
#include "shared/source/program/program_info.h"
|
||||
|
||||
#include <mutex>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
SharedPoolAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAllocManager, NEO::Device &device, size_t totalSize, size_t zeroInitSize, bool constant,
|
||||
@@ -26,6 +29,8 @@ SharedPoolAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAll
|
||||
size_t allocatedSize{0u};
|
||||
bool globalsAreExported = false;
|
||||
GraphicsAllocation *gpuAllocation = nullptr;
|
||||
bool isAllocatedFromPool = false;
|
||||
std::mutex *usmAllocPoolMutex = nullptr;
|
||||
const auto rootDeviceIndex = device.getRootDeviceIndex();
|
||||
const auto deviceBitfield = device.getDeviceBitfield();
|
||||
|
||||
@@ -42,27 +47,69 @@ SharedPoolAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAll
|
||||
unifiedMemoryProperties.device = &device;
|
||||
unifiedMemoryProperties.requestedAllocationType = allocationType;
|
||||
unifiedMemoryProperties.isInternalAllocation = true;
|
||||
auto ptr = svmAllocManager->createUnifiedMemoryAllocation(totalSize, unifiedMemoryProperties);
|
||||
DEBUG_BREAK_IF(ptr == nullptr);
|
||||
if (ptr == nullptr) {
|
||||
return nullptr;
|
||||
|
||||
UsmMemAllocPool *allocPool = nullptr;
|
||||
if (allocationType == AllocationType::constantSurface) {
|
||||
allocPool = device.getUsmConstantSurfaceAllocPool();
|
||||
} else {
|
||||
allocPool = device.getUsmGlobalSurfaceAllocPool();
|
||||
}
|
||||
|
||||
if (allocPool && device.getProductHelper().is2MBLocalMemAlignmentEnabled()) {
|
||||
if (!allocPool->isInitialized()) {
|
||||
constexpr size_t alignment = MemoryConstants::pageSize2M;
|
||||
constexpr size_t poolSize = MemoryConstants::pageSize2M;
|
||||
constexpr size_t minServicedSize = 0u;
|
||||
constexpr size_t maxServicedSize = 2 * MemoryConstants::megaByte;
|
||||
|
||||
NEO::SVMAllocsManager::UnifiedMemoryProperties poolMemoryProperties(InternalMemoryType::deviceUnifiedMemory, alignment, rootDeviceIndices, subDeviceBitfields);
|
||||
poolMemoryProperties.device = &device;
|
||||
poolMemoryProperties.requestedAllocationType = allocationType;
|
||||
poolMemoryProperties.isInternalAllocation = true;
|
||||
|
||||
allocPool->initialize(svmAllocManager, poolMemoryProperties, poolSize, minServicedSize, maxServicedSize);
|
||||
}
|
||||
|
||||
if (allocPool->isInitialized()) {
|
||||
unifiedMemoryProperties.alignment = MemoryConstants::pageSize;
|
||||
auto pooledPtr = allocPool->createUnifiedMemoryAllocation(totalSize, unifiedMemoryProperties);
|
||||
if (pooledPtr) {
|
||||
allocationOffset = allocPool->getOffsetInPool(pooledPtr);
|
||||
allocatedSize = allocPool->getPooledAllocationSize(pooledPtr);
|
||||
auto usmAlloc = svmAllocManager->getSVMAlloc(reinterpret_cast<void *>(allocPool->getPoolAddress()));
|
||||
UNRECOVERABLE_IF(usmAlloc == nullptr);
|
||||
gpuAllocation = usmAlloc->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
|
||||
usmAllocPoolMutex = &allocPool->getMutex();
|
||||
isAllocatedFromPool = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!gpuAllocation) {
|
||||
auto ptr = svmAllocManager->createUnifiedMemoryAllocation(totalSize, unifiedMemoryProperties);
|
||||
DEBUG_BREAK_IF(ptr == nullptr);
|
||||
if (ptr == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
auto usmAlloc = svmAllocManager->getSVMAlloc(ptr);
|
||||
UNRECOVERABLE_IF(usmAlloc == nullptr);
|
||||
gpuAllocation = usmAlloc->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
|
||||
allocationOffset = 0u;
|
||||
allocatedSize = gpuAllocation->getUnderlyingBufferSize();
|
||||
}
|
||||
auto usmAlloc = svmAllocManager->getSVMAlloc(ptr);
|
||||
UNRECOVERABLE_IF(usmAlloc == nullptr);
|
||||
gpuAllocation = usmAlloc->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
|
||||
} else {
|
||||
gpuAllocation = device.getMemoryManager()->allocateGraphicsMemoryWithProperties({rootDeviceIndex,
|
||||
true, // allocateMemory
|
||||
totalSize, allocationType,
|
||||
false, // isMultiStorageAllocation
|
||||
deviceBitfield});
|
||||
if (nullptr == gpuAllocation) {
|
||||
return nullptr;
|
||||
}
|
||||
allocationOffset = 0u;
|
||||
allocatedSize = gpuAllocation->getUnderlyingBufferSize();
|
||||
}
|
||||
|
||||
if (!gpuAllocation) {
|
||||
return nullptr;
|
||||
}
|
||||
allocatedSize = gpuAllocation->getUnderlyingBufferSize();
|
||||
|
||||
auto &rootDeviceEnvironment = device.getRootDeviceEnvironment();
|
||||
auto &productHelper = device.getProductHelper();
|
||||
|
||||
@@ -72,6 +119,28 @@ SharedPoolAllocation *allocateGlobalsSurface(NEO::SVMAllocsManager *const svmAll
|
||||
auto success = MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *gpuAllocation),
|
||||
device, gpuAllocation, allocationOffset, initData, initSize);
|
||||
UNRECOVERABLE_IF(!success);
|
||||
|
||||
if (auto csr = device.getDefaultEngine().commandStreamReceiver;
|
||||
isAllocatedFromPool && csr->getType() != NEO::CommandStreamReceiverType::hardware) {
|
||||
auto writeMemoryOperation = [&]() {
|
||||
constexpr uint32_t allBanks = std::numeric_limits<uint32_t>::max();
|
||||
if (gpuAllocation->isTbxWritable(allBanks)) {
|
||||
// initialize full page tables for the first time
|
||||
csr->writeMemory(*gpuAllocation, false, 0, 0);
|
||||
}
|
||||
gpuAllocation->setTbxWritable(true, allBanks);
|
||||
[[maybe_unused]] const auto writeMemoryStatus = csr->writeMemory(*gpuAllocation, true, allocationOffset, allocatedSize);
|
||||
DEBUG_BREAK_IF(!writeMemoryStatus);
|
||||
gpuAllocation->setTbxWritable(false, allBanks);
|
||||
};
|
||||
|
||||
if (usmAllocPoolMutex) {
|
||||
std::lock_guard<std::mutex> lock(*usmAllocPoolMutex);
|
||||
writeMemoryOperation();
|
||||
} else {
|
||||
writeMemoryOperation();
|
||||
}
|
||||
}
|
||||
}
|
||||
return new SharedPoolAllocation(gpuAllocation, allocationOffset, allocatedSize, nullptr);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user