Mirror of https://github.com/intel/compute-runtime.git (synced 2025-12-21 17:29:14 +08:00)
When an allocation is saved for reuse, remove it from the allocations map. On get from reuse, reinsert it with a new allocId. Allocations saved for reuse should not be made resident for indirect access. Only applicable on aub/tbx csr types.

Related-To: NEO-6893
Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
1225 lines · 58 KiB · C++
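The commit message above describes the reuse-cache flow implemented by SvmAllocationCache::insert and SvmAllocationCache::get in the file below. As a rough orientation only, here is a minimal, self-contained sketch of that flow; the names (ReuseCache, Allocation) are illustrative and do not correspond to the actual NEO classes.

// Illustrative sketch only; not the NEO implementation.
#include <cstdint>
#include <map>
#include <vector>

struct Allocation {
    uint32_t allocId = 0;
    bool savedForReuse = false;
};

struct ReuseCache {
    // Allocations tracked for indirect access, keyed by allocId.
    std::map<uint32_t, Allocation *> allocationsForIndirectAccess;
    std::vector<Allocation *> savedForReuse;
    uint32_t allocationsCounter = 0;

    // Save for reuse: drop the allocation from the indirect-access map so it is
    // no longer made resident for indirect access while it sits in the cache.
    void saveForReuse(Allocation *alloc) {
        allocationsForIndirectAccess.erase(alloc->allocId);
        alloc->savedForReuse = true;
        savedForReuse.push_back(alloc);
    }

    // Get from reuse: assign a fresh allocId and reinsert into the map.
    Allocation *getFromReuse() {
        if (savedForReuse.empty()) {
            return nullptr;
        }
        Allocation *alloc = savedForReuse.back();
        savedForReuse.pop_back();
        alloc->allocId = ++allocationsCounter;
        alloc->savedForReuse = false;
        allocationsForIndirectAccess.emplace(alloc->allocId, alloc);
        return alloc;
    }
};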
/*
 * Copyright (C) 2019-2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/memory_manager/unified_memory_manager.h"

#include "shared/source/ail/ail_configuration.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/device/sub_device.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/memory_properties_helpers.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/compression_selector.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/unified_memory_reuse_cleaner.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/source/os_interface/product_helper.h"
#include "shared/source/page_fault_manager/cpu_page_fault_manager.h"

namespace NEO {

uint32_t SVMAllocsManager::UnifiedMemoryProperties::getRootDeviceIndex() const {
    if (device) {
        return device->getRootDeviceIndex();
    }
    UNRECOVERABLE_IF(rootDeviceIndices.begin() == nullptr);
    return *rootDeviceIndices.begin();
}

void SVMAllocsManager::MapBasedAllocationTracker::insert(const SvmAllocationData &allocationsPair) {
    allocations.insert(std::make_pair(reinterpret_cast<void *>(allocationsPair.gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress()), allocationsPair));
}

void SVMAllocsManager::MapBasedAllocationTracker::remove(const SvmAllocationData &allocationsPair) {
    SvmAllocationContainer::iterator iter;
    iter = allocations.find(reinterpret_cast<void *>(allocationsPair.gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress()));
    allocations.erase(iter);
}

void SVMAllocsManager::MapBasedAllocationTracker::freeAllocations(NEO::MemoryManager &memoryManager) {
    std::unique_lock<NEO::SpinLock> lock(mutex);

    for (auto &allocation : allocations) {
        for (auto &gpuAllocation : allocation.second.gpuAllocations.getGraphicsAllocations()) {
            memoryManager.freeGraphicsMemory(gpuAllocation);
        }
    }
}

SVMAllocsManager::SvmAllocationCache::SvmAllocationCache() {
    this->enablePerformanceLogging = NEO::debugManager.flags.LogUsmReuse.get();
}

bool SVMAllocsManager::SvmAllocationCache::insert(size_t size, void *ptr, SvmAllocationData *svmData, bool waitForCompletion) {
    if (false == sizeAllowed(size) ||
        svmData->isInternalAllocation ||
        svmData->isImportedAllocation) {
        return false;
    }
    if (svmData->device ? svmData->device->shouldLimitAllocationsReuse() : memoryManager->shouldLimitAllocationsReuse()) {
        return false;
    }
    if (svmData->isSavedForReuse) {
        return true;
    }
    std::lock_guard<std::mutex> lock(this->mtx);
    bool isSuccess = true;
    if (auto device = svmData->device) {
        auto lock = device->usmReuseInfo.obtainAllocationsReuseLock();
        if (size + device->usmReuseInfo.getAllocationsSavedForReuseSize() > device->usmReuseInfo.getMaxAllocationsSavedForReuseSize()) {
            isSuccess = false;
        } else {
            device->usmReuseInfo.recordAllocationSaveForReuse(size);
        }
    } else {
        auto lock = memoryManager->usmReuseInfo.obtainAllocationsReuseLock();
        if (size + memoryManager->usmReuseInfo.getAllocationsSavedForReuseSize() > memoryManager->usmReuseInfo.getMaxAllocationsSavedForReuseSize()) {
            isSuccess = false;
        } else {
            memoryManager->usmReuseInfo.recordAllocationSaveForReuse(size);
        }
    }
    if (isSuccess) {
        if (waitForCompletion) {
            svmAllocsManager->waitForEnginesCompletion(svmData);
        }
        if (requireUpdatingAllocsForIndirectAccess) {
            svmAllocsManager->removeFromAllocsForIndirectAccess(*svmData);
        }
        svmData->isSavedForReuse = true;
        allocations.emplace(std::lower_bound(allocations.begin(), allocations.end(), size), size, ptr, svmData, waitForCompletion);
    }
    if (enablePerformanceLogging) {
        logCacheOperation({.allocationSize = size,
                           .timePoint = std::chrono::high_resolution_clock::now(),
                           .allocationType = svmData->memoryType,
                           .operationType = CacheOperationType::insert,
                           .isSuccess = isSuccess});
    }
    return isSuccess;
}

bool SVMAllocsManager::SvmAllocationCache::allocUtilizationAllows(size_t requestedSize, size_t reuseCandidateSize) {
    if (reuseCandidateSize >= SvmAllocationCache::minimalSizeToCheckUtilization) {
        const auto allocUtilization = static_cast<double>(requestedSize) / reuseCandidateSize;
        return allocUtilization >= SvmAllocationCache::minimalAllocUtilization;
    }
    return true;
}

bool SVMAllocsManager::SvmAllocationCache::alignmentAllows(void *ptr, size_t alignment) {
    return 0u == alignment || isAligned(castToUint64(ptr), alignment);
}

bool SVMAllocsManager::SvmAllocationCache::isInUse(SvmCacheAllocationInfo &cacheAllocInfo) {
    if (cacheAllocInfo.completed) {
        return false;
    }
    if (cacheAllocInfo.svmData->cpuAllocation && memoryManager->allocInUse(*cacheAllocInfo.svmData->cpuAllocation)) {
        return true;
    }
    for (auto &gpuAllocation : cacheAllocInfo.svmData->gpuAllocations.getGraphicsAllocations()) {
        if (gpuAllocation && memoryManager->allocInUse(*gpuAllocation)) {
            return true;
        }
    }
    return false;
}

void *SVMAllocsManager::SvmAllocationCache::get(size_t size, const UnifiedMemoryProperties &unifiedMemoryProperties) {
    if (false == sizeAllowed(size)) {
        return nullptr;
    }
    std::lock_guard<std::mutex> lock(this->mtx);
    for (auto allocationIter = std::lower_bound(allocations.begin(), allocations.end(), size);
         allocationIter != allocations.end();
         ++allocationIter) {
        if (false == allocUtilizationAllows(size, allocationIter->allocationSize)) {
            break;
        }
        void *allocationPtr = allocationIter->allocation;
        DEBUG_BREAK_IF(nullptr == allocationIter->svmData);
        if (allocationIter->svmData->device == unifiedMemoryProperties.device &&
            allocationIter->svmData->allocationFlagsProperty.allFlags == unifiedMemoryProperties.allocationFlags.allFlags &&
            allocationIter->svmData->allocationFlagsProperty.allAllocFlags == unifiedMemoryProperties.allocationFlags.allAllocFlags &&
            alignmentAllows(allocationIter->allocation, unifiedMemoryProperties.alignment) &&
            false == isInUse(*allocationIter)) {
            if (allocationIter->svmData->device) {
                auto lock = allocationIter->svmData->device->usmReuseInfo.obtainAllocationsReuseLock();
                allocationIter->svmData->device->usmReuseInfo.recordAllocationGetFromReuse(allocationIter->allocationSize);
            } else {
                auto lock = memoryManager->usmReuseInfo.obtainAllocationsReuseLock();
                memoryManager->usmReuseInfo.recordAllocationGetFromReuse(allocationIter->allocationSize);
            }
            if (enablePerformanceLogging) {
                logCacheOperation({.allocationSize = allocationIter->allocationSize,
                                   .timePoint = std::chrono::high_resolution_clock::now(),
                                   .allocationType = allocationIter->svmData->memoryType,
                                   .operationType = CacheOperationType::get,
                                   .isSuccess = true});
            }
            allocationIter->svmData->size = size;
            allocationIter->svmData->isSavedForReuse = false;
            allocationIter->svmData->gpuAllocations.getDefaultGraphicsAllocation()->setAubWritable(true, std::numeric_limits<uint32_t>::max());
            allocationIter->svmData->gpuAllocations.getDefaultGraphicsAllocation()->setTbxWritable(true, std::numeric_limits<uint32_t>::max());
            if (requireUpdatingAllocsForIndirectAccess) {
                allocationIter->svmData->setAllocId(++svmAllocsManager->allocationsCounter);
                svmAllocsManager->reinsertToAllocsForIndirectAccess(*allocationIter->svmData);
            }
            allocations.erase(allocationIter);
            return allocationPtr;
        }
    }
    if (enablePerformanceLogging) {
        logCacheOperation({.allocationSize = size,
                           .timePoint = std::chrono::high_resolution_clock::now(),
                           .allocationType = unifiedMemoryProperties.memoryType,
                           .operationType = CacheOperationType::get,
                           .isSuccess = false});
    }
    return nullptr;
}

void SVMAllocsManager::SvmAllocationCache::trim() {
    std::lock_guard<std::mutex> lock(this->mtx);
    for (auto &cachedAllocationInfo : this->allocations) {
        DEBUG_BREAK_IF(nullptr == cachedAllocationInfo.svmData);
        if (cachedAllocationInfo.svmData->device) {
            auto lock = cachedAllocationInfo.svmData->device->usmReuseInfo.obtainAllocationsReuseLock();
            cachedAllocationInfo.svmData->device->usmReuseInfo.recordAllocationGetFromReuse(cachedAllocationInfo.allocationSize);
        } else {
            auto lock = memoryManager->usmReuseInfo.obtainAllocationsReuseLock();
            memoryManager->usmReuseInfo.recordAllocationGetFromReuse(cachedAllocationInfo.allocationSize);
        }
        if (enablePerformanceLogging) {
            logCacheOperation({.allocationSize = cachedAllocationInfo.allocationSize,
                               .timePoint = std::chrono::high_resolution_clock::now(),
                               .allocationType = cachedAllocationInfo.svmData->memoryType,
                               .operationType = CacheOperationType::trim,
                               .isSuccess = true});
        }
        svmAllocsManager->freeSVMAllocImpl(cachedAllocationInfo.allocation, FreePolicyType::none, cachedAllocationInfo.svmData);
    }
    this->allocations.clear();
}

void SVMAllocsManager::SvmAllocationCache::cleanup() {
    DEBUG_BREAK_IF(nullptr == this->memoryManager);
    if (auto usmReuseCleaner = this->memoryManager->peekExecutionEnvironment().unifiedMemoryReuseCleaner.get()) {
        usmReuseCleaner->unregisterSvmAllocationCache(this);
    }
    this->trim();
}

void SVMAllocsManager::SvmAllocationCache::logCacheOperation(const SvmAllocationCachePerfInfo &cachePerfEvent) const {
    std::string allocationTypeString, operationTypeString, isSuccessString;
    switch (cachePerfEvent.allocationType) {
    case InternalMemoryType::deviceUnifiedMemory:
        allocationTypeString = "device";
        break;
    case InternalMemoryType::hostUnifiedMemory:
        allocationTypeString = "host";
        break;
    default:
        allocationTypeString = "unknown";
        break;
    }

    switch (cachePerfEvent.operationType) {
    case CacheOperationType::get:
        operationTypeString = "get";
        break;
    case CacheOperationType::insert:
        operationTypeString = "insert";
        break;
    case CacheOperationType::trim:
        operationTypeString = "trim";
        break;
    case CacheOperationType::trimOld:
        operationTypeString = "trim_old";
        break;
    default:
        operationTypeString = "unknown";
        break;
    }
    isSuccessString = cachePerfEvent.isSuccess ? "TRUE" : "FALSE";
    NEO::usmReusePerfLoggerInstance().log(true, ",",
                                          cachePerfEvent.timePoint.time_since_epoch().count(), ",",
                                          allocationTypeString, ",",
                                          operationTypeString, ",",
                                          cachePerfEvent.allocationSize, ",",
                                          isSuccessString);
}

void SVMAllocsManager::SvmAllocationCache::trimOldAllocs(std::chrono::high_resolution_clock::time_point trimTimePoint, bool trimAll) {
    std::lock_guard<std::mutex> lock(this->mtx);
    auto allocCleanCandidateIndex = allocations.size();
    while (0u != allocCleanCandidateIndex) {
        auto &allocCleanCandidate = allocations[--allocCleanCandidateIndex];
        if (allocCleanCandidate.saveTime > trimTimePoint) {
            continue;
        }
        DEBUG_BREAK_IF(nullptr == allocCleanCandidate.svmData);
        if (allocCleanCandidate.svmData->device) {
            auto lock = allocCleanCandidate.svmData->device->usmReuseInfo.obtainAllocationsReuseLock();
            allocCleanCandidate.svmData->device->usmReuseInfo.recordAllocationGetFromReuse(allocCleanCandidate.allocationSize);
        } else {
            auto lock = memoryManager->usmReuseInfo.obtainAllocationsReuseLock();
            memoryManager->usmReuseInfo.recordAllocationGetFromReuse(allocCleanCandidate.allocationSize);
        }
        if (enablePerformanceLogging) {
            logCacheOperation({.allocationSize = allocCleanCandidate.allocationSize,
                               .timePoint = std::chrono::high_resolution_clock::now(),
                               .allocationType = allocCleanCandidate.svmData->memoryType,
                               .operationType = CacheOperationType::trimOld,
                               .isSuccess = true});
        }
        svmAllocsManager->freeSVMAllocImpl(allocCleanCandidate.allocation, FreePolicyType::defer, allocCleanCandidate.svmData);
        if (trimAll) {
            allocCleanCandidate.markForDelete();
        } else {
            allocations.erase(allocations.begin() + allocCleanCandidateIndex);
            break;
        }
    }
    if (trimAll) {
        std::erase_if(allocations, SvmCacheAllocationInfo::isMarkedForDelete);
    }
}

SvmAllocationData *SVMAllocsManager::MapBasedAllocationTracker::get(const void *ptr) {
    if (allocations.size() == 0) {
        return nullptr;
    }
    if (!ptr) {
        return nullptr;
    }

    SvmAllocationContainer::iterator iter;
    const SvmAllocationContainer::iterator end = allocations.end();
    SvmAllocationData *svmAllocData;
    // try faster find lookup if pointer is aligned to page
    if (isAligned<MemoryConstants::pageSize>(ptr)) {
        iter = allocations.find(ptr);
        if (iter != end) {
            return &iter->second;
        }
    }
    // do additional check with lower bound as we may deal with pointer offset
    iter = allocations.lower_bound(ptr);
    if (((iter != end) && (iter->first != ptr)) ||
        (iter == end)) {
        if (iter == allocations.begin()) {
            iter = end;
        } else {
            iter--;
        }
    }
    if (iter != end) {
        svmAllocData = &iter->second;
        char *charPtr = reinterpret_cast<char *>(svmAllocData->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress());
        if (ptr < (charPtr + svmAllocData->size)) {
            return svmAllocData;
        }
    }
    return nullptr;
}

void SVMAllocsManager::MapOperationsTracker::insert(SvmMapOperation mapOperation) {
    operations.insert(std::make_pair(mapOperation.regionSvmPtr, mapOperation));
}

void SVMAllocsManager::MapOperationsTracker::remove(const void *regionPtr) {
    SvmMapOperationsContainer::iterator iter;
    iter = operations.find(regionPtr);
    operations.erase(iter);
}

SvmMapOperation *SVMAllocsManager::MapOperationsTracker::get(const void *regionPtr) {
    SvmMapOperationsContainer::iterator iter;
    iter = operations.find(regionPtr);
    if (iter == operations.end()) {
        return nullptr;
    }
    return &iter->second;
}

void SVMAllocsManager::addInternalAllocationsToResidencyContainer(uint32_t rootDeviceIndex,
                                                                  ResidencyContainer &residencyContainer,
                                                                  uint32_t requestedTypesMask) {
    std::shared_lock<std::shared_mutex> lock(mtx);
    for (auto &allocation : this->svmAllocs.allocations) {
        if (rootDeviceIndex >= allocation.second->gpuAllocations.getGraphicsAllocations().size()) {
            continue;
        }

        if (!(static_cast<uint32_t>(allocation.second->memoryType) & requestedTypesMask) ||
            (nullptr == allocation.second->gpuAllocations.getGraphicsAllocation(rootDeviceIndex))) {
            continue;
        }

        auto alloc = allocation.second->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
        residencyContainer.push_back(alloc);
    }
}

void SVMAllocsManager::makeInternalAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t requestedTypesMask) {
    std::shared_lock<std::shared_mutex> lock(mtx);
    for (auto &allocation : this->svmAllocs.allocations) {
        if (static_cast<uint32_t>(allocation.second->memoryType) & requestedTypesMask) {
            auto gpuAllocation = allocation.second->gpuAllocations.getGraphicsAllocation(commandStreamReceiver.getRootDeviceIndex());
            if (gpuAllocation == nullptr) {
                continue;
            }
            commandStreamReceiver.makeResident(*gpuAllocation);
        }
    }
}

SVMAllocsManager::SVMAllocsManager(MemoryManager *memoryManager)
    : memoryManager(memoryManager) {
}

SVMAllocsManager::~SVMAllocsManager() = default;

void *SVMAllocsManager::createSVMAlloc(size_t size, const SvmAllocationProperties svmProperties,
                                       const RootDeviceIndicesContainer &rootDeviceIndices,
                                       const std::map<uint32_t, DeviceBitfield> &subdeviceBitfields) {
    if (size == 0)
        return nullptr;

    if (rootDeviceIndices.size() > 1) {
        return createZeroCopySvmAllocation(size, svmProperties, rootDeviceIndices, subdeviceBitfields);
    }
    if (!memoryManager->isLocalMemorySupported(*rootDeviceIndices.begin())) {
        return createZeroCopySvmAllocation(size, svmProperties, rootDeviceIndices, subdeviceBitfields);
    } else {
        UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::notSpecified, 1, rootDeviceIndices, subdeviceBitfields);
        return createUnifiedAllocationWithDeviceStorage(size, svmProperties, unifiedMemoryProperties);
    }
}

void *SVMAllocsManager::createHostUnifiedMemoryAllocation(size_t size,
                                                          const UnifiedMemoryProperties &memoryProperties) {
    bool isDiscrete = false;
    if (size >= MemoryConstants::pageSize2M && !debugManager.flags.NEO_CAL_ENABLED.get()) {
        for (const auto rootDeviceIndex : memoryProperties.rootDeviceIndices) {
            isDiscrete |= !this->memoryManager->peekExecutionEnvironment().rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->capabilityTable.isIntegratedDevice;
            if (isDiscrete) {
                break;
            }
        }
    }
    const size_t pageSizeForAlignment = isDiscrete ? MemoryConstants::pageSize2M : MemoryConstants::pageSize;
    const size_t alignedSize = alignUp<size_t>(size, pageSizeForAlignment);

    bool compressionEnabled = false;
    AllocationType allocationType = getGraphicsAllocationTypeAndCompressionPreference(memoryProperties, compressionEnabled);

    RootDeviceIndicesContainer rootDeviceIndicesVector(memoryProperties.rootDeviceIndices);

    uint32_t rootDeviceIndex = rootDeviceIndicesVector.at(0);
    auto &deviceBitfield = memoryProperties.subdeviceBitfields.at(rootDeviceIndex);

    AllocationProperties unifiedMemoryProperties{rootDeviceIndex,
                                                 true,
                                                 alignedSize,
                                                 allocationType,
                                                 false,
                                                 (deviceBitfield.count() > 1),
                                                 deviceBitfield};
    unifiedMemoryProperties.alignment = alignUpNonZero<size_t>(memoryProperties.alignment, pageSizeForAlignment);
    unifiedMemoryProperties.flags.preferCompressed = compressionEnabled;
    unifiedMemoryProperties.flags.shareable = memoryProperties.allocationFlags.flags.shareable;
    unifiedMemoryProperties.flags.isUSMHostAllocation = true;
    unifiedMemoryProperties.flags.isUSMDeviceAllocation = false;
    unifiedMemoryProperties.cacheRegion = MemoryPropertiesHelper::getCacheRegion(memoryProperties.allocationFlags);

    if (this->usmHostAllocationsCache) {
        void *allocationFromCache = this->usmHostAllocationsCache->get(size, memoryProperties);
        if (allocationFromCache) {
            return allocationFromCache;
        }
    }

    auto maxRootDeviceIndex = *std::max_element(rootDeviceIndicesVector.begin(), rootDeviceIndicesVector.end(), std::less<uint32_t const>());
    SvmAllocationData allocData(maxRootDeviceIndex);
    void *externalHostPointer = reinterpret_cast<void *>(memoryProperties.allocationFlags.hostptr);

    void *usmPtr = memoryManager->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndicesVector, unifiedMemoryProperties, allocData.gpuAllocations, externalHostPointer);
    if (!usmPtr) {
        if (this->usmHostAllocationsCache) {
            this->trimUSMHostAllocCache();
            usmPtr = memoryManager->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndicesVector, unifiedMemoryProperties, allocData.gpuAllocations, externalHostPointer);
        }
        if (!usmPtr) {
            return nullptr;
        }
    }

    allocData.cpuAllocation = nullptr;
    allocData.size = size;
    allocData.memoryType = memoryProperties.memoryType;
    allocData.allocationFlagsProperty = memoryProperties.allocationFlags;
    allocData.device = nullptr;
    allocData.pageSizeForAlignment = pageSizeForAlignment;
    allocData.setAllocId(++this->allocationsCounter);

    insertSVMAlloc(usmPtr, allocData);

    return usmPtr;
}

void *SVMAllocsManager::createUnifiedMemoryAllocation(size_t size,
                                                      const UnifiedMemoryProperties &memoryProperties) {
    auto rootDeviceIndex = memoryProperties.getRootDeviceIndex();
    DeviceBitfield deviceBitfield = memoryProperties.subdeviceBitfields.at(rootDeviceIndex);
    constexpr size_t pageSizeForAlignment = MemoryConstants::pageSize64k;
    const size_t alignedSize = alignUp<size_t>(size, pageSizeForAlignment);

    auto externalPtr = reinterpret_cast<void *>(memoryProperties.allocationFlags.hostptr);
    bool useExternalHostPtrForCpu = externalPtr != nullptr;

    bool compressionEnabled = false;
    AllocationType allocationType = getGraphicsAllocationTypeAndCompressionPreference(memoryProperties, compressionEnabled);

    bool multiStorageAllocation = (deviceBitfield.count() > 1);

    AllocationProperties unifiedMemoryProperties{rootDeviceIndex,
                                                 !useExternalHostPtrForCpu, // allocateMemory
                                                 alignedSize,
                                                 allocationType,
                                                 false,
                                                 multiStorageAllocation,
                                                 deviceBitfield};
    unifiedMemoryProperties.alignment = alignUpNonZero<size_t>(memoryProperties.alignment, pageSizeForAlignment);
    unifiedMemoryProperties.flags.isUSMDeviceAllocation = false;
    unifiedMemoryProperties.flags.shareable = memoryProperties.allocationFlags.flags.shareable;
    unifiedMemoryProperties.cacheRegion = MemoryPropertiesHelper::getCacheRegion(memoryProperties.allocationFlags);
    unifiedMemoryProperties.flags.uncacheable = memoryProperties.allocationFlags.flags.locallyUncachedResource;
    unifiedMemoryProperties.flags.preferCompressed = compressionEnabled || memoryProperties.allocationFlags.flags.compressedHint;
    unifiedMemoryProperties.flags.preferCompressed &= memoryManager->isCompressionSupportedForShareable(memoryProperties.allocationFlags.flags.shareable);
    unifiedMemoryProperties.flags.resource48Bit = memoryProperties.allocationFlags.flags.resource48Bit;

    if (memoryProperties.memoryType == InternalMemoryType::deviceUnifiedMemory) {
        unifiedMemoryProperties.flags.isUSMDeviceAllocation = true;
        if (this->usmDeviceAllocationsCache &&
            false == memoryProperties.isInternalAllocation) {
            void *allocationFromCache = this->usmDeviceAllocationsCache->get(size, memoryProperties);
            if (allocationFromCache) {
                return allocationFromCache;
            }
        }
    } else if (memoryProperties.memoryType == InternalMemoryType::hostUnifiedMemory) {
        unifiedMemoryProperties.flags.isUSMHostAllocation = true;
    } else {
        unifiedMemoryProperties.flags.isUSMHostAllocation = useExternalHostPtrForCpu;
    }

    GraphicsAllocation *unifiedMemoryAllocation = memoryManager->allocateGraphicsMemoryWithProperties(unifiedMemoryProperties, externalPtr);
    if (!unifiedMemoryAllocation) {
        if (memoryProperties.memoryType == InternalMemoryType::deviceUnifiedMemory &&
            this->usmDeviceAllocationsCache) {
            this->trimUSMDeviceAllocCache();
            unifiedMemoryAllocation = memoryManager->allocateGraphicsMemoryWithProperties(unifiedMemoryProperties, externalPtr);
        }
        if (!unifiedMemoryAllocation) {
            return nullptr;
        }
    }
    setUnifiedAllocationProperties(unifiedMemoryAllocation, {});

    SvmAllocationData allocData(rootDeviceIndex);
    allocData.gpuAllocations.addAllocation(unifiedMemoryAllocation);
    allocData.cpuAllocation = nullptr;
    allocData.size = size;
    allocData.pageSizeForAlignment = pageSizeForAlignment;
    allocData.memoryType = memoryProperties.memoryType;
    allocData.allocationFlagsProperty = memoryProperties.allocationFlags;
    allocData.device = memoryProperties.device;
    allocData.setAllocId(++this->allocationsCounter);
    allocData.isInternalAllocation = memoryProperties.isInternalAllocation;

    auto retPtr = reinterpret_cast<void *>(unifiedMemoryAllocation->getGpuAddress());
    insertSVMAlloc(retPtr, allocData);
    UNRECOVERABLE_IF(useExternalHostPtrForCpu && (externalPtr != retPtr));

    return retPtr;
}

void *SVMAllocsManager::createSharedUnifiedMemoryAllocation(size_t size,
                                                            const UnifiedMemoryProperties &memoryProperties,
                                                            void *cmdQ) {
    if (memoryProperties.rootDeviceIndices.size() > 1 && memoryProperties.device == nullptr) {
        return createHostUnifiedMemoryAllocation(size, memoryProperties);
    }

    auto rootDeviceIndex = memoryProperties.getRootDeviceIndex();

    auto supportDualStorageSharedMemory = memoryManager->isLocalMemorySupported(rootDeviceIndex);

    if (debugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.get() != -1) {
        supportDualStorageSharedMemory = !!debugManager.flags.AllocateSharedAllocationsWithCpuAndGpuStorage.get();
    }

    if (supportDualStorageSharedMemory) {
        bool useKmdMigration = memoryManager->isKmdMigrationAvailable(rootDeviceIndex);
        void *unifiedMemoryPointer = nullptr;

        if (useKmdMigration) {
            unifiedMemoryPointer = createUnifiedKmdMigratedAllocation(size, {}, memoryProperties);
            if (!unifiedMemoryPointer) {
                return nullptr;
            }
        } else {
            unifiedMemoryPointer = createUnifiedAllocationWithDeviceStorage(size, {}, memoryProperties);
            if (!unifiedMemoryPointer) {
                return nullptr;
            }

            UNRECOVERABLE_IF(cmdQ == nullptr);
            auto pageFaultManager = this->memoryManager->getPageFaultManager();
            pageFaultManager->insertAllocation(unifiedMemoryPointer, size, this, cmdQ, memoryProperties.allocationFlags);
        }

        auto unifiedMemoryAllocation = this->getSVMAlloc(unifiedMemoryPointer);
        unifiedMemoryAllocation->memoryType = memoryProperties.memoryType;
        unifiedMemoryAllocation->allocationFlagsProperty = memoryProperties.allocationFlags;

        return unifiedMemoryPointer;
    }
    return createUnifiedMemoryAllocation(size, memoryProperties);
}

void *SVMAllocsManager::createUnifiedKmdMigratedAllocation(size_t size, const SvmAllocationProperties &svmProperties, const UnifiedMemoryProperties &unifiedMemoryProperties) {

    auto rootDeviceIndex = unifiedMemoryProperties.getRootDeviceIndex();
    auto &deviceBitfield = unifiedMemoryProperties.subdeviceBitfields.at(rootDeviceIndex);
    constexpr size_t pageSizeForAlignment = MemoryConstants::pageSize2M;
    const size_t alignedSize = alignUp<size_t>(size, pageSizeForAlignment);
    AllocationProperties gpuProperties{rootDeviceIndex,
                                       true,
                                       alignedSize,
                                       AllocationType::unifiedSharedMemory,
                                       false,
                                       false,
                                       deviceBitfield};

    gpuProperties.alignment = alignUpNonZero<size_t>(unifiedMemoryProperties.alignment, pageSizeForAlignment);
    gpuProperties.flags.resource48Bit = unifiedMemoryProperties.allocationFlags.flags.resource48Bit;
    auto cacheRegion = MemoryPropertiesHelper::getCacheRegion(unifiedMemoryProperties.allocationFlags);
    MemoryPropertiesHelper::fillCachePolicyInProperties(gpuProperties, false, svmProperties.readOnly, false, cacheRegion);
    auto initialPlacement = MemoryPropertiesHelper::getUSMInitialPlacement(unifiedMemoryProperties.allocationFlags);
    MemoryPropertiesHelper::setUSMInitialPlacement(gpuProperties, initialPlacement);
    GraphicsAllocation *allocationGpu = memoryManager->allocateGraphicsMemoryWithProperties(gpuProperties);
    if (!allocationGpu) {
        return nullptr;
    }
    setUnifiedAllocationProperties(allocationGpu, svmProperties);

    SvmAllocationData allocData(rootDeviceIndex);
    allocData.gpuAllocations.addAllocation(allocationGpu);
    allocData.cpuAllocation = nullptr;
    allocData.device = unifiedMemoryProperties.device;
    allocData.size = size;
    allocData.pageSizeForAlignment = pageSizeForAlignment;
    allocData.setAllocId(++this->allocationsCounter);

    auto retPtr = allocationGpu->getUnderlyingBuffer();
    insertSVMAlloc(retPtr, allocData);
    return retPtr;
}

void SVMAllocsManager::setUnifiedAllocationProperties(GraphicsAllocation *allocation, const SvmAllocationProperties &svmProperties) {
    allocation->setMemObjectsAllocationWithWritableFlags(!svmProperties.readOnly && !svmProperties.hostPtrReadOnly);
    allocation->setCoherent(svmProperties.coherent);
}

void SVMAllocsManager::reinsertToAllocsForIndirectAccess(SvmAllocationData &svmData) {
    std::unique_lock<std::mutex> lockForIndirect(mtxForIndirectAccess);
    std::unique_lock<std::shared_mutex> lock(mtx);
    for (auto alloc : svmData.gpuAllocations.getGraphicsAllocations()) {
        OPTIONAL_UNRECOVERABLE_IF(nullptr == alloc);
        internalAllocationsMap.insert({svmData.getAllocId(), alloc});
    }
}

void SVMAllocsManager::removeFromAllocsForIndirectAccess(SvmAllocationData &svmData) {
    std::unique_lock<std::mutex> lockForIndirect(mtxForIndirectAccess);
    std::unique_lock<std::shared_mutex> lock(mtx);
    internalAllocationsMap.erase(svmData.getAllocId());
}

void SVMAllocsManager::insertSVMAlloc(const SvmAllocationData &svmAllocData) {
    insertSVMAlloc(reinterpret_cast<void *>(svmAllocData.gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress()), svmAllocData);
}

void SVMAllocsManager::removeSVMAlloc(const SvmAllocationData &svmAllocData) {
    std::unique_lock<std::shared_mutex> lock(mtx);
    internalAllocationsMap.erase(svmAllocData.getAllocId());
    svmAllocs.remove(reinterpret_cast<void *>(svmAllocData.gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress()));
}

bool SVMAllocsManager::freeSVMAlloc(void *ptr, bool blocking) {
    if (svmDeferFreeAllocs.allocations.size() > 0) {
        this->freeSVMAllocDeferImpl();
    }
    SvmAllocationData *svmData = getSVMAlloc(ptr);
    if (svmData) {
        if (InternalMemoryType::deviceUnifiedMemory == svmData->memoryType &&
            this->usmDeviceAllocationsCache) {
            if (this->usmDeviceAllocationsCache->insert(svmData->gpuAllocations.getDefaultGraphicsAllocation()->getUnderlyingBufferSize(), ptr, svmData, blocking)) {
                return true;
            }
        }
        if (InternalMemoryType::hostUnifiedMemory == svmData->memoryType &&
            this->usmHostAllocationsCache) {
            if (this->usmHostAllocationsCache->insert(svmData->gpuAllocations.getDefaultGraphicsAllocation()->getUnderlyingBufferSize(), ptr, svmData, blocking)) {
                return true;
            }
        }
        if (blocking) {
            this->freeSVMAllocImpl(ptr, FreePolicyType::blocking, svmData);
        } else {
            this->freeSVMAllocImpl(ptr, FreePolicyType::none, svmData);
        }
        return true;
    }
    return false;
}

bool SVMAllocsManager::freeSVMAllocDefer(void *ptr) {

    if (svmDeferFreeAllocs.allocations.size() > 0) {
        this->freeSVMAllocDeferImpl();
    }

    SvmAllocationData *svmData = getSVMAlloc(ptr);
    if (svmData) {
        constexpr bool waitForCompletion = false;
        if (InternalMemoryType::deviceUnifiedMemory == svmData->memoryType &&
            this->usmDeviceAllocationsCache) {
            if (this->usmDeviceAllocationsCache->insert(svmData->gpuAllocations.getDefaultGraphicsAllocation()->getUnderlyingBufferSize(), ptr, svmData, waitForCompletion)) {
                return true;
            }
        }
        if (InternalMemoryType::hostUnifiedMemory == svmData->memoryType &&
            this->usmHostAllocationsCache) {
            if (this->usmHostAllocationsCache->insert(svmData->gpuAllocations.getDefaultGraphicsAllocation()->getUnderlyingBufferSize(), ptr, svmData, waitForCompletion)) {
                return true;
            }
        }
        this->freeSVMAllocImpl(ptr, FreePolicyType::defer, svmData);
        return true;
    }
    return false;
}

void SVMAllocsManager::waitForEnginesCompletion(SvmAllocationData *allocationData) {
    if (allocationData->cpuAllocation) {
        this->memoryManager->waitForEnginesCompletion(*allocationData->cpuAllocation);
    }

    for (auto &gpuAllocation : allocationData->gpuAllocations.getGraphicsAllocations()) {
        if (gpuAllocation) {
            this->memoryManager->waitForEnginesCompletion(*gpuAllocation);
        }
    }
}

void SVMAllocsManager::freeSVMAllocImpl(void *ptr, FreePolicyType policy, SvmAllocationData *svmData) {
    auto allowNonBlockingFree = policy == FreePolicyType::none;
    this->prepareIndirectAllocationForDestruction(svmData, allowNonBlockingFree);

    if (policy == FreePolicyType::blocking) {
        this->waitForEnginesCompletion(svmData);
    } else if (policy == FreePolicyType::defer) {
        if (svmData->cpuAllocation) {
            if (this->memoryManager->allocInUse(*svmData->cpuAllocation)) {
                std::lock_guard<std::shared_mutex> lock(mtx);
                if (svmDeferFreeAllocs.get(ptr) == nullptr) {
                    this->svmDeferFreeAllocs.insert(*svmData);
                }
                return;
            }
        }
        for (auto &gpuAllocation : svmData->gpuAllocations.getGraphicsAllocations()) {
            if (gpuAllocation) {
                if (this->memoryManager->allocInUse(*gpuAllocation)) {
                    std::lock_guard<std::shared_mutex> lock(mtx);
                    if (svmDeferFreeAllocs.get(ptr) == nullptr) {
                        this->svmDeferFreeAllocs.insert(*svmData);
                    }
                    return;
                }
            }
        }
    }
    auto pageFaultManager = this->memoryManager->getPageFaultManager();
    if (svmData->cpuAllocation && pageFaultManager) {
        pageFaultManager->removeAllocation(svmData->cpuAllocation->getUnderlyingBuffer());
    }
    if (svmData->gpuAllocations.getAllocationType() == AllocationType::svmZeroCopy) {
        freeZeroCopySvmAllocation(svmData);
    } else {
        freeSvmAllocationWithDeviceStorage(svmData);
    }
}

void SVMAllocsManager::freeSVMAllocDeferImpl() {
    std::vector<void *> freedPtr;
    for (auto iter = svmDeferFreeAllocs.allocations.begin(); iter != svmDeferFreeAllocs.allocations.end(); ++iter) {
        void *ptr = reinterpret_cast<void *>(iter->second.gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress());
        this->freeSVMAllocImpl(ptr, FreePolicyType::defer, this->getSVMAlloc(ptr));

        if (this->getSVMAlloc(ptr) == nullptr) {
            freedPtr.push_back(ptr);
        }
    }
    for (uint32_t i = 0; i < freedPtr.size(); ++i) {
        svmDeferFreeAllocs.allocations.erase(freedPtr[i]);
    }
}

void SVMAllocsManager::cleanupUSMAllocCaches() {
    if (this->usmDeviceAllocationsCache) {
        this->usmDeviceAllocationsCache->cleanup();
        this->usmDeviceAllocationsCache.reset(nullptr);
    }
    if (this->usmHostAllocationsCache) {
        this->usmHostAllocationsCache->cleanup();
        this->usmHostAllocationsCache.reset(nullptr);
    }
}

void SVMAllocsManager::trimUSMDeviceAllocCache() {
    this->usmDeviceAllocationsCache->trim();
}

void SVMAllocsManager::trimUSMHostAllocCache() {
    this->usmHostAllocationsCache->trim();
}

void *SVMAllocsManager::createZeroCopySvmAllocation(size_t size, const SvmAllocationProperties &svmProperties,
                                                    const RootDeviceIndicesContainer &rootDeviceIndices,
                                                    const std::map<uint32_t, DeviceBitfield> &subdeviceBitfields) {

    auto rootDeviceIndex = *rootDeviceIndices.begin();
    auto &deviceBitfield = subdeviceBitfields.at(rootDeviceIndex);
    AllocationProperties properties{rootDeviceIndex,
                                    true, // allocateMemory
                                    size,
                                    AllocationType::svmZeroCopy,
                                    false, // isMultiStorageAllocation
                                    deviceBitfield};
    MemoryPropertiesHelper::fillCachePolicyInProperties(properties, false, svmProperties.readOnly, false, properties.cacheRegion);

    RootDeviceIndicesContainer rootDeviceIndicesVector(rootDeviceIndices);

    auto maxRootDeviceIndex = *std::max_element(rootDeviceIndices.begin(), rootDeviceIndices.end(), std::less<uint32_t const>());
    SvmAllocationData allocData(maxRootDeviceIndex);

    void *usmPtr = memoryManager->createMultiGraphicsAllocationInSystemMemoryPool(rootDeviceIndicesVector, properties, allocData.gpuAllocations);
    if (!usmPtr) {
        return nullptr;
    }
    for (const auto &rootDeviceIndex : rootDeviceIndices) {
        auto allocation = allocData.gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
        allocation->setMemObjectsAllocationWithWritableFlags(!svmProperties.readOnly && !svmProperties.hostPtrReadOnly);
        allocation->setCoherent(svmProperties.coherent);
    }
    allocData.size = size;
    allocData.setAllocId(++this->allocationsCounter);

    insertSVMAlloc(usmPtr, allocData);
    return usmPtr;
}

void *SVMAllocsManager::createUnifiedAllocationWithDeviceStorage(size_t size, const SvmAllocationProperties &svmProperties, const UnifiedMemoryProperties &unifiedMemoryProperties) {
    auto rootDeviceIndex = unifiedMemoryProperties.getRootDeviceIndex();
    auto externalPtr = reinterpret_cast<void *>(unifiedMemoryProperties.allocationFlags.hostptr);
    bool useExternalHostPtrForCpu = externalPtr != nullptr;
    const size_t svmCpuAlignment = memoryManager->peekExecutionEnvironment().rootDeviceEnvironments[rootDeviceIndex]->getProductHelper().getSvmCpuAlignment();

    auto minCpuAlignment = (debugManager.flags.AlignLocalMemoryVaTo2MB.get() == 1) ? MemoryConstants::pageSize2M : MemoryConstants::pageSize64k;

    const size_t effectiveSvmCpuAlignment = std::max(minCpuAlignment, svmCpuAlignment);
    const size_t alignment = alignUpNonZero<size_t>(unifiedMemoryProperties.alignment, effectiveSvmCpuAlignment);
    const size_t alignedCpuSize = alignUp<size_t>(size, alignment);
    DeviceBitfield subDevices = unifiedMemoryProperties.subdeviceBitfields.at(rootDeviceIndex);
    AllocationProperties cpuProperties{rootDeviceIndex,
                                       !useExternalHostPtrForCpu, // allocateMemory
                                       alignedCpuSize, AllocationType::svmCpu,
                                       false, // isMultiStorageAllocation
                                       subDevices};
    cpuProperties.alignment = alignment;
    cpuProperties.flags.isUSMHostAllocation = useExternalHostPtrForCpu;
    cpuProperties.forceKMDAllocation = true;
    cpuProperties.makeGPUVaDifferentThanCPUPtr = true;
    auto cacheRegion = MemoryPropertiesHelper::getCacheRegion(unifiedMemoryProperties.allocationFlags);
    MemoryPropertiesHelper::fillCachePolicyInProperties(cpuProperties, false, svmProperties.readOnly, false, cacheRegion);
    GraphicsAllocation *allocationCpu = memoryManager->allocateGraphicsMemoryWithProperties(cpuProperties, externalPtr);
    if (!allocationCpu) {
        return nullptr;
    }
    setUnifiedAllocationProperties(allocationCpu, svmProperties);
    void *svmPtr = allocationCpu->getUnderlyingBuffer();
    UNRECOVERABLE_IF(useExternalHostPtrForCpu && (externalPtr != svmPtr));

    const size_t alignedGpuSize = alignUp<size_t>(size, MemoryConstants::pageSize64k);
    AllocationProperties gpuProperties{rootDeviceIndex,
                                       false,
                                       alignedGpuSize,
                                       AllocationType::svmGpu,
                                       false,
                                       subDevices.count() > 1,
                                       subDevices};

    gpuProperties.alignment = alignment;
    auto compressionSupported = false;
    if (unifiedMemoryProperties.device) {
        auto &gfxCoreHelper = unifiedMemoryProperties.device->getGfxCoreHelper();
        auto &hwInfo = unifiedMemoryProperties.device->getHardwareInfo();
        compressionSupported = gfxCoreHelper.usmCompressionSupported(hwInfo);
        compressionSupported &= memoryManager->isCompressionSupportedForShareable(unifiedMemoryProperties.allocationFlags.flags.shareable);
    }
    gpuProperties.flags.preferCompressed = compressionSupported;

    MemoryPropertiesHelper::fillCachePolicyInProperties(gpuProperties, false, svmProperties.readOnly, false, cacheRegion);
    GraphicsAllocation *allocationGpu = memoryManager->allocateGraphicsMemoryWithProperties(gpuProperties, svmPtr);
    if (!allocationGpu) {
        memoryManager->freeGraphicsMemory(allocationCpu);
        return nullptr;
    }
    setUnifiedAllocationProperties(allocationGpu, svmProperties);

    SvmAllocationData allocData(rootDeviceIndex);
    allocData.gpuAllocations.addAllocation(allocationGpu);
    allocData.cpuAllocation = allocationCpu;
    allocData.device = unifiedMemoryProperties.device;
    allocData.pageSizeForAlignment = effectiveSvmCpuAlignment;
    allocData.size = size;
    allocData.setAllocId(++this->allocationsCounter);

    insertSVMAlloc(svmPtr, allocData);
    return svmPtr;
}

void SVMAllocsManager::freeSVMData(SvmAllocationData *svmData) {
    std::unique_lock<std::mutex> lockForIndirect(mtxForIndirectAccess);
    std::unique_lock<std::shared_mutex> lock(mtx);
    internalAllocationsMap.erase(svmData->getAllocId());
    svmAllocs.remove(reinterpret_cast<void *>(svmData->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress()));
}

void SVMAllocsManager::freeZeroCopySvmAllocation(SvmAllocationData *svmData) {
    auto gpuAllocations = svmData->gpuAllocations;
    freeSVMData(svmData);
    for (const auto &graphicsAllocation : gpuAllocations.getGraphicsAllocations()) {
        memoryManager->freeGraphicsMemory(graphicsAllocation);
    }
}

void SVMAllocsManager::initUsmDeviceAllocationsCache(Device &device) {
    this->usmDeviceAllocationsCache.reset(new SvmAllocationCache);
    this->usmDeviceAllocationsCache->allocations.reserve(128u);
    this->usmDeviceAllocationsCache->svmAllocsManager = this;
    this->usmDeviceAllocationsCache->memoryManager = memoryManager;
    if (auto usmReuseCleaner = device.getExecutionEnvironment()->unifiedMemoryReuseCleaner.get()) {
        usmReuseCleaner->registerSvmAllocationCache(this->usmDeviceAllocationsCache.get());
    }
}

void SVMAllocsManager::initUsmHostAllocationsCache() {
    this->usmHostAllocationsCache.reset(new SvmAllocationCache);
    this->usmHostAllocationsCache->allocations.reserve(128u);
    this->usmHostAllocationsCache->svmAllocsManager = this;
    this->usmHostAllocationsCache->memoryManager = memoryManager;
    if (auto usmReuseCleaner = this->memoryManager->peekExecutionEnvironment().unifiedMemoryReuseCleaner.get()) {
        usmReuseCleaner->registerSvmAllocationCache(this->usmHostAllocationsCache.get());
    }
}

void SVMAllocsManager::initUsmAllocationsCaches(Device &device) {
    const bool debuggerEnabled = nullptr != device.getDebugger();
    bool usmDeviceAllocationsCacheEnabled = NEO::ApiSpecificConfig::isDeviceAllocationCacheEnabled() &&
                                            device.getProductHelper().isDeviceUsmAllocationReuseSupported() &&
                                            !debuggerEnabled;
    if (debugManager.flags.ExperimentalEnableDeviceAllocationCache.get() != -1) {
        usmDeviceAllocationsCacheEnabled = !!debugManager.flags.ExperimentalEnableDeviceAllocationCache.get();
    }
    if (usmDeviceAllocationsCacheEnabled && device.usmReuseInfo.getMaxAllocationsSavedForReuseSize() > 0u) {
        device.getExecutionEnvironment()->initializeUnifiedMemoryReuseCleaner(device.isAnyDirectSubmissionLightEnabled());
        this->initUsmDeviceAllocationsCache(device);
        if (debugManager.flags.SetCommandStreamReceiver.get() > 0) {
            this->usmDeviceAllocationsCache->requireUpdatingAllocsForIndirectAccess = true;
        }
    }

    bool usmHostAllocationsCacheEnabled = NEO::ApiSpecificConfig::isHostAllocationCacheEnabled() &&
                                          device.getProductHelper().isHostUsmAllocationReuseSupported() &&
                                          !debuggerEnabled;
    if (debugManager.flags.ExperimentalEnableHostAllocationCache.get() != -1) {
        usmHostAllocationsCacheEnabled = !!debugManager.flags.ExperimentalEnableHostAllocationCache.get();
    }
    if (usmHostAllocationsCacheEnabled && this->memoryManager->usmReuseInfo.getMaxAllocationsSavedForReuseSize() > 0u) {
        device.getExecutionEnvironment()->initializeUnifiedMemoryReuseCleaner(device.isAnyDirectSubmissionLightEnabled());
        this->initUsmHostAllocationsCache();
        if (debugManager.flags.SetCommandStreamReceiver.get() > 0) {
            this->usmHostAllocationsCache->requireUpdatingAllocsForIndirectAccess = true;
        }
    }
}

void SVMAllocsManager::freeSvmAllocationWithDeviceStorage(SvmAllocationData *svmData) {
    auto graphicsAllocations = svmData->gpuAllocations.getGraphicsAllocations();
    GraphicsAllocation *cpuAllocation = svmData->cpuAllocation;
    bool isImportedAllocation = svmData->isImportedAllocation;
    freeSVMData(svmData);
    for (auto gpuAllocation : graphicsAllocations) {
        memoryManager->freeGraphicsMemory(gpuAllocation, isImportedAllocation);
    }
    memoryManager->freeGraphicsMemory(cpuAllocation, isImportedAllocation);
}

bool SVMAllocsManager::hasHostAllocations() {
    std::shared_lock<std::shared_mutex> lock(mtx);
    for (auto &allocation : this->svmAllocs.allocations) {
        if (allocation.second->memoryType == InternalMemoryType::hostUnifiedMemory) {
            return true;
        }
    }
    return false;
}

void SVMAllocsManager::makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, TaskCountType taskCount) {
    std::unique_lock<std::shared_mutex> lock(mtx);
    bool parseAllAllocations = false;
    auto entry = indirectAllocationsResidency.find(&commandStreamReceiver);
    TaskCountType previousCounter = 0;
    if (entry == indirectAllocationsResidency.end()) {
        parseAllAllocations = true;

        InternalAllocationsTracker tracker = {};
        tracker.latestResidentObjectId = this->allocationsCounter;
        tracker.latestSentTaskCount = taskCount;

        this->indirectAllocationsResidency.insert(std::make_pair(&commandStreamReceiver, tracker));
    } else {
        if (this->allocationsCounter > entry->second.latestResidentObjectId) {
            parseAllAllocations = true;
            previousCounter = entry->second.latestResidentObjectId;
            entry->second.latestResidentObjectId = this->allocationsCounter;
        }
        entry->second.latestSentTaskCount = taskCount;
    }
    if (parseAllAllocations) {
        auto currentCounter = this->allocationsCounter.load();
        for (auto allocationId = static_cast<uint32_t>(previousCounter + 1); allocationId <= currentCounter; allocationId++) {
            makeResidentForAllocationsWithId(allocationId, commandStreamReceiver);
        }
    }
}

void SVMAllocsManager::prepareIndirectAllocationForDestruction(SvmAllocationData *allocationData, bool isNonBlockingFree) {
    std::unique_lock<std::shared_mutex> lock(mtx);
    if (this->indirectAllocationsResidency.size() > 0u) {
        for (auto &internalAllocationsHandling : this->indirectAllocationsResidency) {
            auto commandStreamReceiver = internalAllocationsHandling.first;
            auto gpuAllocation = allocationData->gpuAllocations.getGraphicsAllocation(commandStreamReceiver->getRootDeviceIndex());
            if (gpuAllocation == nullptr) {
                continue;
            }

            // If this is non blocking free, we will wait for latest known usage of this allocation.
            // However, if this is blocking free, we must select "safest" task count to wait for.
            TaskCountType desiredTaskCount = std::max(internalAllocationsHandling.second.latestSentTaskCount, gpuAllocation->getTaskCount(commandStreamReceiver->getOsContext().getContextId()));
            if (isNonBlockingFree) {
                desiredTaskCount = gpuAllocation->getTaskCount(commandStreamReceiver->getOsContext().getContextId());
            }
            if (gpuAllocation->isAlwaysResident(commandStreamReceiver->getOsContext().getContextId())) {
                gpuAllocation->updateResidencyTaskCount(GraphicsAllocation::objectNotResident, commandStreamReceiver->getOsContext().getContextId());
                gpuAllocation->updateResidencyTaskCount(desiredTaskCount, commandStreamReceiver->getOsContext().getContextId());
                gpuAllocation->updateTaskCount(desiredTaskCount, commandStreamReceiver->getOsContext().getContextId());
            }
        }
    }
}

SvmMapOperation *SVMAllocsManager::getSvmMapOperation(const void *ptr) {
    std::shared_lock<std::shared_mutex> lock(mtx);
    return svmMapOperations.get(ptr);
}

void SVMAllocsManager::insertSvmMapOperation(void *regionSvmPtr, size_t regionSize, void *baseSvmPtr, size_t offset, bool readOnlyMap) {
    SvmMapOperation svmMapOperation;
    svmMapOperation.regionSvmPtr = regionSvmPtr;
    svmMapOperation.baseSvmPtr = baseSvmPtr;
    svmMapOperation.offset = offset;
    svmMapOperation.regionSize = regionSize;
    svmMapOperation.readOnlyMap = readOnlyMap;
    std::unique_lock<std::shared_mutex> lock(mtx);
    svmMapOperations.insert(svmMapOperation);
}

void SVMAllocsManager::removeSvmMapOperation(const void *regionSvmPtr) {
    std::unique_lock<std::shared_mutex> lock(mtx);
    svmMapOperations.remove(regionSvmPtr);
}

AllocationType SVMAllocsManager::getGraphicsAllocationTypeAndCompressionPreference(const UnifiedMemoryProperties &unifiedMemoryProperties, bool &compressionEnabled) const {
    compressionEnabled = false;

    AllocationType allocationType = AllocationType::bufferHostMemory;
    if (unifiedMemoryProperties.memoryType == InternalMemoryType::deviceUnifiedMemory) {
        if (unifiedMemoryProperties.allocationFlags.allocFlags.allocWriteCombined) {
            allocationType = AllocationType::writeCombined;
        } else {
            UNRECOVERABLE_IF(nullptr == unifiedMemoryProperties.device);
            auto &gfxCoreHelper = unifiedMemoryProperties.device->getGfxCoreHelper();
            auto &hwInfo = unifiedMemoryProperties.device->getHardwareInfo();
            if (CompressionSelector::allowStatelessCompression() || gfxCoreHelper.usmCompressionSupported(hwInfo)) {
                compressionEnabled = true;
            }
            if (unifiedMemoryProperties.requestedAllocationType != AllocationType::unknown) {
                allocationType = unifiedMemoryProperties.requestedAllocationType;
            } else {
                allocationType = AllocationType::buffer;
            }
        }
    }
    return allocationType;
}

static uint32_t getSubDeviceId(Device &device) {
    if (!device.isSubDevice()) {
        uint32_t deviceBitField = static_cast<uint32_t>(device.getDeviceBitfield().to_ulong());
        if (device.getDeviceBitfield().count() > 1) {
            deviceBitField &= ~deviceBitField + 1;
        }
        return Math::log2(deviceBitField);
    }
    return static_cast<NEO::SubDevice *>(&device)->getSubDeviceIndex();
};

static NEO::SubDeviceIdsVec getSubDeviceIds(CommandStreamReceiver &csr) {
    SubDeviceIdsVec subDeviceIds;
    for (auto subDeviceId = 0u; subDeviceId < csr.getOsContext().getDeviceBitfield().size(); subDeviceId++) {
        if (csr.getOsContext().getDeviceBitfield().test(subDeviceId)) {
            subDeviceIds.push_back(subDeviceId);
        }
    }
    return subDeviceIds;
};

void SVMAllocsManager::sharedSystemMemAdvise(Device &device, MemAdvise memAdviseOp, const void *ptr, const size_t size) {

    // All vm_ids on a single device for shared system USM allocation
    auto subDeviceIds = NEO::SubDevice::getSubDeviceIdsFromDevice(device);

    memoryManager->setSharedSystemMemAdvise(ptr, size, memAdviseOp, subDeviceIds, device.getRootDeviceIndex());
}

void SVMAllocsManager::prefetchMemory(Device &device, CommandStreamReceiver &commandStreamReceiver, const void *ptr, const size_t size) {

    auto svmData = getSVMAlloc(ptr);

    if (!svmData) {
        if (device.areSharedSystemAllocationsAllowed()) {
            // Single vm_id for shared system USM allocation
            auto subDeviceIds = SubDeviceIdsVec{getSubDeviceId(device)};
            memoryManager->prefetchSharedSystemAlloc(ptr, size, subDeviceIds, device.getRootDeviceIndex());
        }
        return;
    }

    if ((memoryManager->isKmdMigrationAvailable(device.getRootDeviceIndex()) &&
         (svmData->memoryType == InternalMemoryType::sharedUnifiedMemory))) {
        auto gfxAllocation = svmData->gpuAllocations.getGraphicsAllocation(device.getRootDeviceIndex());
        auto subDeviceIds = commandStreamReceiver.getActivePartitions() > 1 ? getSubDeviceIds(commandStreamReceiver) : SubDeviceIdsVec{getSubDeviceId(device)};
        memoryManager->setMemPrefetch(gfxAllocation, subDeviceIds, device.getRootDeviceIndex());
    }
}

void SVMAllocsManager::prefetchSVMAllocs(Device &device, CommandStreamReceiver &commandStreamReceiver) {
    std::shared_lock<std::shared_mutex> lock(mtx);

    auto subDeviceIds = commandStreamReceiver.getActivePartitions() > 1 ? getSubDeviceIds(commandStreamReceiver) : SubDeviceIdsVec{getSubDeviceId(device)};
    if (memoryManager->isKmdMigrationAvailable(device.getRootDeviceIndex())) {
        for (auto &allocation : this->svmAllocs.allocations) {
            NEO::SvmAllocationData svmData = *allocation.second;

            if (svmData.memoryType == InternalMemoryType::sharedUnifiedMemory) {
                auto gfxAllocation = svmData.gpuAllocations.getGraphicsAllocation(device.getRootDeviceIndex());
                memoryManager->setMemPrefetch(gfxAllocation, subDeviceIds, device.getRootDeviceIndex());
            }
        }
    }
}

void SVMAllocsManager::sharedSystemAtomicAccess(Device &device, AtomicAccessMode mode, const void *ptr, const size_t size) {

    // All vm_ids on a single device for shared system USM allocation
    auto subDeviceIds = NEO::SubDevice::getSubDeviceIdsFromDevice(device);

    memoryManager->setSharedSystemAtomicAccess(ptr, size, mode, subDeviceIds, device.getRootDeviceIndex());
}

std::unique_lock<std::mutex> SVMAllocsManager::obtainOwnership() {
    return std::unique_lock<std::mutex>(mtxForIndirectAccess);
}

void SVMAllocsManager::insertSVMAlloc(void *svmPtr, const SvmAllocationData &allocData) {
    std::unique_lock<std::shared_mutex> lock(mtx);
    this->svmAllocs.insert(svmPtr, allocData);
    UNRECOVERABLE_IF(internalAllocationsMap.count(allocData.getAllocId()) > 0);
    for (auto alloc : allocData.gpuAllocations.getGraphicsAllocations()) {
        if (alloc != nullptr) {
            internalAllocationsMap.insert({allocData.getAllocId(), alloc});
        }
    }
}

/**
 * @brief This method calls makeResident for allocations with a specific allocId.
 * Since a single allocation id might be shared by different allocations in a multi-GPU scenario,
 * this method iterates over all of these allocations and selects the correct one based on device index.
 *
 * @param[in] allocationId id of the allocation which should be resident
 * @param[in] csr command stream receiver which will make allocation resident
 */
void SVMAllocsManager::makeResidentForAllocationsWithId(uint32_t allocationId, CommandStreamReceiver &csr) {
    for (auto [iter, rangeEnd] = internalAllocationsMap.equal_range(allocationId); iter != rangeEnd; ++iter) {
        auto gpuAllocation = iter->second;
        if (gpuAllocation->getRootDeviceIndex() != csr.getRootDeviceIndex()) {
            continue;
        }
        csr.makeResident(*gpuAllocation);
        gpuAllocation->updateResidencyTaskCount(GraphicsAllocation::objectAlwaysResident, csr.getOsContext().getContextId());
        gpuAllocation->setEvictable(false);
    }
}

bool SVMAllocsManager::submitIndirectAllocationsAsPack(CommandStreamReceiver &csr) {
    auto submitAsPack = memoryManager->allowIndirectAllocationsAsPack(csr.getRootDeviceIndex());
    if (debugManager.flags.MakeIndirectAllocationsResidentAsPack.get() != -1) {
        submitAsPack = !!NEO::debugManager.flags.MakeIndirectAllocationsResidentAsPack.get();
    }

    if (submitAsPack) {
        makeIndirectAllocationsResident(csr, csr.peekTaskCount() + 1u);
    }
    return submitAsPack;
}
} // namespace NEO