feature usm: add debug flag to allocate shared USM in heap extended

Related-To: NEO-7665
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
Author: Mateusz Jablonski
Date: 2023-03-29 09:22:18 +00:00
Committed by: Compute-Runtime-Automation
Commit: e4a446df58 (parent 1a4dda57e7)

9 changed files with 158 additions and 16 deletions


@@ -483,6 +483,7 @@ DECLARE_DEBUG_VARIABLE(bool, EnableDebuggerMmapMemoryAccess, false, "Mmap used t
 DECLARE_DEBUG_VARIABLE(bool, ForceDefaultGrfCompilationMode, false, "Adds build option -cl-intel-128-GRF-per-thread to force kernel compilation in Default-GRF mode")
 DECLARE_DEBUG_VARIABLE(bool, ForceLargeGrfCompilationMode, false, "Adds build option -cl-intel-256-GRF-per-thread to force kernel compilation in Large-GRF mode")
 DECLARE_DEBUG_VARIABLE(bool, EnableConcurrentSharedCrossP2PDeviceAccess, false, "Enables the concurrent use between host and peer devices of shared-allocations ")
+DECLARE_DEBUG_VARIABLE(bool, AllocateSharedAllocationsInHeapExtended, false, "When enabled driver can allocate shared unified memory allocation in heap extended. (0 - disable, 1 - enable)")
 DECLARE_DEBUG_VARIABLE(int32_t, ForceAutoGrfCompilationMode, -1, "Adds build option -*-intel-enable-auto-large-GRF-mode to force kernel compilation")
 DECLARE_DEBUG_VARIABLE(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API version")
 DECLARE_DEBUG_VARIABLE(int32_t, ForceOCL21FeaturesSupport, -1, "-1: default, 0: disable, 1:enable. Force support of OpenCL 2.0 and OpenCL 2.1 API features")
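
Editor's note: the hunk above adds one entry to what is, in effect, an X-macro registry; the same list of DECLARE_DEBUG_VARIABLE entries is expanded under different macro definitions to produce the flag storage, its defaults, and its printable names. Below is a minimal, self-contained sketch of that pattern under those assumptions; DEBUG_VARIABLES, DebugFlags, and describeFlags are illustrative names, not NEO's actual identifiers.

// Minimal sketch of an X-macro debug-flag registry, assuming a NEO-like layout.
// The helper names here are illustrative, not the runtime's actual ones.
#include <cstdint>
#include <string>

// In the real project the entries live in a shared .inl file that is included
// under different macro definitions; inlined here for brevity.
#define DEBUG_VARIABLES(X)                                                                    \
    X(bool, AllocateSharedAllocationsInHeapExtended, false,                                   \
      "When enabled driver can allocate shared unified memory allocation in heap extended.") \
    X(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API version")

// Expansion 1: one typed member per flag, initialized to its default value.
struct DebugFlags {
#define DECLARE_MEMBER(type, name, defaultValue, description) type name = defaultValue;
    DEBUG_VARIABLES(DECLARE_MEMBER)
#undef DECLARE_MEMBER
};

// Expansion 2: a readable dump of every flag's current value.
inline std::string describeFlags(const DebugFlags &flags) {
    std::string out;
#define PRINT_FLAG(type, name, defaultValue, description) \
    out += #name; out += " = "; out += std::to_string(flags.name); out += "\n";
    DEBUG_VARIABLES(PRINT_FLAG)
#undef PRINT_FLAG
    return out;
}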


@@ -2128,7 +2128,19 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const
     auto alignment = allocationData.alignment;
     auto totalSizeToAlloc = size + alignment;
-    auto cpuPointer = this->mmapFunction(0, totalSizeToAlloc, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+    uint64_t preferredAddress = 0;
+    auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex);
+    auto canAllocateInHeapExtended = DebugManager.flags.AllocateSharedAllocationsInHeapExtended.get();
+    if (canAllocateInHeapExtended && gfxPartition->getHeapLimit(HeapIndex::HEAP_EXTENDED) > 0u && !allocationData.flags.resource48Bit) {
+        preferredAddress = acquireGpuRange(totalSizeToAlloc, allocationData.rootDeviceIndex, HeapIndex::HEAP_EXTENDED);
+    }
+    auto cpuPointer = this->mmapFunction(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+    if (castToUint64(cpuPointer) != preferredAddress) {
+        releaseGpuRange(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc, allocationData.rootDeviceIndex);
+        preferredAddress = 0;
+    }
     if (cpuPointer == MAP_FAILED) {
         PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s", "mmap return of MAP_FAILED\n");
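
Editor's note: the core of this hunk is "reserve a GPU VA range in HEAP_EXTENDED, ask mmap to place the CPU mapping at that same address, and fall back if the kernel does not honor the hint". The following standalone sketch shows that hint-and-verify pattern with plain POSIX mmap and no NEO types; mapAtPreferredAddress is an illustrative name.

// Standalone illustration of mmap at a preferred address with verification,
// assuming POSIX semantics: without MAP_FIXED the address is only a hint.
#include <sys/mman.h>
#include <cstddef>
#include <cstdint>
#include <cstdio>

void *mapAtPreferredAddress(uint64_t preferredAddress, size_t size) {
    void *hint = reinterpret_cast<void *>(preferredAddress);
    // PROT_NONE placeholder mapping, as in the driver code; data pages are mapped
    // over it later. MAP_FIXED is deliberately avoided so an occupied range is
    // not clobbered.
    void *ptr = mmap(hint, size, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
    if (ptr == MAP_FAILED) {
        std::fprintf(stderr, "mmap failed\n");
        return nullptr;
    }
    if (preferredAddress != 0 && ptr != hint) {
        // The kernel ignored the hint: the caller must release whatever GPU VA
        // reservation was tied to preferredAddress and continue with ptr instead.
        std::fprintf(stderr, "hint not honored, got %p\n", ptr);
    }
    return ptr;
}

int main() {
    constexpr size_t size = 1u << 20; // 1 MiB
    void *ptr = mapAtPreferredAddress(0, size); // 0 means no preference
    if (ptr != nullptr) {
        munmap(ptr, size);
    }
    return 0;
}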
@@ -2160,6 +2172,7 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const
     if (ret) {
         this->munmapFunction(cpuPointer, totalSizeToAlloc);
+        releaseGpuRange(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc, allocationData.rootDeviceIndex);
         return nullptr;
     }
@@ -2169,6 +2182,7 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const
     if (!ioctlHelper->setVmBoAdvise(bo->peekHandle(), vmAdviseAttribute, nullptr)) {
         this->munmapFunction(cpuBasePointer, totalSizeToAlloc);
+        releaseGpuRange(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc, allocationData.rootDeviceIndex);
         return nullptr;
     }
@@ -2182,6 +2196,7 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const
     uint64_t offset = 0;
     if (!retrieveMmapOffsetForBufferObject(allocationData.rootDeviceIndex, *bo, mmapOffsetWb, offset)) {
         this->munmapFunction(cpuBasePointer, totalSizeToAlloc);
+        releaseGpuRange(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc, allocationData.rootDeviceIndex);
         return nullptr;
     }
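
Editor's note: the three hunks above add the same releaseGpuRange call to each early-return error path. A scope guard is a common way to express that kind of paired acquire/release once instead of repeating it; the sketch below is a generic alternative, not NEO code, and ScopeGuard, releaseReservedRange, and the usage skeleton are illustrative names.

// Generic scope-guard sketch for "release the reserved range unless we succeed".
#include <utility>

template <typename F>
class ScopeGuard {
  public:
    explicit ScopeGuard(F &&f) : func(std::forward<F>(f)) {}
    ~ScopeGuard() {
        if (active) {
            func();
        }
    }
    void dismiss() { active = false; } // call on the success path
    ScopeGuard(const ScopeGuard &) = delete;
    ScopeGuard &operator=(const ScopeGuard &) = delete;

  private:
    F func;
    bool active = true;
};

// Usage sketch: the guard fires on every early return, success dismisses it.
// bool createAllocation(uint64_t preferredAddress, size_t size) {
//     ScopeGuard releaseOnFailure{[&] { releaseReservedRange(preferredAddress, size); }};
//     if (!stepOne()) { return false; }   // reserved range released here
//     if (!stepTwo()) { return false; }   // and here
//     releaseOnFailure.dismiss();         // keep the reservation on success
//     return true;
// }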
@@ -2200,6 +2215,7 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const
     auto allocation = std::make_unique<DrmAllocation>(allocationData.rootDeviceIndex, allocationData.type, bos, cpuPointer, canonizedGpuAddress, size, MemoryPool::LocalMemory);
     allocation->setMmapPtr(cpuBasePointer);
     allocation->setMmapSize(totalSizeToAlloc);
+    allocation->setReservedAddressRange(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc);
     if (!allocation->setCacheRegion(&drm, static_cast<CacheRegion>(allocationData.cacheRegion))) {
         this->munmapFunction(cpuBasePointer, totalSizeToAlloc);
         for (auto bo : bos) {
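
Editor's note: setReservedAddressRange in the last hunk records the reserved VA range on the allocation object, so the range can be released when the allocation is freed rather than leaked. Below is a minimal sketch of that ownership hand-off; apart from setReservedAddressRange, which appears in the diff, every name is illustrative and not the DrmAllocation API.

// Minimal sketch of handing a reserved address range to the allocation object so
// that freeing the allocation also releases the range.
#include <cstddef>

struct ReservedRange {
    void *address = nullptr;
    size_t size = 0;
};

class Allocation {
  public:
    void setReservedAddressRange(void *address, size_t size) {
        reservedRange = {address, size};
    }
    const ReservedRange &getReservedAddressRange() const { return reservedRange; }

  private:
    ReservedRange reservedRange{};
};

class MemoryManagerSketch {
  public:
    void freeAllocation(Allocation &allocation) {
        const auto &range = allocation.getReservedAddressRange();
        if (range.address != nullptr) {
            releaseGpuRange(range.address, range.size); // give the VA back to the heap
        }
        // ... unmap the CPU pointer, close buffer objects, etc.
    }

  private:
    void releaseGpuRange(void *address, size_t size) {
        // Placeholder: in the driver this returns the range to the GfxPartition heap.
        (void)address;
        (void)size;
    }
};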