mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-11 16:45:25 +08:00
feature usm: add debug flag to allocate shared USM in heap extended
Related-To: NEO-7665 Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
1a4dda57e7
commit
e4a446df58
@@ -483,6 +483,7 @@ DECLARE_DEBUG_VARIABLE(bool, EnableDebuggerMmapMemoryAccess, false, "Mmap used t
|
||||
DECLARE_DEBUG_VARIABLE(bool, ForceDefaultGrfCompilationMode, false, "Adds build option -cl-intel-128-GRF-per-thread to force kernel compilation in Default-GRF mode")
|
||||
DECLARE_DEBUG_VARIABLE(bool, ForceLargeGrfCompilationMode, false, "Adds build option -cl-intel-256-GRF-per-thread to force kernel compilation in Large-GRF mode")
|
||||
DECLARE_DEBUG_VARIABLE(bool, EnableConcurrentSharedCrossP2PDeviceAccess, false, "Enables the concurrent use between host and peer devices of shared-allocations ")
|
||||
DECLARE_DEBUG_VARIABLE(bool, AllocateSharedAllocationsInHeapExtended, false, "When enabled driver can allocate shared unified memory allocation in heap extended. (0 - disable, 1 - enable)")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceAutoGrfCompilationMode, -1, "Adds build option -*-intel-enable-auto-large-GRF-mode to force kernel compilation")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceOCLVersion, 0, "Force specific OpenCL API version")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceOCL21FeaturesSupport, -1, "-1: default, 0: disable, 1:enable. Force support of OpenCL 2.0 and OpenCL 2.1 API features")
|
||||
|
||||
@@ -2128,7 +2128,19 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const
|
||||
auto alignment = allocationData.alignment;
|
||||
|
||||
auto totalSizeToAlloc = size + alignment;
|
||||
auto cpuPointer = this->mmapFunction(0, totalSizeToAlloc, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
|
||||
|
||||
uint64_t preferredAddress = 0;
|
||||
auto gfxPartition = getGfxPartition(allocationData.rootDeviceIndex);
|
||||
auto canAllocateInHeapExtended = DebugManager.flags.AllocateSharedAllocationsInHeapExtended.get();
|
||||
if (canAllocateInHeapExtended && gfxPartition->getHeapLimit(HeapIndex::HEAP_EXTENDED) > 0u && !allocationData.flags.resource48Bit) {
|
||||
preferredAddress = acquireGpuRange(totalSizeToAlloc, allocationData.rootDeviceIndex, HeapIndex::HEAP_EXTENDED);
|
||||
}
|
||||
|
||||
auto cpuPointer = this->mmapFunction(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc, PROT_NONE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
|
||||
if (castToUint64(cpuPointer) != preferredAddress) {
|
||||
releaseGpuRange(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc, allocationData.rootDeviceIndex);
|
||||
preferredAddress = 0;
|
||||
}
|
||||
|
||||
if (cpuPointer == MAP_FAILED) {
|
||||
PRINT_DEBUG_STRING(DebugManager.flags.PrintDebugMessages.get(), stderr, "%s", "mmap return of MAP_FAILED\n");
|
||||
@@ -2160,6 +2172,7 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const
|
||||
|
||||
if (ret) {
|
||||
this->munmapFunction(cpuPointer, totalSizeToAlloc);
|
||||
releaseGpuRange(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc, allocationData.rootDeviceIndex);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@@ -2169,6 +2182,7 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const
|
||||
|
||||
if (!ioctlHelper->setVmBoAdvise(bo->peekHandle(), vmAdviseAttribute, nullptr)) {
|
||||
this->munmapFunction(cpuBasePointer, totalSizeToAlloc);
|
||||
releaseGpuRange(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc, allocationData.rootDeviceIndex);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@@ -2182,6 +2196,7 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const
|
||||
uint64_t offset = 0;
|
||||
if (!retrieveMmapOffsetForBufferObject(allocationData.rootDeviceIndex, *bo, mmapOffsetWb, offset)) {
|
||||
this->munmapFunction(cpuBasePointer, totalSizeToAlloc);
|
||||
releaseGpuRange(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc, allocationData.rootDeviceIndex);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@@ -2200,6 +2215,7 @@ GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const
|
||||
auto allocation = std::make_unique<DrmAllocation>(allocationData.rootDeviceIndex, allocationData.type, bos, cpuPointer, canonizedGpuAddress, size, MemoryPool::LocalMemory);
|
||||
allocation->setMmapPtr(cpuBasePointer);
|
||||
allocation->setMmapSize(totalSizeToAlloc);
|
||||
allocation->setReservedAddressRange(reinterpret_cast<void *>(preferredAddress), totalSizeToAlloc);
|
||||
if (!allocation->setCacheRegion(&drm, static_cast<CacheRegion>(allocationData.cacheRegion))) {
|
||||
this->munmapFunction(cpuBasePointer, totalSizeToAlloc);
|
||||
for (auto bo : bos) {
|
||||
|
||||
Reference in New Issue
Block a user