Add new unified shared memory model

Related-To: NEO-3312

Change-Id: I640fb8d120729fa8bb2a23ff65c74c41ef27260d
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2020-10-22 14:13:44 +02:00
committed by sys_ocldev
parent e82f219816
commit 045632f355
8 changed files with 76 additions and 12 deletions

View File

@@ -207,6 +207,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideThreadArbitrationPolicy, -1, "-1 (dont o
DECLARE_DEBUG_VARIABLE(int32_t, OverrideAubDeviceId, -1, "-1 dont override, any other: use this value for AUB generation device id")
DECLARE_DEBUG_VARIABLE(int32_t, EnableTimestampPacket, -1, "-1: default, 0: disable, 1:enable. Write Timestamp Packet for each set of gpu walkers")
DECLARE_DEBUG_VARIABLE(int32_t, AllocateSharedAllocationsWithCpuAndGpuStorage, -1, "When enabled driver creates cpu & gpu storage for shared unified memory allocations. (-1 - devices default mode, 0 - disable, 1 - enable)")
// UseKmdMigration: when set to a value other than -1, it is read by the SVM allocations manager to choose
// between the KMD-migrated shared allocation path (non-zero) and the CPU+GPU storage path that registers
// the allocation with the page fault manager (zero). With -1 the manager keeps KMD migration off.
DECLARE_DEBUG_VARIABLE(int32_t, UseKmdMigration, -1, "-1: devices default mode (currently - disabled), 0: disable - pagefault handling by UMD using handler for SIGSEGV, 1: enable - pagefault handling by KMD, GEM objects migrated by KMD upon access)")
DECLARE_DEBUG_VARIABLE(int32_t, ForceSemaphoreDelayBetweenWaits, -1, "Specifies the minimum number of microseconds allowed for command streamer to wait before re-fetching the data. 0 - poll interval will be equal to the memory latency of the read completion")
DECLARE_DEBUG_VARIABLE(int32_t, ForceLocalMemoryAccessMode, -1, "-1: don't override, 0: default rules apply, 1: CPU can access local memory, 3: CPU never accesses local memory")
DECLARE_DEBUG_VARIABLE(int32_t, ForceUserptrAlignment, -1, "-1: no force (4kb), >0: n kb alignment")

View File

@@ -94,7 +94,8 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation> {
SEMAPHORE_BUFFER,
DEBUG_CONTEXT_SAVE_AREA,
DEBUG_SBA_TRACKING_BUFFER,
DEBUG_MODULE_AREA
DEBUG_MODULE_AREA,
UNIFIED_SHARED_MEMORY
};
~GraphicsAllocation() override;

View File

@@ -215,23 +215,72 @@ void *SVMAllocsManager::createSharedUnifiedMemoryAllocation(uint32_t rootDeviceI
}
if (supportDualStorageSharedMemory) {
auto unifiedMemoryPointer = createUnifiedAllocationWithDeviceStorage(rootDeviceIndex, size, {}, memoryProperties);
if (!unifiedMemoryPointer) {
return nullptr;
bool useKmdMigration = false;
if (DebugManager.flags.UseKmdMigration.get() != -1) {
useKmdMigration = DebugManager.flags.UseKmdMigration.get();
}
void *unifiedMemoryPointer = nullptr;
if (useKmdMigration) {
unifiedMemoryPointer = createUnifiedKmdMigratedAllocation(rootDeviceIndex, size, {}, memoryProperties);
if (!unifiedMemoryPointer) {
return nullptr;
}
} else {
unifiedMemoryPointer = createUnifiedAllocationWithDeviceStorage(rootDeviceIndex, size, {}, memoryProperties);
if (!unifiedMemoryPointer) {
return nullptr;
}
UNRECOVERABLE_IF(cmdQ == nullptr);
auto pageFaultManager = this->memoryManager->getPageFaultManager();
pageFaultManager->insertAllocation(unifiedMemoryPointer, size, this, cmdQ, memoryProperties.allocationFlags);
}
auto unifiedMemoryAllocation = this->getSVMAlloc(unifiedMemoryPointer);
unifiedMemoryAllocation->memoryType = memoryProperties.memoryType;
unifiedMemoryAllocation->allocationFlagsProperty = memoryProperties.allocationFlags;
UNRECOVERABLE_IF(cmdQ == nullptr);
auto pageFaultManager = this->memoryManager->getPageFaultManager();
pageFaultManager->insertAllocation(unifiedMemoryPointer, size, this, cmdQ, memoryProperties.allocationFlags);
return unifiedMemoryPointer;
}
return createUnifiedMemoryAllocation(rootDeviceIndex, size, memoryProperties);
}
void *SVMAllocsManager::createUnifiedKmdMigratedAllocation(uint32_t rootDeviceIndex, size_t size, const SvmAllocationProperties &svmProperties, const UnifiedMemoryProperties &unifiedMemoryProperties) {
size_t alignedSize = alignUp<size_t>(size, 2 * MemoryConstants::megaByte);
AllocationProperties gpuProperties{rootDeviceIndex,
true,
alignedSize,
GraphicsAllocation::AllocationType::UNIFIED_SHARED_MEMORY,
unifiedMemoryProperties.subdeviceBitfield.count() > 1,
false,
unifiedMemoryProperties.subdeviceBitfield};
gpuProperties.alignment = 2 * MemoryConstants::megaByte;
MemoryPropertiesHelper::fillCachePolicyInProperties(gpuProperties, false, svmProperties.readOnly, false);
GraphicsAllocation *allocationGpu = memoryManager->allocateGraphicsMemoryWithProperties(gpuProperties);
if (!allocationGpu) {
return nullptr;
}
setUnifiedAllocationProperties(allocationGpu, svmProperties);
SvmAllocationData allocData(rootDeviceIndex);
allocData.gpuAllocations.addAllocation(allocationGpu);
allocData.cpuAllocation = nullptr;
allocData.device = unifiedMemoryProperties.device;
allocData.size = size;
std::unique_lock<SpinLock> lock(mtx);
this->SVMAllocs.insert(allocData);
return allocationGpu->getUnderlyingBuffer();
}
/**
 * Transfers the SVM property flags onto a graphics allocation.
 *
 * The allocation is marked writable only when neither `readOnly` nor
 * `hostPtrReadOnly` is set in svmProperties; its coherency flag is copied
 * from `svmProperties.coherent`.
 *
 * @param allocation     graphics allocation to update (dereferenced unconditionally)
 * @param svmProperties  source of the read-only and coherency flags
 */
void SVMAllocsManager::setUnifiedAllocationProperties(GraphicsAllocation *allocation, const SvmAllocationProperties &svmProperties) {
    const bool anyReadOnlyFlag = svmProperties.readOnly || svmProperties.hostPtrReadOnly;
    allocation->setMemObjectsAllocationWithWritableFlags(!anyReadOnlyFlag);
    allocation->setCoherent(svmProperties.coherent);
}
SvmAllocationData *SVMAllocsManager::getSVMAlloc(const void *ptr) {
std::unique_lock<SpinLock> lock(mtx);
return SVMAllocs.get(ptr);
@@ -309,8 +358,7 @@ void *SVMAllocsManager::createUnifiedAllocationWithDeviceStorage(uint32_t rootDe
if (!allocationCpu) {
return nullptr;
}
allocationCpu->setMemObjectsAllocationWithWritableFlags(!svmProperties.readOnly && !svmProperties.hostPtrReadOnly);
allocationCpu->setCoherent(svmProperties.coherent);
setUnifiedAllocationProperties(allocationCpu, svmProperties);
void *svmPtr = allocationCpu->getUnderlyingBuffer();
AllocationProperties gpuProperties{rootDeviceIndex,
@@ -328,8 +376,7 @@ void *SVMAllocsManager::createUnifiedAllocationWithDeviceStorage(uint32_t rootDe
memoryManager->freeGraphicsMemory(allocationCpu);
return nullptr;
}
allocationGpu->setMemObjectsAllocationWithWritableFlags(!svmProperties.readOnly && !svmProperties.hostPtrReadOnly);
allocationGpu->setCoherent(svmProperties.coherent);
setUnifiedAllocationProperties(allocationGpu, svmProperties);
SvmAllocationData allocData(rootDeviceIndex);
allocData.gpuAllocations.addAllocation(allocationGpu);

View File

@@ -114,6 +114,11 @@ class SVMAllocsManager {
size_t size,
const UnifiedMemoryProperties &svmProperties,
void *cmdQ);
// Creates a shared unified-memory allocation backed only by a graphics allocation of type
// UNIFIED_SHARED_MEMORY (no CPU allocation is recorded) and registers it in the SVM allocations
// container. Returns the allocation's underlying buffer pointer, or nullptr if allocation fails.
void *createUnifiedKmdMigratedAllocation(uint32_t rootDeviceIndex,
size_t size,
const SvmAllocationProperties &svmProperties,
const UnifiedMemoryProperties &unifiedMemoryProperties);
// Marks the allocation writable unless svmProperties.readOnly or svmProperties.hostPtrReadOnly is set,
// and copies svmProperties.coherent onto the allocation.
void setUnifiedAllocationProperties(GraphicsAllocation *allocation, const SvmAllocationProperties &svmProperties);
SvmAllocationData *getSVMAlloc(const void *ptr);
bool freeSVMAlloc(void *ptr, bool blocking);
bool freeSVMAlloc(void *ptr) { return freeSVMAlloc(ptr, false); }

View File

@@ -94,6 +94,7 @@ class DrmMemoryManager : public MemoryManager {
GraphicsAllocation *allocateShareableMemory(const AllocationData &allocationData) override;
GraphicsAllocation *allocateGraphicsMemoryForImageImpl(const AllocationData &allocationData, std::unique_ptr<Gmm> gmm) override;
GraphicsAllocation *allocateGraphicsMemoryWithGpuVa(const AllocationData &allocationData) override;
// Creates a shared unified-memory allocation described by allocationData.
// NOTE(review): the definitions introduced together with this declaration are stubs that
// return nullptr — confirm a real implementation exists before relying on the result.
GraphicsAllocation *createSharedUnifiedMemoryAllocation(const AllocationData &allocationData);
void *lockResourceImpl(GraphicsAllocation &graphicsAllocation) override;
void *lockResourceInLocalMemoryImpl(GraphicsAllocation &graphicsAllocation);

View File

@@ -21,6 +21,10 @@ DrmAllocation *DrmMemoryManager::createAllocWithAlignment(const AllocationData &
return createAllocWithAlignmentFromUserptr(allocationData, size, alignment, alignedSize, gpuAddress);
}
// Stub implementation: allocationData is not inspected and no allocation is created.
GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const AllocationData &allocationData) {
// Unconditionally reports "no allocation" to the caller.
return nullptr;
}
GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryInDevicePool(const AllocationData &allocationData, AllocationStatus &status) {
status = AllocationStatus::RetryInNonDevicePool;
return nullptr;

View File

@@ -64,6 +64,10 @@ BufferObject *DrmMemoryManager::createBufferObjectInMemoryRegion(Drm *drm,
return bo;
}
// Stub implementation: allocationData is not inspected and no allocation is created.
GraphicsAllocation *DrmMemoryManager::createSharedUnifiedMemoryAllocation(const AllocationData &allocationData) {
// Unconditionally reports "no allocation" to the caller.
return nullptr;
}
DrmAllocation *DrmMemoryManager::createAllocWithAlignment(const AllocationData &allocationData, size_t size, size_t alignment, size_t alignedSize, uint64_t gpuAddress) {
bool useBooMmap = this->getDrm(allocationData.rootDeviceIndex).getMemoryInfo() && allocationData.useMmapObject;