fix: store the correct page size in SvmAllocationData

Also use the same alignment for both the CPU and GPU allocations in shared USM

Related-To: GSD-7103, NEO-9812

Signed-off-by: Wenbin Lu <wenbin.lu@intel.com>
This commit is contained in:
Lu, Wenbin
2024-02-06 01:08:55 +00:00
committed by Compute-Runtime-Automation
parent e7850bc40c
commit 5e562ae7b0
3 changed files with 58 additions and 18 deletions

View File

@@ -77,7 +77,7 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemoryWithAlignment
if (allocationData.type == AllocationType::svmCpu) {
auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex];
auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
alignment = productHelper.getSvmCpuAlignment();
alignment = alignUpNonZero<size_t>(allocationData.alignment, productHelper.getSvmCpuAlignment());
sizeAligned = alignUp(allocationData.size, alignment);
}
auto cpuAllocationSize = sizeAligned;

View File

@@ -196,8 +196,8 @@ void *SVMAllocsManager::createSVMAlloc(size_t size, const SvmAllocationPropertie
void *SVMAllocsManager::createHostUnifiedMemoryAllocation(size_t size,
const UnifiedMemoryProperties &memoryProperties) {
size_t pageSizeForAlignment = alignUpNonZero<size_t>(memoryProperties.alignment, MemoryConstants::pageSize);
size_t alignedSize = alignUp<size_t>(size, MemoryConstants::pageSize);
constexpr size_t pageSizeForAlignment = MemoryConstants::pageSize;
const size_t alignedSize = alignUp<size_t>(size, pageSizeForAlignment);
bool compressionEnabled = false;
AllocationType allocationType = getGraphicsAllocationTypeAndCompressionPreference(memoryProperties, compressionEnabled);
@@ -214,7 +214,7 @@ void *SVMAllocsManager::createHostUnifiedMemoryAllocation(size_t size,
false,
(deviceBitfield.count() > 1) && multiOsContextSupport,
deviceBitfield};
unifiedMemoryProperties.alignment = pageSizeForAlignment;
unifiedMemoryProperties.alignment = alignUpNonZero<size_t>(memoryProperties.alignment, pageSizeForAlignment);
unifiedMemoryProperties.flags.preferCompressed = compressionEnabled;
unifiedMemoryProperties.flags.shareable = memoryProperties.allocationFlags.flags.shareable;
unifiedMemoryProperties.flags.isUSMHostAllocation = true;
@@ -261,8 +261,8 @@ void *SVMAllocsManager::createUnifiedMemoryAllocation(size_t size,
const UnifiedMemoryProperties &memoryProperties) {
auto rootDeviceIndex = memoryProperties.getRootDeviceIndex();
DeviceBitfield deviceBitfield = memoryProperties.subdeviceBitfields.at(rootDeviceIndex);
size_t pageSizeForAlignment = alignUpNonZero<size_t>(memoryProperties.alignment, MemoryConstants::pageSize64k);
size_t alignedSize = alignUp<size_t>(size, MemoryConstants::pageSize64k);
constexpr size_t pageSizeForAlignment = MemoryConstants::pageSize64k;
const size_t alignedSize = alignUp<size_t>(size, pageSizeForAlignment);
auto externalPtr = reinterpret_cast<void *>(memoryProperties.allocationFlags.hostptr);
bool useExternalHostPtrForCpu = externalPtr != nullptr;
@@ -288,7 +288,7 @@ void *SVMAllocsManager::createUnifiedMemoryAllocation(size_t size,
false,
multiStorageAllocation,
deviceBitfield};
unifiedMemoryProperties.alignment = pageSizeForAlignment;
unifiedMemoryProperties.alignment = alignUpNonZero<size_t>(memoryProperties.alignment, pageSizeForAlignment);
unifiedMemoryProperties.flags.isUSMDeviceAllocation = false;
unifiedMemoryProperties.flags.shareable = memoryProperties.allocationFlags.flags.shareable;
unifiedMemoryProperties.cacheRegion = MemoryPropertiesHelper::getCacheRegion(memoryProperties.allocationFlags);
@@ -390,8 +390,8 @@ void *SVMAllocsManager::createUnifiedKmdMigratedAllocation(size_t size, const Sv
auto rootDeviceIndex = unifiedMemoryProperties.getRootDeviceIndex();
auto &deviceBitfield = unifiedMemoryProperties.subdeviceBitfields.at(rootDeviceIndex);
size_t pageSizeForAlignment = std::max(alignUpNonZero<size_t>(unifiedMemoryProperties.alignment, MemoryConstants::pageSize2M), MemoryConstants::pageSize2M);
size_t alignedSize = alignUp<size_t>(size, MemoryConstants::pageSize2M);
constexpr size_t pageSizeForAlignment = MemoryConstants::pageSize2M;
const size_t alignedSize = alignUp<size_t>(size, pageSizeForAlignment);
AllocationProperties gpuProperties{rootDeviceIndex,
true,
alignedSize,
@@ -400,7 +400,7 @@ void *SVMAllocsManager::createUnifiedKmdMigratedAllocation(size_t size, const Sv
false,
deviceBitfield};
gpuProperties.alignment = pageSizeForAlignment;
gpuProperties.alignment = alignUpNonZero<size_t>(unifiedMemoryProperties.alignment, pageSizeForAlignment);
gpuProperties.flags.resource48Bit = unifiedMemoryProperties.allocationFlags.flags.resource48Bit;
auto cacheRegion = MemoryPropertiesHelper::getCacheRegion(unifiedMemoryProperties.allocationFlags);
MemoryPropertiesHelper::fillCachePolicyInProperties(gpuProperties, false, svmProperties.readOnly, false, cacheRegion);
@@ -602,16 +602,17 @@ void *SVMAllocsManager::createUnifiedAllocationWithDeviceStorage(size_t size, co
auto rootDeviceIndex = unifiedMemoryProperties.getRootDeviceIndex();
auto externalPtr = reinterpret_cast<void *>(unifiedMemoryProperties.allocationFlags.hostptr);
bool useExternalHostPtrForCpu = externalPtr != nullptr;
const auto pageSizeForAlignment = alignUpNonZero<size_t>(unifiedMemoryProperties.alignment, MemoryConstants::pageSize64k);
size_t alignedSize = alignUp<size_t>(size, MemoryConstants::pageSize64k);
const size_t svmCpuAlignment = memoryManager->peekExecutionEnvironment().rootDeviceEnvironments[rootDeviceIndex]->getProductHelper().getSvmCpuAlignment();
const size_t effectiveSvmCpuAlignment = std::max(MemoryConstants::pageSize64k, svmCpuAlignment);
const size_t alignment = alignUpNonZero<size_t>(unifiedMemoryProperties.alignment, effectiveSvmCpuAlignment);
const size_t alignedCpuSize = alignUp<size_t>(size, alignment);
DeviceBitfield subDevices = unifiedMemoryProperties.subdeviceBitfields.at(rootDeviceIndex);
auto cpuAlignment = std::max(pageSizeForAlignment, memoryManager->peekExecutionEnvironment().rootDeviceEnvironments[rootDeviceIndex]->getProductHelper().getSvmCpuAlignment());
AllocationProperties cpuProperties{rootDeviceIndex,
!useExternalHostPtrForCpu, // allocateMemory
alignUp(alignedSize, cpuAlignment), AllocationType::svmCpu,
alignedCpuSize, AllocationType::svmCpu,
false, // isMultiStorageAllocation
subDevices};
cpuProperties.alignment = cpuAlignment;
cpuProperties.alignment = alignment;
cpuProperties.flags.isUSMHostAllocation = useExternalHostPtrForCpu;
cpuProperties.forceKMDAllocation = true;
cpuProperties.makeGPUVaDifferentThanCPUPtr = true;
@@ -636,15 +637,16 @@ void *SVMAllocsManager::createUnifiedAllocationWithDeviceStorage(size_t size, co
}
}
const size_t alignedGpuSize = alignUp<size_t>(size, MemoryConstants::pageSize64k);
AllocationProperties gpuProperties{rootDeviceIndex,
false,
alignedSize,
alignedGpuSize,
AllocationType::svmGpu,
false,
multiStorageAllocation,
subDevices};
gpuProperties.alignment = pageSizeForAlignment;
gpuProperties.alignment = alignment;
MemoryPropertiesHelper::fillCachePolicyInProperties(gpuProperties, false, svmProperties.readOnly, false, cacheRegion);
GraphicsAllocation *allocationGpu = memoryManager->allocateGraphicsMemoryWithProperties(gpuProperties, svmPtr);
if (!allocationGpu) {
@@ -657,7 +659,7 @@ void *SVMAllocsManager::createUnifiedAllocationWithDeviceStorage(size_t size, co
allocData.gpuAllocations.addAllocation(allocationGpu);
allocData.cpuAllocation = allocationCpu;
allocData.device = unifiedMemoryProperties.device;
allocData.pageSizeForAlignment = cpuAlignment;
allocData.pageSizeForAlignment = effectiveSvmCpuAlignment;
allocData.size = size;
allocData.setAllocId(++this->allocationsCounter);