fix: add alignment support to host and shared UnifiedMemoryAllocation

Related-To: LOCI-4334

Signed-off-by: Lu, Wenbin <wenbin.lu@intel.com>
This commit is contained in:
Lu, Wenbin 2023-05-02 18:42:33 +00:00 committed by Compute-Runtime-Automation
parent 41478c5972
commit c3df92ac41
6 changed files with 133 additions and 13 deletions

View File

@ -1170,7 +1170,7 @@ TEST_F(ZexHostPointerTests, whenAllocatingSharedMemoryWithUseHostPtrFlagThenCrea
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
TEST_F(MemoryTest, whenAllocatingDeviceMemoryThenAlignmentIsPassedCorrectly) {
TEST_F(MemoryTest, whenAllocatingDeviceMemoryThenAlignmentIsPassedCorrectlyAndMemoryIsAligned) {
const size_t size = 1;
ze_device_mem_alloc_desc_t deviceDesc = {};
@ -1189,6 +1189,67 @@ TEST_F(MemoryTest, whenAllocatingDeviceMemoryThenAlignmentIsPassedCorrectly) {
ze_result_t result = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &ptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, ptr);
if (alignment != 0) {
EXPECT_EQ(reinterpret_cast<uintptr_t>(ptr) & (~(alignment - 1)), reinterpret_cast<uintptr_t>(ptr));
}
result = context->freeMem(ptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
} while (alignment != 0);
}
// Host memory allocation with a caller-supplied alignment.
// The requested alignment starts at 4 MB and is halved on every pass down to
// and including 0. For each value the test:
//   - installs a validateAllocateProperties callback on the mock memory manager
//     that expects properties.alignment to equal the request rounded up to
//     MemoryConstants::pageSize,
//   - allocates one byte through context->allocHostMem and, for a non-zero
//     request, checks that the returned address is a multiple of it,
//   - frees the allocation again.
TEST_F(MemoryTest, whenAllocatingHostMemoryThenAlignmentIsPassedCorrectlyAndMemoryIsAligned) {
    ze_host_mem_alloc_desc_t hostDesc = {};
    hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC;
    hostDesc.pNext = nullptr;

    const size_t allocationSize = 1;
    auto mockMemoryManager = static_cast<MockMemoryManager *>(neoDevice->getMemoryManager());

    // 4 MB, 2 MB, ..., 2, 1, 0 - the zero case is exercised last and ends the loop.
    for (size_t requestedAlignment = 4 * MemoryConstants::megaByte;; requestedAlignment >>= 1) {
        mockMemoryManager->validateAllocateProperties = [requestedAlignment](const AllocationProperties &properties) {
            EXPECT_EQ(properties.alignment, alignUp<size_t>(requestedAlignment, MemoryConstants::pageSize));
        };

        void *ptr = nullptr;
        ze_result_t result = context->allocHostMem(&hostDesc, allocationSize, requestedAlignment, &ptr);
        EXPECT_EQ(ZE_RESULT_SUCCESS, result);
        EXPECT_NE(nullptr, ptr);

        if (requestedAlignment != 0) {
            // Clearing the low bits must not change an aligned address.
            const auto address = reinterpret_cast<uintptr_t>(ptr);
            EXPECT_EQ(address & (~(requestedAlignment - 1)), address);
        }

        result = context->freeMem(ptr);
        EXPECT_EQ(ZE_RESULT_SUCCESS, result);

        if (requestedAlignment == 0) {
            break;
        }
    }
}
TEST_F(MemoryTest, whenAllocatingSharedMemoryThenAlignmentIsPassedCorrectlyAndMemoryIsAligned) {
const size_t size = 1;
ze_device_mem_alloc_desc_t deviceDesc = {};
deviceDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
deviceDesc.pNext = nullptr;
ze_host_mem_alloc_desc_t hostDesc = {};
hostDesc.stype = ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC;
hostDesc.pNext = nullptr;
auto memoryManager = static_cast<MockMemoryManager *>(neoDevice->getMemoryManager());
size_t alignment = 8 * MemoryConstants::megaByte;
do {
alignment >>= 1;
memoryManager->validateAllocateProperties = [alignment](const AllocationProperties &properties) {
EXPECT_EQ(properties.alignment, alignUp<size_t>(alignment, MemoryConstants::pageSize64k));
};
void *ptr = nullptr;
ze_result_t result = context->allocSharedMem(device->toHandle(), &deviceDesc, &hostDesc, size, alignment, &ptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(nullptr, ptr);
if (alignment != 0) {
EXPECT_EQ(reinterpret_cast<uintptr_t>(ptr) & (~(alignment - 1)), reinterpret_cast<uintptr_t>(ptr));
}
result = context->freeMem(ptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
} while (alignment != 0);

View File

@ -453,7 +453,8 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocateMemoryByKMD(const Allocatio
GraphicsAllocation *alloc = nullptr;
auto ptr = allocateSystemMemory(alignUp(allocationData.size, MemoryConstants::pageSize), MemoryConstants::pageSize);
const size_t alignment = std::max(allocationData.alignment, MemoryConstants::pageSize);
auto ptr = allocateSystemMemory(alignUp(allocationData.size, alignment), alignment);
if (ptr != nullptr) {
alloc = createMemoryAllocation(allocationData.type, ptr, ptr, reinterpret_cast<uint64_t>(ptr), allocationData.size,
counter, MemoryPool::SystemCpuInaccessible, allocationData.rootDeviceIndex, allocationData.flags.uncacheable, allocationData.flags.flushL3, false);

View File

@ -188,8 +188,8 @@ void *SVMAllocsManager::createSVMAlloc(size_t size, const SvmAllocationPropertie
void *SVMAllocsManager::createHostUnifiedMemoryAllocation(size_t size,
const UnifiedMemoryProperties &memoryProperties) {
size_t pageSizeForAlignment = MemoryConstants::pageSize;
size_t alignedSize = alignUp<size_t>(size, pageSizeForAlignment);
size_t pageSizeForAlignment = alignUp<size_t>(memoryProperties.alignment, MemoryConstants::pageSize);
size_t alignedSize = alignUp<size_t>(size, MemoryConstants::pageSize);
bool compressionEnabled = false;
AllocationType allocationType = getGraphicsAllocationTypeAndCompressionPreference(memoryProperties, compressionEnabled);
@ -206,6 +206,7 @@ void *SVMAllocsManager::createHostUnifiedMemoryAllocation(size_t size,
false,
(deviceBitfield.count() > 1) && multiOsContextSupport,
deviceBitfield};
unifiedMemoryProperties.alignment = pageSizeForAlignment;
unifiedMemoryProperties.flags.preferCompressed = compressionEnabled;
unifiedMemoryProperties.flags.shareable = memoryProperties.allocationFlags.flags.shareable;
unifiedMemoryProperties.flags.isUSMHostAllocation = true;
@ -370,8 +371,8 @@ void *SVMAllocsManager::createUnifiedKmdMigratedAllocation(size_t size, const Sv
? unifiedMemoryProperties.device->getRootDeviceIndex()
: *unifiedMemoryProperties.rootDeviceIndices.begin();
auto &deviceBitfield = unifiedMemoryProperties.subdeviceBitfields.at(rootDeviceIndex);
size_t pageSizeForAlignment = 2 * MemoryConstants::megaByte;
size_t alignedSize = alignUp<size_t>(size, pageSizeForAlignment);
size_t pageSizeForAlignment = alignUp<size_t>(unifiedMemoryProperties.alignment, 2 * MemoryConstants::megaByte);
size_t alignedSize = alignUp<size_t>(size, 2 * MemoryConstants::megaByte);
AllocationProperties gpuProperties{rootDeviceIndex,
true,
alignedSize,
@ -575,15 +576,15 @@ void *SVMAllocsManager::createUnifiedAllocationWithDeviceStorage(size_t size, co
: *unifiedMemoryProperties.rootDeviceIndices.begin();
auto externalPtr = reinterpret_cast<void *>(unifiedMemoryProperties.allocationFlags.hostptr);
bool useExternalHostPtrForCpu = externalPtr != nullptr;
constexpr auto pageSizeForAlignment = MemoryConstants::pageSize64k;
size_t alignedSize = alignUp<size_t>(size, pageSizeForAlignment);
size_t pageSizeForAlignment = alignUp<size_t>(unifiedMemoryProperties.alignment, MemoryConstants::pageSize64k);
size_t alignedSize = alignUp<size_t>(size, MemoryConstants::pageSize64k);
DeviceBitfield subDevices = unifiedMemoryProperties.subdeviceBitfields.at(rootDeviceIndex);
AllocationProperties cpuProperties{rootDeviceIndex,
!useExternalHostPtrForCpu, // allocateMemory
alignedSize, AllocationType::SVM_CPU,
false, // isMultiStorageAllocation
subDevices};
cpuProperties.alignment = memoryManager->peekExecutionEnvironment().rootDeviceEnvironments[rootDeviceIndex]->getProductHelper().getSvmCpuAlignment();
cpuProperties.alignment = std::max(pageSizeForAlignment, memoryManager->peekExecutionEnvironment().rootDeviceEnvironments[rootDeviceIndex]->getProductHelper().getSvmCpuAlignment());
cpuProperties.flags.isUSMHostAllocation = useExternalHostPtrForCpu;
cpuProperties.forceKMDAllocation = true;
cpuProperties.makeGPUVaDifferentThanCPUPtr = true;

View File

@ -626,7 +626,7 @@ GraphicsAllocation *DrmMemoryManager::allocateMemoryByKMD(const AllocationData &
auto gmm = std::make_unique<Gmm>(executionEnvironment.rootDeviceEnvironments[allocationData.rootDeviceIndex]->getGmmHelper(), allocationData.hostPtr,
allocationData.size, 0u, CacheSettingsHelper::getGmmUsageType(allocationData.type, allocationData.flags.uncacheable, productHelper), false, systemMemoryStorageInfo, true);
size_t bufferSize = allocationData.size;
uint64_t gpuRange = acquireGpuRange(bufferSize, allocationData.rootDeviceIndex, HeapIndex::HEAP_STANDARD64KB);
uint64_t gpuRange = acquireGpuRangeWithCustomAlignment(bufferSize, allocationData.rootDeviceIndex, HeapIndex::HEAP_STANDARD64KB, allocationData.alignment);
GemCreate create{};
create.size = bufferSize;
@ -1595,6 +1595,9 @@ AllocationStatus getGpuAddress(const AlignmentSelector &alignmentSelector, HeapA
alignment.heap = HeapIndex::HEAP_EXTENDED;
}
if (alignment.alignment < allocationData.alignment) {
alignment.alignment = allocationData.alignment;
}
gpuAddress = gmmHelper->canonize(gfxPartition->heapAllocateWithCustomAlignment(alignment.heap, sizeAllocated, alignment.alignment));
break;
}

View File

@ -19,7 +19,7 @@ bool operator<(const HeapChunk &hc1, const HeapChunk &hc2) {
}
uint64_t HeapAllocator::allocateWithCustomAlignment(size_t &sizeToAllocate, size_t alignment) {
if (alignment == 0) {
if (alignment < this->allocationAlignment) {
alignment = this->allocationAlignment;
}

View File

@ -237,7 +237,7 @@ TEST_F(SVMLocalMemoryAllocatorTest, givenForceMemoryPrefetchForKmdMigratedShared
svmManager->freeSVMAlloc(ptr);
}
TEST_F(SVMLocalMemoryAllocatorTest, givenAlignmentThenSharedUnifiedMemoryAllocationsAreAlignedCorrectly) {
TEST_F(SVMLocalMemoryAllocatorTest, givenAlignmentThenUnifiedMemoryAllocationsAreAlignedCorrectly) {
std::unique_ptr<UltDeviceFactory> deviceFactory(new UltDeviceFactory(1, 2));
auto device = deviceFactory->rootDevices[0];
auto memoryManager = static_cast<MockMemoryManager *>(device->getMemoryManager());
@ -251,7 +251,7 @@ TEST_F(SVMLocalMemoryAllocatorTest, givenAlignmentThenSharedUnifiedMemoryAllocat
memoryManager->validateAllocateProperties = [alignment](const AllocationProperties &properties) {
EXPECT_EQ(properties.alignment, alignUp<size_t>(alignment, MemoryConstants::pageSize64k));
};
SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, alignment, rootDeviceIndices, deviceBitfields);
SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::DEVICE_UNIFIED_MEMORY, alignment, rootDeviceIndices, deviceBitfields);
unifiedMemoryProperties.device = device;
auto ptr = svmManager->createUnifiedMemoryAllocation(1, unifiedMemoryProperties);
EXPECT_NE(nullptr, ptr);
@ -261,3 +261,57 @@ TEST_F(SVMLocalMemoryAllocatorTest, givenAlignmentThenSharedUnifiedMemoryAllocat
svmManager->freeSVMAlloc(ptr);
} while (alignment != 0);
}
// Host unified memory allocation with a caller-supplied alignment.
// The requested alignment starts at 4 MB and is halved on every pass down to
// and including 0. For each value the test installs a validateAllocateProperties
// callback that expects properties.alignment to equal the request rounded up to
// MemoryConstants::pageSize, allocates one byte via
// createHostUnifiedMemoryAllocation, checks the returned address against a
// non-zero request, and frees the allocation.
TEST_F(SVMLocalMemoryAllocatorTest, givenAlignmentThenHostUnifiedMemoryAllocationsAreAlignedCorrectly) {
    std::unique_ptr<UltDeviceFactory> deviceFactory(new UltDeviceFactory(1, 2));
    auto device = deviceFactory->rootDevices[0];
    auto mockMemoryManager = static_cast<MockMemoryManager *>(device->getMemoryManager());
    auto svmManager = std::make_unique<MockSVMAllocsManager>(mockMemoryManager, false);
    auto csr = std::make_unique<MockCommandStreamReceiver>(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield());
    csr->setupContext(*device->getDefaultEngine().osContext);

    // 4 MB, 2 MB, ..., 2, 1, 0 - the zero case is exercised last and ends the loop.
    for (size_t requestedAlignment = 4 * MemoryConstants::megaByte;; requestedAlignment >>= 1) {
        mockMemoryManager->validateAllocateProperties = [requestedAlignment](const AllocationProperties &properties) {
            EXPECT_EQ(properties.alignment, alignUp<size_t>(requestedAlignment, MemoryConstants::pageSize));
        };

        SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY, requestedAlignment, rootDeviceIndices, deviceBitfields);
        unifiedMemoryProperties.device = device;

        auto ptr = svmManager->createHostUnifiedMemoryAllocation(1, unifiedMemoryProperties);
        EXPECT_NE(nullptr, ptr);

        if (requestedAlignment != 0) {
            // Clearing the low bits must not change an aligned address.
            const auto address = reinterpret_cast<uintptr_t>(ptr);
            EXPECT_EQ(address & (~(requestedAlignment - 1)), address);
        }

        svmManager->freeSVMAlloc(ptr);

        if (requestedAlignment == 0) {
            break;
        }
    }
}
// Shared unified memory allocation with a caller-supplied alignment.
// The requested alignment starts at 4 MB and is halved on every pass down to
// and including 0. For each value the test installs a validateAllocateProperties
// callback that expects properties.alignment to equal the request rounded up to
// MemoryConstants::pageSize64k, allocates one byte via
// createSharedUnifiedMemoryAllocation, checks the returned address against a
// non-zero request, and frees the allocation.
TEST_F(SVMLocalMemoryAllocatorTest, givenAlignmentThenSharedUnifiedMemoryAllocationsAreAlignedCorrectly) {
    std::unique_ptr<UltDeviceFactory> deviceFactory(new UltDeviceFactory(1, 2));
    auto device = deviceFactory->rootDevices[0];
    auto mockMemoryManager = static_cast<MockMemoryManager *>(device->getMemoryManager());
    auto svmManager = std::make_unique<MockSVMAllocsManager>(mockMemoryManager, false);
    auto csr = std::make_unique<MockCommandStreamReceiver>(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield());
    csr->setupContext(*device->getDefaultEngine().osContext);

    // Opaque, never-dereferenced value passed as the command queue argument.
    void *cmdQ = reinterpret_cast<void *>(0x12345);

    // The memory manager's pageFaultManager takes over the freshly created mock.
    mockMemoryManager->pageFaultManager.reset(new MockPageFaultManager());

    // 4 MB, 2 MB, ..., 2, 1, 0 - the zero case is exercised last and ends the loop.
    for (size_t requestedAlignment = 4 * MemoryConstants::megaByte;; requestedAlignment >>= 1) {
        mockMemoryManager->validateAllocateProperties = [requestedAlignment](const AllocationProperties &properties) {
            EXPECT_EQ(properties.alignment, alignUp<size_t>(requestedAlignment, MemoryConstants::pageSize64k));
        };

        SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::SHARED_UNIFIED_MEMORY, requestedAlignment, rootDeviceIndices, deviceBitfields);
        unifiedMemoryProperties.device = device;

        auto ptr = svmManager->createSharedUnifiedMemoryAllocation(1, unifiedMemoryProperties, cmdQ);
        EXPECT_NE(nullptr, ptr);

        if (requestedAlignment != 0) {
            // Clearing the low bits must not change an aligned address.
            const auto address = reinterpret_cast<uintptr_t>(ptr);
            EXPECT_EQ(address & (~(requestedAlignment - 1)), address);
        }

        svmManager->freeSVMAlloc(ptr);

        if (requestedAlignment == 0) {
            break;
        }
    }
}