Ensure shared allocations are made resident across multiple devices

Related-To: LOCI-3597
Signed-off-by: Raiyan Latif <raiyan.latif@intel.com>
This commit is contained in:
Raiyan Latif
2023-01-11 17:18:28 +00:00
committed by Compute-Runtime-Automation
parent 77501d86ba
commit 4c598395fe
6 changed files with 154 additions and 8 deletions

View File

@@ -1572,6 +1572,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
NEO::SvmAllocationData *allocData = nullptr;
bool dstAllocFound = device->getDriverHandle()->findAllocationDataForRange(ptr, size, &allocData);
if (dstAllocFound) {
if (allocData->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY) {
DriverHandleImp *driverHandleImp = static_cast<DriverHandleImp *>(device->getDriverHandle());
driverHandleImp->makeMemoryResident(device->getRootDeviceIndex(), ptr, size);
}
if (allocData->memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY ||
allocData->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY) {
hostPointerNeedsFlush = true;
@@ -1891,8 +1895,17 @@ inline AlignedAllocationData CommandListCoreFamily<gfxCoreFamily>::getAlignedAll
alloc = driverHandle->getPeerAllocation(device, allocData, reinterpret_cast<void *>(pbase), &alignedPtr);
alignedPtr += offset;
if (allocData->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY) {
DriverHandleImp *driverHandleImp = static_cast<DriverHandleImp *>(device->getDriverHandle());
driverHandleImp->makeMemoryResident(allocData->device->getRootDeviceIndex(), reinterpret_cast<void *>(ptr), bufferSize);
}
} else {
alignedPtr = sourcePtr;
if (allocData->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY) {
DriverHandleImp *driverHandleImp = static_cast<DriverHandleImp *>(device->getDriverHandle());
driverHandleImp->makeMemoryResident(device->getRootDeviceIndex(), ptr, bufferSize);
}
}
if (allocData->memoryType == InternalMemoryType::HOST_UNIFIED_MEMORY ||

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -68,6 +68,9 @@ struct DriverHandle : _ze_driver_handle_t {
virtual ze_result_t fabricVertexGetExp(uint32_t *pCount, ze_fabric_vertex_handle_t *phDevices) = 0;
virtual uint32_t getEventMaxPacketCount(uint32_t numDevices, ze_device_handle_t *deviceHandles) const = 0;
virtual uint32_t getEventMaxKernelCount(uint32_t numDevices, ze_device_handle_t *deviceHandles) const = 0;
virtual ze_result_t makeMemoryResident(uint32_t rootDeviceIndex,
void *ptr,
size_t size) = 0;
static DriverHandle *fromHandle(ze_driver_handle_t handle) { return static_cast<DriverHandle *>(handle); }
inline ze_driver_handle_t toHandle() { return this; }

View File

@@ -616,6 +616,25 @@ NEO::GraphicsAllocation *DriverHandleImp::getPeerAllocation(Device *device,
return alloc;
}
/*
 * Resolves the driver allocation backing [ptr, ptr + size) for the given root
 * device and, when the SVM manager reports the pointer as shared unified
 * memory, records the {ptr, allocation} pair in sharedMakeResidentAllocations.
 *
 * Returns ZE_RESULT_ERROR_INVALID_ARGUMENT when no allocation is found for the
 * range, ZE_RESULT_SUCCESS otherwise (including for non-shared memory, which is
 * simply not recorded).
 *
 * NOTE(review): std::map::insert keeps an already-present entry for ptr, so a
 * second call for the same pointer with a different root device index does not
 * replace the stored allocation - confirm this is intended.
 */
ze_result_t DriverHandleImp::makeMemoryResident(uint32_t rootDeviceIndex, void *ptr, size_t size) {
    auto graphicsAllocation = this->getDriverSystemMemoryAllocation(ptr, size, rootDeviceIndex, nullptr);
    if (graphicsAllocation == nullptr) {
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

    auto svmData = this->getSvmAllocsManager()->getSVMAlloc(ptr);
    const bool isSharedUsm = svmData && svmData->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY;
    if (!isSharedUsm) {
        return ZE_RESULT_SUCCESS;
    }

    // The bookkeeping map is guarded by its dedicated mutex.
    std::lock_guard<std::mutex> guard(this->sharedMakeResidentAllocationsLock);
    this->sharedMakeResidentAllocations.insert({ptr, graphicsAllocation});
    return ZE_RESULT_SUCCESS;
}
void *DriverHandleImp::importNTHandle(ze_device_handle_t hDevice, void *handle, NEO::AllocationType allocationType) {
auto neoDevice = Device::fromHandle(hDevice)->getNEODevice();

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -92,6 +92,10 @@ struct DriverHandleImp : public DriverHandle {
std::mutex sharedMakeResidentAllocationsLock;
std::map<void *, NEO::GraphicsAllocation *> sharedMakeResidentAllocations;
ze_result_t makeMemoryResident(uint32_t rootDeviceIndex,
void *ptr,
size_t size) override;
std::vector<Device *> devices;
std::vector<FabricVertex *> fabricVertices;
std::vector<FabricEdge *> fabricEdges;

View File

@@ -494,14 +494,14 @@ struct ContextMakeMemoryResidentAndMigrationTests : public ContextMakeMemoryResi
// Page fault manager mock used by the residency/migration tests: it counts the
// GPU-domain migration requests it receives and remembers the migrated pointers.
// NOTE(review): this listing carries both the previous and the replacement line
// for migratedAddress (scalar assignment vs. push_back, scalar vs. vector member).
struct MockResidentTestsPageFaultManager : public MockPageFaultManager {
// Counts the call and records the pointer that was asked to migrate.
void moveAllocationToGpuDomain(void *ptr) override {
moveAllocationToGpuDomainCalledTimes++;
migratedAddress = ptr;
migratedAddress.push_back(ptr);
}
// Only counts the call; the manager argument is not inspected.
void moveAllocationsWithinUMAllocsManagerToGpuDomain(SVMAllocsManager *unifiedMemoryManager) override {
moveAllocationsWithinUMAllocsManagerToGpuDomainCalled++;
}
// Number of moveAllocationToGpuDomain invocations.
uint32_t moveAllocationToGpuDomainCalledTimes = 0;
// Number of moveAllocationsWithinUMAllocsManagerToGpuDomain invocations.
uint32_t moveAllocationsWithinUMAllocsManagerToGpuDomainCalled = 0;
// Pointer(s) handed to moveAllocationToGpuDomain.
void *migratedAddress = nullptr;
std::vector<void *> migratedAddress;
};
void SetUp() override {
@@ -580,7 +580,7 @@ HWTEST_F(ContextMakeMemoryResidentAndMigrationTests,
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
EXPECT_EQ(mockPageFaultManager->moveAllocationToGpuDomainCalledTimes, 1u);
EXPECT_EQ(mockPageFaultManager->migratedAddress, ptr);
EXPECT_EQ(mockPageFaultManager->migratedAddress[0], ptr);
mockMemoryInterface->evictResult = NEO::MemoryOperationsStatus::SUCCESS;
res = context->evictMemory(device, ptr, size);
@@ -680,7 +680,7 @@ HWTEST_F(ContextMakeMemoryResidentAndMigrationTests,
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
EXPECT_EQ(mockPageFaultManager->moveAllocationToGpuDomainCalledTimes, 0u);
EXPECT_EQ(mockPageFaultManager->migratedAddress, nullptr);
EXPECT_EQ(mockPageFaultManager->migratedAddress.empty(), true);
mockMemoryInterface->evictResult = NEO::MemoryOperationsStatus::SUCCESS;
res = context->evictMemory(device, ptr, size);
@@ -732,8 +732,11 @@ HWTEST_F(ContextMakeMemoryResidentAndMigrationTests,
nullptr, 0, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(mockPageFaultManager->moveAllocationToGpuDomainCalledTimes, 1u);
EXPECT_EQ(mockPageFaultManager->migratedAddress, ptr);
EXPECT_EQ(mockPageFaultManager->moveAllocationToGpuDomainCalledTimes, 2u);
if (mockPageFaultManager->migratedAddress[0] != ptr) {
EXPECT_EQ(mockPageFaultManager->migratedAddress[1], ptr);
}
mockMemoryInterface->evictResult = NEO::MemoryOperationsStatus::SUCCESS;
res = context->evictMemory(device, ptr, size);

View File

@@ -3120,6 +3120,34 @@ HWTEST2_F(MultipleDevicePeerAllocationTest,
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
}
// Shared memory is allocated against devices[0]; a command list initialized on
// devices[1] then resolves it via getAlignedAllocation(devices[1], ...) and must
// report a non-zero aligned allocation pointer.
HWTEST2_F(MultipleDevicePeerAllocationTest,
          givenSharedAllocationPassedToGetAllignedAllocationUsingDevice1ThenAlignedAllocationWithPeerAllocationIsReturned,
          IsAtLeastSkl) {
    L0::Device *owningDevice = driverHandle->devices[0];
    L0::Device *peerDevice = driverHandle->devices[1];

    const size_t allocSize = 1024;
    const size_t allocAlignment = 1u;
    void *sharedPtr = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_host_mem_alloc_desc_t hostDesc = {};
    ze_result_t result = context->allocSharedMem(owningDevice->toHandle(), &deviceDesc, &hostDesc,
                                                 allocSize, allocAlignment, &sharedPtr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_NE(nullptr, sharedPtr);

    auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily<gfxCoreFamily>>();
    commandList->initialize(peerDevice, NEO::EngineGroupType::RenderCompute, 0u);

    AlignedAllocationData alignedData = commandList->getAlignedAllocation(peerDevice, sharedPtr, allocSize, false);
    EXPECT_NE(alignedData.alignedAllocationPtr, 0u);

    result = context->freeMem(sharedPtr);
    ASSERT_EQ(result, ZE_RESULT_SUCCESS);
}
HWTEST2_F(MultipleDevicePeerAllocationTest,
givenDeviceAllocationPassedToGetAllignedAllocationUsingDevice0ThenAlignedAllocationWithPeerAllocationIsReturned,
IsAtLeastSkl) {
@@ -3146,6 +3174,34 @@ HWTEST2_F(MultipleDevicePeerAllocationTest,
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
}
// Shared memory is allocated against devices[1] and the command list is also
// initialized on devices[1]; the allocation is then resolved with
// getAlignedAllocation(devices[0], ...) and must report a non-zero aligned
// allocation pointer.
HWTEST2_F(MultipleDevicePeerAllocationTest,
          givenSharedAllocationPassedToGetAllignedAllocationUsingDevice0ThenAlignedAllocationWithPeerAllocationIsReturned,
          IsAtLeastSkl) {
    L0::Device *requestingDevice = driverHandle->devices[0];
    L0::Device *owningDevice = driverHandle->devices[1];

    const size_t allocSize = 1024;
    const size_t allocAlignment = 1u;
    void *sharedPtr = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_host_mem_alloc_desc_t hostDesc = {};
    ze_result_t result = context->allocSharedMem(owningDevice->toHandle(), &deviceDesc, &hostDesc,
                                                 allocSize, allocAlignment, &sharedPtr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    EXPECT_NE(nullptr, sharedPtr);

    auto commandList = std::make_unique<::L0::ult::CommandListCoreFamily<gfxCoreFamily>>();
    commandList->initialize(owningDevice, NEO::EngineGroupType::RenderCompute, 0u);

    AlignedAllocationData alignedData = commandList->getAlignedAllocation(requestingDevice, sharedPtr, allocSize, false);
    EXPECT_NE(alignedData.alignedAllocationPtr, 0u);

    result = context->freeMem(sharedPtr);
    ASSERT_EQ(result, ZE_RESULT_SUCCESS);
}
HWTEST_F(MultipleDevicePeerAllocationTest,
givenDeviceAllocationPassedAsArgumentToKernelInPeerDeviceThenPeerAllocationIsUsed) {
L0::Device *device0 = driverHandle->devices[0];
@@ -3662,6 +3718,54 @@ TEST_F(MemoryTest, givenNoDeviceWhenAllocatingSharedMemoryThenDeviceInAllocation
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
}
// A null pointer passed to makeMemoryResident must be rejected with
// ZE_RESULT_ERROR_INVALID_ARGUMENT.
TEST_F(MemoryTest, givenCallToMakeMemoryResidentWithInvalidPointerThenInvalidArgumentIsReturned) {
    void *invalidPtr = nullptr;
    const ze_result_t status = driverHandle->makeMemoryResident(device->getRootDeviceIndex(), invalidPtr, 1);
    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, status);
}
// makeMemoryResident on a device allocation must succeed without growing
// sharedMakeResidentAllocations (only shared unified memory is recorded there).
TEST_F(MemoryTest,
       givenCallToMakeMemoryResidentWithDeviceMemoryThenAllocationIsNotAddedToVectorOfResidentAllocations) {
    const size_t size = 4096;
    void *ptr = nullptr;
    ze_device_mem_alloc_desc_t deviceDesc = {};
    ze_result_t res = context->allocDeviceMem(device->toHandle(),
                                              &deviceDesc,
                                              size,
                                              0,
                                              &ptr);
    // Fatal checks: nothing below is meaningful without a valid device allocation.
    ASSERT_EQ(ZE_RESULT_SUCCESS, res);
    ASSERT_NE(nullptr, ptr);

    DriverHandleImp *driverHandleImp = static_cast<DriverHandleImp *>(context->getDriverHandle());
    const size_t previousSize = driverHandleImp->sharedMakeResidentAllocations.size();

    res = driverHandle->makeMemoryResident(device->getRootDeviceIndex(), ptr, size);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);

    const size_t currentSize = driverHandleImp->sharedMakeResidentAllocations.size();
    EXPECT_EQ(previousSize, currentSize);

    // Verify the release as the neighbouring tests do instead of dropping the result.
    res = context->freeMem(ptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
}
// A system-memory buffer that has been imported as an external pointer can be
// passed to makeMemoryResident; the call, the import and the release are all
// expected to succeed.
TEST_F(MemoryTest,
       givenCallToMakeMemoryResidentWithHeapPointerThenSuccessIsReturned) {
    size_t bufferSize = 4 * MemoryConstants::pageSize;
    void *hostBuffer = driverHandle->getMemoryManager()->allocateSystemMemory(bufferSize, MemoryConstants::pageSize);
    ASSERT_NE(nullptr, hostBuffer);

    ze_result_t status = driverHandle->importExternalPointer(hostBuffer, MemoryConstants::pageSize);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    status = driverHandle->makeMemoryResident(device->getRootDeviceIndex(), hostBuffer, MemoryConstants::pageSize);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    status = driverHandle->releaseImportedPointer(hostBuffer);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);

    driverHandle->getMemoryManager()->freeSystemMemory(hostBuffer);
}
TEST_F(MemoryTest, givenCallToCheckMemoryAccessFromDeviceWithInvalidPointerThenInvalidArgumentIsReturned) {
void *ptr = nullptr;
ze_result_t res = driverHandle->checkMemoryAccessFromDevice(device, ptr);