diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index bda8dd0eea..045f7b42b3 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -1658,7 +1658,8 @@ ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr); if (driverHandle->isRemoteResourceNeeded(ptr, gpuAllocation, allocData, device)) { if (allocData) { - gpuAllocation = driverHandle->getPeerAllocation(device, allocData, ptr, nullptr); + uint64_t pbase = allocData->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress(); + gpuAllocation = driverHandle->getPeerAllocation(device, allocData, reinterpret_cast(pbase), nullptr); } if (gpuAllocation == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; @@ -1794,7 +1795,7 @@ inline AlignedAllocationData CommandListCoreFamily::getAlignedAll uint64_t pbase = allocData->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress(); uint64_t offset = sourcePtr - pbase; - alloc = driverHandle->getPeerAllocation(device, allocData, const_cast(buffer), &alignedPtr); + alloc = driverHandle->getPeerAllocation(device, allocData, reinterpret_cast(pbase), &alignedPtr); alignedPtr += offset; } else { alignedPtr = sourcePtr; diff --git a/level_zero/core/source/driver/driver_handle_imp.cpp b/level_zero/core/source/driver/driver_handle_imp.cpp index bbec314c93..5d2db26f7d 100644 --- a/level_zero/core/source/driver/driver_handle_imp.cpp +++ b/level_zero/core/source/driver/driver_handle_imp.cpp @@ -444,7 +444,7 @@ void *DriverHandleImp::importFdHandle(ze_device_handle_t hDevice, ze_ipc_memory_ NEO::GraphicsAllocation *DriverHandleImp::getPeerAllocation(Device *device, NEO::SvmAllocationData *allocData, - void *ptr, + void *basePtr, uintptr_t *peerGpuAddress) { if (NEO::DebugManager.flags.EnableCrossDeviceAccess.get() == 0) { return nullptr; @@ -458,7 +458,7 @@ NEO::GraphicsAllocation 
*DriverHandleImp::getPeerAllocation(Device *device, std::unique_lock lock(deviceImp->peerAllocationsMutex); - auto iter = deviceImp->peerAllocations.allocations.find(ptr); + auto iter = deviceImp->peerAllocations.allocations.find(basePtr); if (iter != deviceImp->peerAllocations.allocations.end()) { peerAllocData = &iter->second; alloc = peerAllocData->gpuAllocations.getDefaultGraphicsAllocation(); @@ -476,7 +476,7 @@ NEO::GraphicsAllocation *DriverHandleImp::getPeerAllocation(Device *device, } peerAllocData = this->getSvmAllocsManager()->getSVMAlloc(peerPtr); - deviceImp->peerAllocations.allocations.insert(std::make_pair(ptr, *peerAllocData)); + deviceImp->peerAllocations.allocations.insert(std::make_pair(basePtr, *peerAllocData)); } if (peerGpuAddress) { diff --git a/level_zero/core/source/driver/driver_handle_imp.h b/level_zero/core/source/driver/driver_handle_imp.h index 26f61e9e60..d43cecf503 100644 --- a/level_zero/core/source/driver/driver_handle_imp.h +++ b/level_zero/core/source/driver/driver_handle_imp.h @@ -63,7 +63,7 @@ struct DriverHandleImp : public DriverHandle { uintptr_t *gpuAddress) override; NEO::GraphicsAllocation *getPeerAllocation(Device *device, NEO::SvmAllocationData *allocData, - void *ptr, + void *basePtr, uintptr_t *peerGpuAddress); void createHostPointerManager(); void sortNeoDevices(std::vector> &neoDevices); diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index 5e3e01d891..beb05c1044 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -623,7 +623,7 @@ ze_result_t KernelImp::setArgBuffer(uint32_t argIndex, size_t argSize, const voi uint64_t pbase = allocData->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress(); uint64_t offset = (uint64_t)requestedAddress - pbase; - alloc = driverHandle->getPeerAllocation(device, allocData, requestedAddress, &gpuAddress); + alloc = driverHandle->getPeerAllocation(device, 
allocData, reinterpret_cast(pbase), &gpuAddress); if (alloc == nullptr) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } diff --git a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp index edbe76ad33..9ac8c84834 100644 --- a/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp +++ b/level_zero/core/test/unit_tests/sources/memory/test_memory.cpp @@ -18,6 +18,7 @@ #include "level_zero/core/source/cmdlist/cmdlist_hw.h" #include "level_zero/core/source/context/context_imp.h" +#include "level_zero/core/source/device/device_imp.h" #include "level_zero/core/source/driver/host_pointer_manager.h" #include "level_zero/core/source/hw_helpers/l0_hw_helper.h" #include "level_zero/core/source/memory/memory_operations_helper.h" @@ -1734,7 +1735,7 @@ class MemoryManagerOpenIpcMock : public MemoryManagerIpcMock { NEO::GraphicsAllocation *createGraphicsAllocationFromSharedHandle(osHandle handle, const AllocationProperties &properties, bool requireSpecificBitness, bool isHostIpcAllocation) override { auto alloc = new NEO::MockGraphicsAllocation(0, NEO::GraphicsAllocation::AllocationType::BUFFER, - reinterpret_cast(0x1234), + reinterpret_cast(sharedHandleAddress++), 0x1000, 0, sizeof(uint32_t), @@ -1742,6 +1743,8 @@ class MemoryManagerOpenIpcMock : public MemoryManagerIpcMock { alloc->setGpuBaseAddress(0xabcd); return alloc; } + + uint64_t sharedHandleAddress = 0x1234; }; struct ContextIpcMock : public L0::ContextImp { @@ -2167,27 +2170,47 @@ HWTEST2_F(MultipleDevicePeerAllocationTest, } HWTEST_F(MultipleDevicePeerAllocationTest, - givenDeviceAllocationPassedAsArgumentToKernelInPeerDeviceThenPeerAllocationIsUsed) { + givenDeviceAllocationPassedAsArgumentToKernelInPeerDeviceThenPeerAllocation) { DebugManager.flags.EnableCrossDeviceAccess.set(true); L0::Device *device0 = driverHandle->devices[0]; L0::Device *device1 = driverHandle->devices[1]; + L0::DeviceImp *deviceImp1 = static_cast(device1); size_t 
size = 1024; size_t alignment = 1u; void *ptr = nullptr; ze_device_mem_alloc_desc_t deviceDesc = {}; - ze_result_t result = context->allocDeviceMem(device0->toHandle(), - &deviceDesc, - size, alignment, &ptr); + ze_result_t result = context->allocDeviceMem(device0->toHandle(), &deviceDesc, size, alignment, &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); EXPECT_NE(nullptr, ptr); + void *ptr1 = nullptr; + result = context->allocDeviceMem(device0->toHandle(), &deviceDesc, size, alignment, &ptr1); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_NE(nullptr, ptr1); + createModuleFromBinary(device1); createKernel(); + // set argument in device 1's list with ptr from device 0: peer allocation is created result = kernel->setArgBuffer(0, sizeof(ptr), &ptr); EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(static_cast(deviceImp1->peerAllocations.getNumAllocs()), 1u); + // set argument in device 1's list with ptr1 from device 0: another peer allocation is created + result = kernel->setArgBuffer(0, sizeof(ptr), &ptr1); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(static_cast(deviceImp1->peerAllocations.getNumAllocs()), 2u); + + // set argument in device 1's list with ptr from device 0 plus offset: no new peer allocation is created + // since a peer allocation is already available + void *ptrOffset = reinterpret_cast(reinterpret_cast(ptr) + 4); + result = kernel->setArgBuffer(0, sizeof(ptr), &ptrOffset); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + EXPECT_EQ(static_cast(deviceImp1->peerAllocations.getNumAllocs()), 2u); + + result = context->freeMem(ptr1); + ASSERT_EQ(result, ZE_RESULT_SUCCESS); result = context->freeMem(ptr); ASSERT_EQ(result, ZE_RESULT_SUCCESS); }