feature: improve logic to check for P2P access

Related-To: NEO-15711

Signed-off-by: Alicja Lukaszewicz <alicja.lukaszewicz@intel.com>
This commit is contained in:
Alicja Lukaszewicz
2025-10-07 13:24:05 +00:00
committed by Compute-Runtime-Automation
parent 467908cc23
commit dd252e7852
21 changed files with 632 additions and 701 deletions

View File

@@ -61,6 +61,7 @@
#include "level_zero/tools/source/debug/debug_session.h"
#include "level_zero/tools/source/metrics/metric.h"
#include "level_zero/tools/source/sysman/sysman.h"
#include "level_zero/ze_api.h"
#include "level_zero/ze_intel_gpu.h"
#include "encode_surface_state_args.h"
@@ -96,116 +97,16 @@ ze_result_t DeviceImp::getStatus() {
return ZE_RESULT_SUCCESS;
}
// Probes peer-to-peer access by copying 8 bytes from memory allocated on this device to
// memory allocated on peerDevice, then waiting for the copy to finish.
//
// peerDevice - device that owns the destination allocation of the probe copy.
// ret        - out: ZE_RESULT_ERROR_DEVICE_LOST when the last executed step reported it,
//              ZE_RESULT_SUCCESS in every other case (other failures only clear the
//              returned flag).
// Returns true only when context creation, both allocations, the append, the submission
// and the synchronization all succeeded.
bool DeviceImp::submitCopyForP2P(DeviceImp *peerDevice, ze_result_t &ret) {
    constexpr size_t probeSize = 8;
    constexpr size_t probeAlignment = 1;

    ze_command_list_desc_t listDescriptor = {};
    listDescriptor.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
    listDescriptor.pNext = nullptr;
    listDescriptor.flags = 0;
    listDescriptor.commandQueueGroupOrdinal = 0;

    ze_command_queue_desc_t queueDescriptor = {};
    queueDescriptor.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC;
    queueDescriptor.pNext = nullptr;
    queueDescriptor.flags = 0;
    queueDescriptor.mode = ZE_COMMAND_QUEUE_MODE_DEFAULT;
    queueDescriptor.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
    queueDescriptor.ordinal = 0;
    queueDescriptor.index = 0;

    ze_command_list_handle_t commandList = nullptr;
    ret = this->createInternalCommandList(&listDescriptor, &commandList);
    UNRECOVERABLE_IF(ret != ZE_RESULT_SUCCESS);

    ze_command_queue_handle_t commandQueue = nullptr;
    ret = this->createInternalCommandQueue(&queueDescriptor, &commandQueue);
    UNRECOVERABLE_IF(ret != ZE_RESULT_SUCCESS);

    auto driverHandleImp = static_cast<DriverHandleImp *>(this->getDriverHandle());

    // The handle starts as nullptr and the creation result is checked, so a failed
    // createContext() can never leave an indeterminate handle to be dereferenced below.
    ze_context_handle_t context = nullptr;
    ze_context_desc_t contextDesc = {};
    contextDesc.stype = ZE_STRUCTURE_TYPE_CONTEXT_DESC;
    ret = driverHandleImp->createContext(&contextDesc, 0u, nullptr, &context);

    bool canAccessPeer = false;
    if (ret == ZE_RESULT_SUCCESS && context != nullptr) {
        auto contextImp = static_cast<ContextImp *>(context);

        // Both probe buffers use the same default device-memory description.
        ze_device_mem_alloc_desc_t memoryDesc = {};
        memoryDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
        memoryDesc.ordinal = 0;
        memoryDesc.flags = 0;
        memoryDesc.pNext = nullptr;

        void *memory = nullptr;
        void *peerMemory = nullptr;
        ret = contextImp->allocDeviceMem(this->toHandle(), &memoryDesc, probeSize, probeAlignment, &memory);
        if (ret == ZE_RESULT_SUCCESS) {
            ret = contextImp->allocDeviceMem(peerDevice->toHandle(), &memoryDesc, probeSize, probeAlignment, &peerMemory);
        }

        // The copy is recorded only when both buffers exist; every later step runs only if
        // the previous one succeeded.
        if (ret == ZE_RESULT_SUCCESS) {
            auto list = L0::CommandList::fromHandle(commandList);
            CmdListMemoryCopyParams memoryCopyParams = {};
            ret = list->appendMemoryCopy(peerMemory, memory, probeSize, nullptr, 0, nullptr, memoryCopyParams);
            list->close();
        }
        if (ret == ZE_RESULT_SUCCESS) {
            ret = L0::CommandQueue::fromHandle(commandQueue)->executeCommandLists(1, &commandList, nullptr, true, nullptr, nullptr);
        }
        if (ret == ZE_RESULT_SUCCESS) {
            ret = L0::CommandQueue::fromHandle(commandQueue)->synchronize(std::numeric_limits<uint64_t>::max());
        }
        canAccessPeer = (ret == ZE_RESULT_SUCCESS);

        if (peerMemory != nullptr) {
            contextImp->freeMem(peerMemory);
        }
        if (memory != nullptr) {
            contextImp->freeMem(memory);
        }
    }

    L0::CommandList::fromHandle(commandList)->destroy();
    L0::CommandQueue::fromHandle(commandQueue)->destroy();
    if (context != nullptr) {
        L0::Context::fromHandle(context)->destroy();
    }

    // Only a lost device is surfaced to the caller as an error.
    if (ret != ZE_RESULT_ERROR_DEVICE_LOST) {
        ret = ZE_RESULT_SUCCESS;
    }
    return canAccessPeer;
}
// Stores in *value whether this device can access hPeerDevice. The answer is obtained from
// neoDevice->canAccessPeer(), which receives queryPeerAccess as the probing callback.
// NOTE(review): the body carries two variants of that call — one reporting through
// `canAccess`/`retVal`, one returning the flag directly together with freeMemoryAllocation —
// so *value is written twice and the trailing `return ZE_RESULT_SUCCESS;` is unreachable as
// written. Confirm which variant is the intended one.
ze_result_t DeviceImp::canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t *value) {
bool canAccess = false;
bool retVal = neoDevice->canAccessPeer(queryPeerAccess, fromHandle(hPeerDevice)->getNEODevice(), canAccess);
*value = neoDevice->canAccessPeer(queryPeerAccess, freeMemoryAllocation, fromHandle(hPeerDevice)->getNEODevice());
*value = canAccess;
// A failed query is reported to the caller as a lost device.
return retVal ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_DEVICE_LOST;
return ZE_RESULT_SUCCESS;
}
bool DeviceImp::queryPeerAccess(NEO::Device &device, NEO::Device &peerDevice, bool &canAccess) {
ze_result_t retVal = ZE_RESULT_SUCCESS;
auto csr = device.getInternalEngine().commandStreamReceiver;
if (!csr->isHardwareMode()) {
canAccess = false;
return false;
}
void DeviceImp::freeMemoryAllocation(NEO::Device &device, void *ptr) {
auto deviceImp = device.getSpecializedDevice<DeviceImp>();
auto peerDeviceImp = peerDevice.getSpecializedDevice<DeviceImp>();
uint32_t latency = std::numeric_limits<uint32_t>::max();
uint32_t bandwidth = 0;
ze_result_t result = deviceImp->queryFabricStats(peerDeviceImp, latency, bandwidth);
if (result == ZE_RESULT_ERROR_UNSUPPORTED_FEATURE || bandwidth == 0) {
canAccess = deviceImp->submitCopyForP2P(peerDeviceImp, retVal);
} else {
canAccess = true;
}
return retVal == ZE_RESULT_SUCCESS;
auto context = Context::fromHandle(deviceImp->getDriverHandle()->getDefaultContext());
context->freeMem(ptr);
}
ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc,

View File

@@ -47,7 +47,6 @@ struct Image;
struct DeviceImp : public Device, NEO::NonCopyableAndNonMovableClass {
DeviceImp();
ze_result_t getStatus() override;
MOCKABLE_VIRTUAL ze_result_t queryFabricStats(DeviceImp *pPeerDevice, uint32_t &latency, uint32_t &bandwidth);
ze_result_t canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t *value) override;
ze_result_t createCommandList(const ze_command_list_desc_t *desc,
ze_command_list_handle_t *commandList) override;
@@ -194,10 +193,10 @@ struct DeviceImp : public Device, NEO::NonCopyableAndNonMovableClass {
uint32_t getCopyEngineOrdinal() const;
std::optional<uint32_t> tryGetCopyEngineOrdinal() const;
void bcsSplitReleaseResources() override;
static bool queryPeerAccess(NEO::Device &device, NEO::Device &peerDevice, bool &canAccess);
static bool queryPeerAccess(NEO::Device &device, NEO::Device &peerDevice, void **handlePtr, uint64_t *handle);
static void freeMemoryAllocation(NEO::Device &device, void *memoryAllocation);
protected:
bool submitCopyForP2P(DeviceImp *hPeerDevice, ze_result_t &result);
ze_result_t getGlobalTimestampsUsingSubmission(uint64_t *hostTimestamp, uint64_t *deviceTimestamp);
ze_result_t getGlobalTimestampsUsingOsInterface(uint64_t *hostTimestamp, uint64_t *deviceTimestamp);
const char *getDeviceMemoryName();

View File

@@ -5,6 +5,10 @@
*
*/
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/driver_model_type.h"
#include "shared/source/os_interface/os_interface.h"
#include "level_zero/core/source/device/device_imp.h"
#include "level_zero/core/source/device/device_imp_drm/device_imp_peer.h"
@@ -27,8 +31,11 @@ ze_result_t DeviceImp::getExternalMemoryProperties(ze_device_external_memory_pro
return ZE_RESULT_SUCCESS;
}
ze_result_t DeviceImp::queryFabricStats(DeviceImp *pPeerDevice, uint32_t &latency, uint32_t &bandwidth) {
return queryFabricStatsDrm(this, pPeerDevice, latency, bandwidth);
// Peer-access probe callback: when `device` has no OS interface the probe cannot run and
// access is reported as unavailable; otherwise the decision is delegated to the DRM helper.
bool DeviceImp::queryPeerAccess(NEO::Device &device, NEO::Device &peerDevice, void **handlePtr, uint64_t *handle) {
    const auto &rootDeviceEnvironment = device.getRootDeviceEnvironment();
    if (!rootDeviceEnvironment.osInterface) {
        return false;
    }
    return queryPeerAccessDrm(device, peerDevice, handlePtr, handle);
}
} // namespace L0

View File

@@ -7,6 +7,7 @@
#include "level_zero/core/source/device/device_imp_drm/device_imp_peer.h"
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/os_interface/linux/drm_neo.h"
@@ -14,6 +15,7 @@
#include "shared/source/os_interface/linux/sys_calls.h"
#include "shared/source/utilities/directory.h"
#include "level_zero/core/source/context/context_imp.h"
#include "level_zero/core/source/device/device_imp.h"
#include <fcntl.h>
@@ -26,11 +28,6 @@ const std::string fabricIdFile = "/iaf_fabric_id";
ze_result_t queryFabricStatsDrm(DeviceImp *pSourceDevice, DeviceImp *pPeerDevice, uint32_t &latency, uint32_t &bandwidth) {
auto &osPeerInterface = pPeerDevice->getNEODevice()->getRootDeviceEnvironment().osInterface;
if (osPeerInterface == nullptr) {
return ZE_RESULT_ERROR_UNINITIALIZED;
}
auto pPeerDrm = osPeerInterface->getDriverModel()->as<NEO::Drm>();
auto peerDevicePath = pPeerDrm->getSysFsPciPath();
@@ -73,4 +70,51 @@ ze_result_t queryFabricStatsDrm(DeviceImp *pSourceDevice, DeviceImp *pPeerDevice
return ZE_RESULT_SUCCESS;
}
// Decides whether `device` can access memory of `peerDevice`.
//
// The fabric statistics query is tried first; when it succeeds, access is reported without
// further probing. Otherwise a 1-byte device allocation exportable as DMA-BUF is created on
// `device` and its exported handle is imported on `peerDevice`; access is reported when the
// import yields a pointer.
//
// handlePtr - in/out: the probe allocation. When *handlePtr is nullptr a new allocation is
//             created and stored here so later calls can reuse it; the caller is expected
//             to release it (the tests do so through DeviceImp::freeMemoryAllocation).
// handle    - in/out: exported handle of the probe allocation, filled together with
//             *handlePtr.
// Returns true when peer access is available, false when it is not or the probe failed.
bool queryPeerAccessDrm(NEO::Device &device, NEO::Device &peerDevice, void **handlePtr, uint64_t *handle) {
    auto deviceImp = device.getSpecializedDevice<DeviceImp>();
    auto peerImp = peerDevice.getSpecializedDevice<DeviceImp>();

    uint32_t latency = std::numeric_limits<uint32_t>::max();
    uint32_t bandwidth = 0;
    // NOTE(review): only the return code is inspected; the reported latency/bandwidth are
    // not — confirm that a successful query always implies a usable fabric link.
    const ze_result_t fabricResult = queryFabricStatsDrm(deviceImp, peerImp, latency, bandwidth);
    if (fabricResult == ZE_RESULT_SUCCESS) {
        return true;
    }

    // The fallback probe needs somewhere to keep the allocation and its exported handle.
    if (handlePtr == nullptr || handle == nullptr) {
        return false;
    }

    auto driverHandle = static_cast<DriverHandleImp *>(deviceImp->getDriverHandle());
    auto context = static_cast<ContextImp *>(driverHandle->getDefaultContext());

    if (*handlePtr == nullptr) {
        ze_external_memory_export_desc_t exportDesc = {};
        exportDesc.stype = ZE_STRUCTURE_TYPE_EXTERNAL_MEMORY_EXPORT_DESC;
        exportDesc.flags = ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF;

        ze_device_mem_alloc_desc_t deviceAllocDesc = {};
        deviceAllocDesc.stype = ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC;
        deviceAllocDesc.pNext = &exportDesc;

        const ze_result_t allocResult = context->allocDeviceMem(deviceImp->toHandle(), &deviceAllocDesc, 1u, 1u, handlePtr);
        if (allocResult != ZE_RESULT_SUCCESS) {
            return false;
        }

        const auto alloc = driverHandle->svmAllocsManager->getSVMAlloc(*handlePtr);
        if (alloc == nullptr) {
            // The pointer is not tracked by the SVM manager, so it cannot serve as a probe;
            // forget it so that a later call starts from scratch.
            *handlePtr = nullptr;
            return false;
        }

        const auto handleResult = alloc->gpuAllocations.getDefaultGraphicsAllocation()->peekInternalHandle(driverHandle->getMemoryManager(), *handle);
        if (handleResult < 0) {
            // Without an exported handle the allocation is useless. Release it and reset
            // *handlePtr, otherwise the next call would skip this branch and import
            // whatever stale value *handle still holds.
            context->freeMem(*handlePtr);
            *handlePtr = nullptr;
            return false;
        }
    }

    ze_ipc_memory_flags_t flags = {};
    NEO::SvmAllocationData allocDataInternal(peerDevice.getRootDeviceIndex());
    void *importedPtr = driverHandle->importFdHandle(&peerDevice, flags, *handle, NEO::AllocationType::buffer, nullptr, nullptr, allocDataInternal);

    const bool canAccess = importedPtr != nullptr;
    if (canAccess) {
        // The import only served as the probe; drop the peer-side mapping right away.
        context->freeMem(importedPtr);
    }
    return canAccess;
}
} // namespace L0

View File

@@ -6,11 +6,17 @@
*/
#pragma once
#include <level_zero/ze_api.h>
namespace NEO {
class Device;
}
namespace L0 {
struct DeviceImp;
ze_result_t queryFabricStatsDrm(DeviceImp *pSourceDevice, DeviceImp *pPeerDevice, uint32_t &latency, uint32_t &bandwidth);
bool queryPeerAccessDrm(NEO::Device &device, NEO::Device &peerDevice, void **handlePtr, uint64_t *handle);
} // namespace L0

View File

@@ -68,15 +68,14 @@ ze_result_t DeviceImp::getExternalMemoryProperties(ze_device_external_memory_pro
return ZE_RESULT_SUCCESS;
}
ze_result_t DeviceImp::queryFabricStats(DeviceImp *pPeerDevice, uint32_t &latency, uint32_t &bandwidth) {
NEO::Device *activeDevice = getActiveDevice();
if (activeDevice->getRootDeviceEnvironment().osInterface) {
NEO::DriverModelType driverType = neoDevice->getRootDeviceEnvironment().osInterface->getDriverModel()->getDriverModelType();
bool DeviceImp::queryPeerAccess(NEO::Device &device, NEO::Device &peerDevice, void **handlePtr, uint64_t *handle) {
if (device.getRootDeviceEnvironment().osInterface) {
NEO::DriverModelType driverType = device.getRootDeviceEnvironment().osInterface->getDriverModel()->getDriverModelType();
if (driverType == NEO::DriverModelType::drm) {
return queryFabricStatsDrm(this, pPeerDevice, latency, bandwidth);
return queryPeerAccessDrm(device, peerDevice, handlePtr, handle);
}
}
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
return false;
}
} // namespace L0

View File

@@ -42,8 +42,8 @@ ze_result_t DeviceImp::getExternalMemoryProperties(ze_device_external_memory_pro
return ZE_RESULT_SUCCESS;
}
ze_result_t DeviceImp::queryFabricStats(DeviceImp *pPeerDevice, uint32_t &latency, uint32_t &bandwidth) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
// Stub variant of the peer-access probe: no arguments are inspected and access is always
// reported as unavailable.
bool DeviceImp::queryPeerAccess(NEO::Device &device, NEO::Device &peerDevice, void **handlePtr, uint64_t *handle) {
return false;
}
} // namespace L0

View File

@@ -89,7 +89,7 @@ void DriverImp::initialize(ze_result_t *result) {
neoDeviceToExpose.push_back(Device::fromHandle(deviceToExpose)->getNEODevice());
}
NEO::Device::initializePeerAccessForDevices(DeviceImp::queryPeerAccess, neoDeviceToExpose);
NEO::Device::initializePeerAccessForDevices(DeviceImp::queryPeerAccess, DeviceImp::freeMemoryAllocation, neoDeviceToExpose);
}
}

View File

@@ -105,6 +105,7 @@ struct MockDeviceImp : public L0::DeviceImp {
using Base::adjustCommandQueueDesc;
using Base::debugSession;
using Base::deviceInOrderCounterAllocator;
using Base::freeMemoryAllocation;
using Base::getNEODevice;
using Base::hostInOrderCounterAllocator;
using Base::implicitScalingCapable;

View File

@@ -43,6 +43,7 @@ struct Mock<DriverHandle> : public DriverHandle {
ADDMETHOD_NOBASE(releaseImportedPointer, ze_result_t, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, (void *ptr))
ADDMETHOD_NOBASE(getHostPointerBaseAddress, ze_result_t, ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, (void *ptr, void **baseAddress))
ADDMETHOD_NOBASE(findHostPointerAllocation, NEO::GraphicsAllocation *, nullptr, (void *ptr, size_t size, uint32_t rootDeviceIndex))
ADDMETHOD_NOBASE(importFdHandle, void *, nullptr, (NEO::Device * neoDevice, ze_ipc_memory_flags_t flags, uint64_t handle, NEO::AllocationType allocationType, void *basePointer, NEO::GraphicsAllocation **pAlloc, NEO::SvmAllocationData &mappedPeerAllocData))
ADDMETHOD_CONST_NOBASE(getEventMaxPacketCount, uint32_t, 8, (uint32_t, ze_device_handle_t *))
ADDMETHOD_CONST_NOBASE(getEventMaxKernelCount, uint32_t, 3, (uint32_t, ze_device_handle_t *))

View File

@@ -0,0 +1,12 @@
#
# Copyright (C) 2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# The DRM multi-threaded device test is added to the target only when building for UNIX.
if(UNIX)
target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/test_mt_device_drm.cpp
)
endif()

View File

@@ -0,0 +1,120 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/os_interface/device_factory.h"
#include "shared/test/common/libult/linux/drm_mock.h"
#include "shared/test/common/test_macros/test.h"
#include "level_zero/core/source/device/device.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h"
namespace L0 {
namespace ult {
struct MultiDeviceQueryPeerAccessDrmFixture {
void setUp() {
debugManager.flags.CreateMultipleRootDevices.set(numRootDevices);
NEO::ExecutionEnvironment *executionEnvironment = new NEO::ExecutionEnvironment;
executionEnvironment->prepareRootDeviceEnvironments(numRootDevices);
for (uint32_t i = 0; i < numRootDevices; ++i) {
executionEnvironment->rootDeviceEnvironments[i]->memoryOperationsInterface = std::make_unique<MockMemoryOperations>();
executionEnvironment->rootDeviceEnvironments[i]->osInterface.reset(new OSInterface());
auto drm = new DrmMock{*executionEnvironment->rootDeviceEnvironments[i]};
executionEnvironment->rootDeviceEnvironments[i]->osInterface->setDriverModel(std::unique_ptr<DriverModel>{drm});
}
driverHandle = std::make_unique<Mock<DriverHandle>>();
driverHandle->importFdHandleResult = reinterpret_cast<void *>(0x1234);
for (auto &device : driverHandle->devices) {
delete device;
}
driverHandle->devices.clear();
auto devices = NEO::DeviceFactory::createDevices(*executionEnvironment);
ze_result_t res = driverHandle->initialize(std::move(devices));
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
ASSERT_EQ(numRootDevices, driverHandle->devices.size());
device0 = driverHandle->devices[0];
device1 = driverHandle->devices[1];
ASSERT_NE(nullptr, device0);
ASSERT_NE(nullptr, device1);
}
void tearDown() {}
struct TempAlloc {
NEO::Device &neoDevice;
void *handlePtr = nullptr;
uint64_t handle = std::numeric_limits<uint64_t>::max();
TempAlloc(NEO::Device &device) : neoDevice(device) {}
~TempAlloc() {
if (handlePtr) {
DeviceImp::freeMemoryAllocation(neoDevice, handlePtr);
}
}
};
uint32_t numRootDevices = 2u;
L0::Device *device0 = nullptr;
L0::Device *device1 = nullptr;
DebugManagerStateRestore restorer;
std::unique_ptr<Mock<DriverHandle>> driverHandle;
};
using MultipleDeviceQueryPeerAccessDrmTests = Test<MultiDeviceQueryPeerAccessDrmFixture>;
// Calls canAccessPeer from 8 threads, alternating the direction (device0->device1 and
// device1->device0), and expects the wrapped probe callback to be invoked exactly once.
TEST_F(MultipleDeviceQueryPeerAccessDrmTests, givenTwoDevicesWhenCanAccessPeerIsCalledManyTimesFromMultiThreadsInBothWaysThenPeerAccessIsQueriedOnlyOnce) {
    constexpr int numThreads = 8;
    constexpr int iterationCount = 20;

    // Counts how often the real probe is reached.
    std::atomic<uint32_t> queryCalled = 0;
    auto queryPeerAccess = [&queryCalled](NEO::Device &device, NEO::Device &peerDevice, void **handlePtr, uint64_t *handle) -> bool {
        ++queryCalled;
        return DeviceImp::queryPeerAccess(device, peerDevice, handlePtr, handle);
    };

    std::atomic_bool started = false;

    auto threadBody = [&](int threadId) {
        // Hold every worker until all of them exist, so the calls really overlap.
        while (!started.load(std::memory_order_acquire)) {
            std::this_thread::yield();
        }

        // Odd-numbered threads query in the opposite direction.
        const bool reversed = (threadId & 1) != 0;
        auto device = reversed ? device1 : device0;
        auto peerDevice = reversed ? device0 : device1;

        for (int iteration = 0; iteration < iterationCount; ++iteration) {
            const bool canAccess = device->getNEODevice()->canAccessPeer(queryPeerAccess, DeviceImp::freeMemoryAllocation, peerDevice->getNEODevice());
            EXPECT_TRUE(canAccess);
        }
    };

    std::vector<std::thread> threads;
    for (int threadId = 0; threadId < numThreads; ++threadId) {
        threads.emplace_back(threadBody, threadId);
    }

    started.store(true);

    for (auto &worker : threads) {
        worker.join();
    }

    EXPECT_EQ(1u, queryCalled);
}
} // namespace ult
} // namespace L0

View File

@@ -20,61 +20,6 @@
namespace L0 {
namespace ult {
using MultiDeviceMtTest = Test<MultiDeviceFixture>;
// Calls canAccessPeer from 8 threads in both directions and uses the latest flushed task
// counts of the two internal-engine command stream receivers as evidence of how often a
// probe was actually submitted.
TEST_F(MultiDeviceMtTest, givenTwoDevicesWhenCanAccessPeerIsCalledManyTimesFromMultiThreadsInBothWaysThenPeerAccessIsQueriedOnlyOnce) {
L0::Device *device0 = driverHandle->devices[0];
L0::Device *device1 = driverHandle->devices[1];
// Baseline: neither internal engine has flushed anything yet.
auto taskCount0 = device0->getNEODevice()->getInternalEngine().commandStreamReceiver->peekLatestFlushedTaskCount();
auto taskCount1 = device1->getNEODevice()->getInternalEngine().commandStreamReceiver->peekLatestFlushedTaskCount();
EXPECT_EQ(taskCount0, taskCount1);
EXPECT_EQ(0u, taskCount0);
std::atomic_bool started = false;
constexpr int numThreads = 8;
constexpr int iterationCount = 20;
std::vector<std::thread> threads;
auto threadBody = [&](int threadId) {
// Spin until the main thread releases all workers at once.
while (!started.load()) {
std::this_thread::yield();
}
auto device = device0;
auto peerDevice = device1;
// Odd-numbered threads query in the opposite direction.
if (threadId & 1) {
device = device1;
peerDevice = device0;
}
for (auto i = 0; i < iterationCount; i++) {
ze_bool_t canAccess = false;
ze_result_t res = device->canAccessPeer(peerDevice->toHandle(), &canAccess);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
EXPECT_TRUE(canAccess);
}
};
for (int i = 0; i < numThreads; ++i) {
threads.push_back(std::thread(threadBody, i));
}
started = true;
for (auto &thread : threads) {
thread.join();
}
// After the run exactly one of the two engines may have flushed work (at most 2 tasks);
// the other one must still be at 0.
taskCount0 = device0->getNEODevice()->getInternalEngine().commandStreamReceiver->peekLatestFlushedTaskCount();
taskCount1 = device1->getNEODevice()->getInternalEngine().commandStreamReceiver->peekLatestFlushedTaskCount();
EXPECT_NE(taskCount0, taskCount1);
EXPECT_GE(2u, std::max(taskCount0, taskCount1));
EXPECT_EQ(0u, std::min(taskCount0, taskCount1));
}
using DeviceMtTest = Test<DeviceFixture>;
HWTEST_F(DeviceMtTest, givenMultiThreadsExecutingCmdListAndSynchronizingDeviceWhenSynchronizeIsCalledThenTaskCountAndFlushStampAreTakenWithinSingleCriticalSection) {
L0::Device *device = driverHandle->devices[0];

View File

@@ -25,6 +25,7 @@
#include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h"
#include "gtest/gtest.h"
#include "hw_device_id.h"
namespace L0 {
namespace ult {
@@ -141,6 +142,5 @@ TEST_F(LuidDeviceTest, givenLuidDevicePropertiesStructureAndAndNoOsInterfaceThen
ze_result_t result = device->getProperties(&deviceProperties);
EXPECT_EQ(result, ZE_RESULT_ERROR_UNINITIALIZED);
}
} // namespace ult
} // namespace L0

View File

@@ -5,12 +5,23 @@
*
*/
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/os_interface/device_factory.h"
#include "shared/test/common/libult/linux/drm_mock.h"
#include "shared/test/common/mocks/linux/mock_drm_memory_manager.h"
#include "shared/test/common/test_macros/test.h"
#include "level_zero/core/source/device/device.h"
#include "level_zero/core/source/driver/driver_handle.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_context.h"
#include "level_zero/core/test/unit_tests/mocks/mock_device.h"
#include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h"
#include "drm_memory_manager.h"
#include "gtest/gtest.h"
namespace L0 {
@@ -73,5 +84,214 @@ TEST_F(DrmDeviceTests, givenCacheLevelUnsupportedViaCacheReservationApiWhenUsing
EXPECT_EQ(ZE_RESULT_ERROR_UNSUPPORTED_FEATURE, device->reserveCache(2, 0));
}
// DrmAllocation test double: exposes bufferObjects, always holds exactly one buffer-object
// slot, reports a single handle and is never resident.
class IpcObtainFdMockGraphicsAllocation : public NEO::DrmAllocation {
  public:
    using NEO::DrmAllocation::bufferObjects;

    IpcObtainFdMockGraphicsAllocation(uint32_t rootDeviceIndex,
                                      AllocationType allocationType,
                                      BufferObject *bo,
                                      void *ptrIn,
                                      size_t sizeIn,
                                      NEO::osHandle sharedHandle,
                                      MemoryPool pool,
                                      uint64_t canonizedGpuAddress)
        : NEO::DrmAllocation(rootDeviceIndex, 1u /*num gmms*/, allocationType, bo, ptrIn, sizeIn, sharedHandle, pool, canonizedGpuAddress) {
        // One slot, filled in later by the memory manager mock.
        bufferObjects.resize(1u);
    }

    uint32_t getNumHandles() override { return 1u; }

    bool isResident(uint32_t contextId) const override { return false; }
};
// DrmMemoryManager test double: hands out IpcObtainFdMockGraphicsAllocation objects backed
// by mock buffer objects and can be armed to fail one obtainFdFromHandle() call.
class MemoryManagerFdMock : public NEO::DrmMemoryManager {
  public:
    MemoryManagerFdMock(NEO::ExecutionEnvironment &executionEnvironment)
        : NEO::DrmMemoryManager(GemCloseWorkerMode::gemCloseWorkerInactive, false, false, executionEnvironment) {}

    // Every request yields a fresh 0x1000-byte mock allocation whose pointer is taken from
    // a running counter; the returned object is owned by the caller.
    NEO::GraphicsAllocation *allocateGraphicsMemoryInPreferredPool(const AllocationProperties &properties, const void *hostPtr) override {
        auto fakePtr = reinterpret_cast<void *>(sharedHandleAddress++);
        auto gpuAddress = getGmmHelper(0)->canonize(castToUint64(fakePtr));
        size_t allocationSize = 0x1000;

        auto mockAllocation = new IpcObtainFdMockGraphicsAllocation(0u,
                                                                    NEO::AllocationType::buffer,
                                                                    nullptr,
                                                                    fakePtr,
                                                                    allocationSize,
                                                                    0u,
                                                                    MemoryPool::system4KBPages,
                                                                    gpuAddress);

        // The buffer object stays owned by mockBos; the allocation only refers to it.
        auto &drm = this->getDrm(0u);
        mockBos.emplace_back(new MockBufferObject{properties.rootDeviceIndex, &drm});
        mockAllocation->bufferObjects[0] = mockBos.back().get();
        mockAllocation->setGpuBaseAddress(0xabcd);
        return mockAllocation;
    }

    void freeGraphicsMemory(NEO::GraphicsAllocation *alloc, bool isImportedAllocation) override {
        delete alloc;
    }

    // Fails exactly once after failOnObtainFdFromHandle was set, then forwards to the base.
    int obtainFdFromHandle(int boHandle, uint32_t rootDeviceIndex) override {
        if (!failOnObtainFdFromHandle) {
            return NEO::DrmMemoryManager::obtainFdFromHandle(boHandle, rootDeviceIndex);
        }
        failOnObtainFdFromHandle = false;
        return -1;
    }

    bool failOnObtainFdFromHandle = false;
    uint64_t sharedHandleAddress = 0x1234;
    std::vector<std::unique_ptr<MockBufferObject>> mockBos;
};
// Fixture that creates two root devices, each with a DrmMock driver model, initializes a
// mocked driver handle, swaps in MemoryManagerFdMock together with a matching
// SVMAllocsManager, and creates a context on top of that setup.
struct MultiDeviceQueryPeerAccessDrmFixture {
    void setUp() {
        debugManager.flags.CreateMultipleRootDevices.set(numRootDevices);

        NEO::ExecutionEnvironment *executionEnvironment = new NEO::ExecutionEnvironment;
        executionEnvironment->prepareRootDeviceEnvironments(numRootDevices);
        for (uint32_t i = 0; i < numRootDevices; ++i) {
            executionEnvironment->rootDeviceEnvironments[i]->memoryOperationsInterface = std::make_unique<MockMemoryOperations>();
            executionEnvironment->rootDeviceEnvironments[i]->osInterface.reset(new OSInterface());
            auto drm = new DrmMock{*executionEnvironment->rootDeviceEnvironments[i]};
            executionEnvironment->rootDeviceEnvironments[i]->osInterface->setDriverModel(std::unique_ptr<DriverModel>{drm});
        }

        driverHandle = std::make_unique<Mock<WhiteBox<L0::DriverHandleImp>>>();
        driverHandle->importFdHandleResult = reinterpret_cast<void *>(0x1234);

        // Drop any devices the mock already holds before the new ones are registered.
        for (auto &device : driverHandle->devices) {
            delete device;
        }
        driverHandle->devices.clear();

        auto devices = NEO::DeviceFactory::createDevices(*executionEnvironment);
        ze_result_t res = driverHandle->initialize(std::move(devices));
        EXPECT_EQ(ZE_RESULT_SUCCESS, res);

        // Remember the original managers so tearDown() can put them back.
        prevMemoryManager = driverHandle->getMemoryManager();
        currMemoryManager = new MemoryManagerFdMock(*executionEnvironment);
        driverHandle->setMemoryManager(currMemoryManager);

        prevSvmAllocsManager = driverHandle->svmAllocsManager;
        currSvmAllocsManager = new NEO::SVMAllocsManager(currMemoryManager);
        driverHandle->svmAllocsManager = currSvmAllocsManager;

        ze_context_handle_t hContext{};
        ze_context_desc_t desc{ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0};
        res = driverHandle->createContext(&desc, 0u, nullptr, &hContext);
        EXPECT_EQ(ZE_RESULT_SUCCESS, res);
        context = static_cast<ContextImp *>(Context::fromHandle(hContext));

        ASSERT_EQ(numRootDevices, driverHandle->devices.size());
        device0 = driverHandle->devices[0];
        device1 = driverHandle->devices[1];
        ASSERT_NE(nullptr, device0);
        ASSERT_NE(nullptr, device1);
    }

    void tearDown() {
        driverHandle->svmAllocsManager = prevSvmAllocsManager;
        delete currSvmAllocsManager;
        driverHandle->setMemoryManager(prevMemoryManager);
        delete currMemoryManager;
        // Context creation is only EXPECTed in setUp(), so it may have failed and left no
        // context to destroy.
        if (context != nullptr) {
            context->destroy();
        }
    }

    // Holder for the probe allocation handed to queryPeerAccess; whatever is stored in
    // handlePtr is released on destruction.
    struct TempAlloc {
        NEO::Device &neoDevice;
        void *handlePtr = nullptr;
        uint64_t handle = std::numeric_limits<uint64_t>::max();
        TempAlloc(NEO::Device &device) : neoDevice(device) {}
        ~TempAlloc() {
            if (handlePtr) {
                MockDeviceImp::freeMemoryAllocation(neoDevice, handlePtr);
            }
        }
    };

    uint32_t numRootDevices = 2u;
    L0::Device *device0 = nullptr;
    L0::Device *device1 = nullptr;
    DebugManagerStateRestore restorer;
    std::unique_ptr<Mock<DriverHandle>> driverHandle;
    L0::ContextImp *context = nullptr;
    NEO::MemoryManager *prevMemoryManager = nullptr;
    MemoryManagerFdMock *currMemoryManager = nullptr;
    // Initialized like the other raw pointers so none of them is ever indeterminate.
    NEO::SVMAllocsManager *prevSvmAllocsManager = nullptr;
    NEO::SVMAllocsManager *currSvmAllocsManager = nullptr;
};
using MultipleDeviceQueryPeerAccessDrmTests = Test<MultiDeviceQueryPeerAccessDrmFixture>;
// Two root devices of the same core family: the probe must create an allocation and report
// that access is available.
TEST_F(MultipleDeviceQueryPeerAccessDrmTests, givenTwoRootDevicesFromSameFamilyThenQueryPeerAccessReturnsTrue) {
    EXPECT_EQ(device0->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily,
              device1->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily);

    TempAlloc probe(*device0->getNEODevice());
    const bool canAccess = MockDeviceImp::queryPeerAccess(*device0->getNEODevice(), *device1->getNEODevice(), &probe.handlePtr, &probe.handle);

    ASSERT_NE(nullptr, probe.handlePtr);
    EXPECT_TRUE(canAccess);
}
// Probing twice with the same TempAlloc (so the second call sees the state left by the
// first) must give the same answer both times.
TEST_F(MultipleDeviceQueryPeerAccessDrmTests, givenQueryPeerAccessCalledTwiceThenQueryPeerAccessReturnsTheSameValueEachTime) {
    TempAlloc probe(*device0->getNEODevice());

    bool results[2] = {false, false};
    for (auto &result : results) {
        result = MockDeviceImp::queryPeerAccess(*device0->getNEODevice(), *device1->getNEODevice(), &probe.handlePtr, &probe.handle);
    }

    EXPECT_EQ(results[0], results[1]);
}
// With a default context whose allocDeviceMem reports ZE_RESULT_ERROR_DEVICE_LOST, the
// probe must give up after a single allocation attempt and report no access.
TEST_F(MultipleDeviceQueryPeerAccessDrmTests, givenDeviceFailsAllocateMemoryThenQueryPeerAccessReturnsFalse) {
    Mock<Context> failingContext;
    failingContext.allocDeviceMemResult = ZE_RESULT_ERROR_DEVICE_LOST;
    // Installed as the driver's default context for the duration of the test only.
    VariableBackup<ze_context_handle_t> backupContext(&driverHandle->defaultContext, failingContext.toHandle());

    TempAlloc probe(*device0->getNEODevice());
    const bool canAccess = MockDeviceImp::queryPeerAccess(*device0->getNEODevice(), *device1->getNEODevice(), &probe.handlePtr, &probe.handle);

    EXPECT_FALSE(canAccess);
    EXPECT_EQ(1u, failingContext.allocDeviceMemCalled);
}
// When importing the exported handle yields nullptr, the probe must report no access
// after exactly one import attempt.
TEST_F(MultipleDeviceQueryPeerAccessDrmTests, givenDeviceFailsImportFdHandleThenQueryPeerAccessReturnsFalse) {
    driverHandle->importFdHandleResult = nullptr;

    TempAlloc probe(*device0->getNEODevice());
    const bool canAccess = MockDeviceImp::queryPeerAccess(*device0->getNEODevice(), *device1->getNEODevice(), &probe.handlePtr, &probe.handle);

    EXPECT_FALSE(canAccess);
    EXPECT_EQ(1u, driverHandle->importFdHandleCalled);
}
// When the memory manager is armed to fail obtainFdFromHandle(), no handle can be
// exported for the probe allocation and the probe must report no access.
TEST_F(MultipleDeviceQueryPeerAccessDrmTests, givenDeviceFailPeekInternalHandleThenQueryPeerAccessReturnsFalse) {
    TempAlloc probe(*device0->getNEODevice());
    currMemoryManager->failOnObtainFdFromHandle = true;

    const bool canAccess = MockDeviceImp::queryPeerAccess(*device0->getNEODevice(), *device1->getNEODevice(), &probe.handlePtr, &probe.handle);

    EXPECT_FALSE(canAccess);
}
} // namespace ult
} // namespace L0

View File

@@ -2734,53 +2734,6 @@ TEST_F(MultipleDevicesTest, whenCallingsetAtomicAccessAttributeForSystemAccessSh
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
}
// Two root devices of the same core family: the probe itself must complete successfully.
TEST_F(MultipleDevicesDisabledImplicitScalingTest, givenTwoRootDevicesFromSameFamilyThenQueryPeerAccessSuccessfullyCompletes) {
    L0::Device *firstDevice = driverHandle->devices[0];
    L0::Device *secondDevice = driverHandle->devices[1];

    const GFXCORE_FAMILY firstFamily = firstDevice->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily;
    const GFXCORE_FAMILY secondFamily = secondDevice->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily;
    EXPECT_EQ(firstFamily, secondFamily);

    // Only the completion status is asserted; the reported access flag is not checked.
    bool canAccess = true;
    const bool querySucceeded = MockDeviceImp::queryPeerAccess(*firstDevice->getNEODevice(), *secondDevice->getNEODevice(), canAccess);
    EXPECT_TRUE(querySucceeded);
}
// Configures the internal-engine CSR of both root devices and all their sub-devices with
// WaitStatus::gpuHang as the canned wait result, then expects the peer-access probe to
// report failure.
HWTEST_F(MultipleDevicesDisabledImplicitScalingTest, givenTwoRootDevicesFromSameFamilyAndDeviceLostSynchronizeThenQueryPeerAccessReturnsFalse) {
constexpr size_t devicesCount{2};
ASSERT_LE(devicesCount, driverHandle->devices.size());
L0::Device *devices[devicesCount] = {driverHandle->devices[0], driverHandle->devices[1]};
// Collect each root device followed by its sub-devices.
std::vector<NEO::Device *> allNeoDevices{};
for (const auto device : devices) {
const auto neoDevice = device->getNEODevice();
const auto neoSubDevices = neoDevice->getSubDevices();
allNeoDevices.push_back(neoDevice);
allNeoDevices.insert(allNeoDevices.end(), neoSubDevices.begin(), neoSubDevices.end());
}
for (const auto neoDevice : allNeoDevices) {
auto deviceInternalEngine = neoDevice->getInternalEngine();
auto hwCsr = static_cast<CommandStreamReceiverHw<FamilyType> *>(deviceInternalEngine.commandStreamReceiver);
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(hwCsr);
// Presumably bypasses the real wait and makes it report a GPU hang — confirm against
// UltCommandStreamReceiver.
ultCsr->callBaseWaitForCompletionWithTimeout = false;
ultCsr->returnWaitForCompletionWithTimeout = WaitStatus::gpuHang;
}
GFXCORE_FAMILY device0Family = devices[0]->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily;
GFXCORE_FAMILY device1Family = devices[1]->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily;
EXPECT_EQ(device0Family, device1Family);
bool canAccess = true;
bool res = MockDeviceImp::queryPeerAccess(*devices[0]->getNEODevice(), *devices[1]->getNEODevice(), canAccess);
EXPECT_FALSE(res);
}
using DeviceGetStatusTest = Test<DeviceFixture>;
TEST_F(DeviceGetStatusTest, givenCallToDeviceGetStatusThenCorrectErrorCodeIsReturnedWhenResourcesHaveBeenReleased) {
L0::DeviceImp *deviceImp = static_cast<DeviceImp *>(device);
@@ -3373,394 +3326,6 @@ TEST_F(MultipleDevicesP2PWithXeLinkDevice0Access1Atomic1Device1Access1Atomic1Tes
EXPECT_TRUE(p2pProperties.flags & ZE_DEVICE_P2P_PROPERTY_FLAG_ATOMICS);
}
// Two root devices of the same core family: the probe must succeed and report access.
TEST_F(MultipleDevicesDisabledImplicitScalingTest, givenTwoRootDevicesFromSameFamilyThenQueryPeerAccessReturnsTrue) {
    L0::Device *firstDevice = driverHandle->devices[0];
    L0::Device *secondDevice = driverHandle->devices[1];

    const GFXCORE_FAMILY firstFamily = firstDevice->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily;
    const GFXCORE_FAMILY secondFamily = secondDevice->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily;
    EXPECT_EQ(firstFamily, secondFamily);

    bool canAccess = false;
    const bool querySucceeded = MockDeviceImp::queryPeerAccess(*firstDevice->getNEODevice(), *secondDevice->getNEODevice(), canAccess);

    EXPECT_TRUE(querySucceeded);
    EXPECT_TRUE(canAccess);
}
// Probing the same device pair twice in a row must succeed and report access both times.
TEST_F(MultipleDevicesDisabledImplicitScalingTest, givenQueryPeerAccessCalledTwiceThenQueryPeerAccessReturnsSameValueEachTime) {
    L0::Device *firstDevice = driverHandle->devices[0];
    L0::Device *secondDevice = driverHandle->devices[1];

    const GFXCORE_FAMILY firstFamily = firstDevice->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily;
    const GFXCORE_FAMILY secondFamily = secondDevice->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily;
    EXPECT_EQ(firstFamily, secondFamily);

    // The flag is deliberately shared between the two attempts.
    bool canAccess = false;
    for (int attempt = 0; attempt < 2; ++attempt) {
        const bool querySucceeded = MockDeviceImp::queryPeerAccess(*firstDevice->getNEODevice(), *secondDevice->getNEODevice(), canAccess);
        EXPECT_TRUE(querySucceeded);
        EXPECT_TRUE(canAccess);
    }
}
// The mock device hands out a command list whose appendMemoryCopy reports an error.
// The test expects the query to have appended a copy, to complete (returns true),
// and to report that the peer is not accessible.
TEST_F(MultipleDevicesTest, givenDeviceFailsAppendMemoryCopyThenQueryPeerAccessReturnsFalse) {
    struct CopyFailingDevice : public MockDeviceImp {
        CopyFailingDevice(L0::Device *device) : MockDeviceImp(device->getNEODevice()) {
            this->driverHandle = device->getDriverHandle();
            this->commandList.appendMemoryCopyResult = ZE_RESULT_ERROR_UNKNOWN;
            // Route lookups of the L0 device on the NEO device to this mock.
            this->neoDevice->setSpecializedDevice<L0::Device>(this);
        }

        // Fabric statistics are reported as unsupported by this mock.
        ze_result_t queryFabricStats(DeviceImp *pPeerDevice, uint32_t &latency, uint32_t &bandwidth) override {
            return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
        }

        // Every creation path returns the mock's own queue / list instead of allocating one.
        ze_result_t createInternalCommandList(const ze_command_list_desc_t *descriptor,
                                              ze_command_list_handle_t *phList) override {
            *phList = &this->commandList;
            return ZE_RESULT_SUCCESS;
        }
        ze_result_t createInternalCommandQueue(const ze_command_queue_desc_t *descriptor,
                                               ze_command_queue_handle_t *phQueue) override {
            *phQueue = &this->commandQueue;
            return ZE_RESULT_SUCCESS;
        }
        ze_result_t createCommandList(const ze_command_list_desc_t *descriptor,
                                      ze_command_list_handle_t *phList) override {
            *phList = &this->commandList;
            return ZE_RESULT_SUCCESS;
        }
        ze_result_t createCommandQueue(const ze_command_queue_desc_t *descriptor,
                                       ze_command_queue_handle_t *phQueue) override {
            *phQueue = &this->commandQueue;
            return ZE_RESULT_SUCCESS;
        }

        MockCommandList commandList;
        Mock<CommandQueue> commandQueue;
    };

    CopyFailingDevice *failingDevice = new CopyFailingDevice(driverHandle->devices[0]);
    L0::Device *peer = driverHandle->devices[1];

    bool peerAccessible = false;
    const bool querySucceeded = MockDeviceImp::queryPeerAccess(*failingDevice->getNEODevice(), *peer->getNEODevice(), peerAccessible);

    EXPECT_GT(failingDevice->commandList.appendMemoryCopyCalled, 0u);
    EXPECT_TRUE(querySucceeded);
    EXPECT_FALSE(peerAccessible);

    delete failingDevice;
}
// Calls canAccessPeer between the first two devices of the fixture and expects
// ZE_RESULT_SUCCESS together with a positive access answer.
TEST_F(MultipleDevicesTest, givenCanAccessPeerSucceedsThenReturnsSuccessAndCorrectValue) {
    L0::Device *queryingDevice = driverHandle->devices[0];
    L0::Device *peer = driverHandle->devices[1];

    ze_bool_t peerAccessible = false;
    const ze_result_t status = queryingDevice->canAccessPeer(peer->toHandle(), &peerAccessible);

    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    EXPECT_TRUE(peerAccessible);
}
// The mock device's command queue reports ZE_RESULT_ERROR_DEVICE_LOST from synchronize().
// The test expects canAccessPeer to return that error and to clear the access flag,
// which is deliberately pre-set to true.
TEST_F(MultipleDevicesTest, givenCanAccessPeerFailsThenReturnsDeviceLost) {
    struct LostDevice : public MockDeviceImp {
        // Command queue whose synchronization always reports a lost device.
        struct LostQueue : public Mock<CommandQueue> {
            ze_result_t synchronize(uint64_t timeout) override {
                return ZE_RESULT_ERROR_DEVICE_LOST;
            }
        };

        LostDevice(L0::Device *device) : MockDeviceImp(device->getNEODevice()) {
            this->driverHandle = device->getDriverHandle();
            // Route lookups of the L0 device on the NEO device to this mock.
            this->neoDevice->setSpecializedDevice<L0::Device>(this);
        }

        // Fabric statistics are unsupported; bandwidth is zeroed before returning.
        ze_result_t queryFabricStats(DeviceImp *pPeerDevice, uint32_t &latency, uint32_t &bandwidth) override {
            bandwidth = 0;
            return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
        }

        // Every creation path returns the mock's own queue / list instead of allocating one.
        ze_result_t createInternalCommandList(const ze_command_list_desc_t *descriptor,
                                              ze_command_list_handle_t *phList) override {
            *phList = &this->commandList;
            return ZE_RESULT_SUCCESS;
        }
        ze_result_t createInternalCommandQueue(const ze_command_queue_desc_t *descriptor,
                                               ze_command_queue_handle_t *phQueue) override {
            *phQueue = &this->commandQueue;
            return ZE_RESULT_SUCCESS;
        }
        ze_result_t createCommandList(const ze_command_list_desc_t *descriptor,
                                      ze_command_list_handle_t *phList) override {
            *phList = &this->commandList;
            return ZE_RESULT_SUCCESS;
        }
        ze_result_t createCommandQueue(const ze_command_queue_desc_t *descriptor,
                                       ze_command_queue_handle_t *phQueue) override {
            *phQueue = &this->commandQueue;
            return ZE_RESULT_SUCCESS;
        }

        MockCommandList commandList;
        LostQueue commandQueue;
    };

    LostDevice *failingDevice = new LostDevice(driverHandle->devices[0]);
    L0::Device *peer = driverHandle->devices[1];

    ze_bool_t peerAccessible = true;
    const ze_result_t status = failingDevice->canAccessPeer(peer->toHandle(), &peerAccessible);

    EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, status);
    EXPECT_FALSE(peerAccessible);

    delete failingDevice;
}
// Switches the internal engine's command stream receiver type to TBX and expects
// queryPeerAccess to return false and to leave the access flag false.
HWTEST_F(MultipleDevicesTest, givenCsrModeDifferentThanHardwareWhenQueryPeerAccessThenReturnsFalse) {
// Mock device that reports fabric stats as unsupported and returns its own
// command list / command queue members from every creation call.
struct MockDeviceFail : public MockDeviceImp {
MockDeviceFail(L0::Device *device) : MockDeviceImp(device->getNEODevice()) {
this->driverHandle = device->getDriverHandle();
// Register this mock as the L0 device specialized on the NEO device.
this->neoDevice->template setSpecializedDevice<L0::Device>(this);
}
ze_result_t queryFabricStats(DeviceImp *pPeerDevice, uint32_t &latency, uint32_t &bandwidth) override {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
ze_result_t createCommandQueue(const ze_command_queue_desc_t *desc,
ze_command_queue_handle_t *commandQueue) override {
*commandQueue = &this->commandQueue;
return ZE_RESULT_SUCCESS;
}
ze_result_t createCommandList(const ze_command_list_desc_t *desc,
ze_command_list_handle_t *commandList) override {
*commandList = &this->commandList;
return ZE_RESULT_SUCCESS;
}
ze_result_t createInternalCommandQueue(const ze_command_queue_desc_t *desc,
ze_command_queue_handle_t *commandQueue) override {
*commandQueue = &this->commandQueue;
return ZE_RESULT_SUCCESS;
}
ze_result_t createInternalCommandList(const ze_command_list_desc_t *desc,
ze_command_list_handle_t *commandList) override {
*commandList = &this->commandList;
return ZE_RESULT_SUCCESS;
}
MockCommandList commandList;
Mock<CommandQueue> commandQueue;
};
MockDeviceFail *device0 = new MockDeviceFail(driverHandle->devices[0]);
L0::Device *device1 = driverHandle->devices[1];
// Reach the CSR of the device's internal engine and mark it as a TBX (non-hardware) receiver.
auto deviceInternalEngine = device0->getNEODevice()->getInternalEngine();
auto hwCsr = static_cast<CommandStreamReceiverHw<FamilyType> *>(deviceInternalEngine.commandStreamReceiver);
auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(hwCsr);
ultCsr->commandStreamReceiverType = CommandStreamReceiverType::tbx;
bool canAccess = false;
bool res = MockDeviceImp::queryPeerAccess(*device0->getNEODevice(), *device1->getNEODevice(), canAccess);
// Both the query result and the reported access are expected to be false in this mode.
EXPECT_FALSE(res);
EXPECT_FALSE(canAccess);
delete device0;
}
// The mock device's command queue fails executeCommandLists with ZE_RESULT_ERROR_UNKNOWN.
// The test expects the query to complete (returns true) while reporting no peer access.
TEST_F(MultipleDevicesTest, givenDeviceFailsExecuteCommandListThenQueryPeerAccessReturnsFalse) {
    struct ExecFailingDevice : public MockDeviceImp {
        // Command queue that cannot execute anything but can be destroyed cleanly.
        struct ExecFailingQueue : public Mock<CommandQueue> {
            ze_result_t executeCommandLists(uint32_t numCommandLists,
                                            ze_command_list_handle_t *phCommandLists,
                                            ze_fence_handle_t hFence, bool performMigration,
                                            NEO::LinearStream *parentImmediateCommandlistLinearStream,
                                            std::unique_lock<std::mutex> *outerLockForIndirect) override {
                return ZE_RESULT_ERROR_UNKNOWN;
            }
            ze_result_t destroy() override {
                return ZE_RESULT_SUCCESS;
            }
        };

        ExecFailingDevice(L0::Device *device) : MockDeviceImp(device->getNEODevice()) {
            this->driverHandle = device->getDriverHandle();
            // Route lookups of the L0 device on the NEO device to this mock.
            this->neoDevice->setSpecializedDevice<L0::Device>(this);
        }

        // Fabric statistics are reported as unsupported by this mock.
        ze_result_t queryFabricStats(DeviceImp *pPeerDevice, uint32_t &latency, uint32_t &bandwidth) override {
            return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
        }

        // Every creation path returns the mock's own queue / list instead of allocating one.
        ze_result_t createInternalCommandList(const ze_command_list_desc_t *descriptor,
                                              ze_command_list_handle_t *phList) override {
            *phList = &this->commandList;
            return ZE_RESULT_SUCCESS;
        }
        ze_result_t createInternalCommandQueue(const ze_command_queue_desc_t *descriptor,
                                               ze_command_queue_handle_t *phQueue) override {
            *phQueue = &this->commandQueue;
            return ZE_RESULT_SUCCESS;
        }
        ze_result_t createCommandList(const ze_command_list_desc_t *descriptor,
                                      ze_command_list_handle_t *phList) override {
            *phList = &this->commandList;
            return ZE_RESULT_SUCCESS;
        }
        ze_result_t createCommandQueue(const ze_command_queue_desc_t *descriptor,
                                       ze_command_queue_handle_t *phQueue) override {
            *phQueue = &this->commandQueue;
            return ZE_RESULT_SUCCESS;
        }

        MockCommandList commandList;
        ExecFailingQueue commandQueue;
    };

    ExecFailingDevice *failingDevice = new ExecFailingDevice(driverHandle->devices[0]);
    L0::Device *peer = driverHandle->devices[1];

    bool peerAccessible = false;
    const bool querySucceeded = MockDeviceImp::queryPeerAccess(*failingDevice->getNEODevice(), *peer->getNEODevice(), peerAccessible);

    EXPECT_TRUE(querySucceeded);
    EXPECT_FALSE(peerAccessible);

    delete failingDevice;
}
// Fabric stats succeed but report zero bandwidth, and the command queue fails to execute.
// The test expects the query to complete (returns true) while reporting no peer access.
TEST_F(MultipleDevicesTest, givenQueryFabricStatsReturningBandwidthZeroAndDeviceFailsThenQueryPeerAccessReturnsFalse) {
    struct ZeroBandwidthDevice : public MockDeviceImp {
        // Command queue that cannot execute anything but can be destroyed cleanly.
        struct ExecFailingQueue : public Mock<CommandQueue> {
            ze_result_t executeCommandLists(uint32_t numCommandLists,
                                            ze_command_list_handle_t *phCommandLists,
                                            ze_fence_handle_t hFence, bool performMigration,
                                            NEO::LinearStream *parentImmediateCommandlistLinearStream,
                                            std::unique_lock<std::mutex> *outerLockForIndirect) override {
                return ZE_RESULT_ERROR_UNKNOWN;
            }
            ze_result_t destroy() override {
                return ZE_RESULT_SUCCESS;
            }
        };

        ZeroBandwidthDevice(L0::Device *device) : MockDeviceImp(device->getNEODevice()) {
            this->driverHandle = device->getDriverHandle();
            // Route lookups of the L0 device on the NEO device to this mock.
            this->neoDevice->setSpecializedDevice<L0::Device>(this);
        }

        // Fabric statistics are available but show no bandwidth to the peer.
        ze_result_t queryFabricStats(DeviceImp *pPeerDevice, uint32_t &latency, uint32_t &bandwidth) override {
            bandwidth = 0;
            return ZE_RESULT_SUCCESS;
        }

        // Every creation path returns the mock's own queue / list instead of allocating one.
        ze_result_t createInternalCommandList(const ze_command_list_desc_t *descriptor,
                                              ze_command_list_handle_t *phList) override {
            *phList = &this->commandList;
            return ZE_RESULT_SUCCESS;
        }
        ze_result_t createInternalCommandQueue(const ze_command_queue_desc_t *descriptor,
                                               ze_command_queue_handle_t *phQueue) override {
            *phQueue = &this->commandQueue;
            return ZE_RESULT_SUCCESS;
        }
        ze_result_t createCommandList(const ze_command_list_desc_t *descriptor,
                                      ze_command_list_handle_t *phList) override {
            *phList = &this->commandList;
            return ZE_RESULT_SUCCESS;
        }
        ze_result_t createCommandQueue(const ze_command_queue_desc_t *descriptor,
                                       ze_command_queue_handle_t *phQueue) override {
            *phQueue = &this->commandQueue;
            return ZE_RESULT_SUCCESS;
        }

        MockCommandList commandList;
        ExecFailingQueue commandQueue;
    };

    ZeroBandwidthDevice *failingDevice = new ZeroBandwidthDevice(driverHandle->devices[0]);
    L0::Device *peer = driverHandle->devices[1];

    bool peerAccessible = false;
    const bool querySucceeded = MockDeviceImp::queryPeerAccess(*failingDevice->getNEODevice(), *peer->getNEODevice(), peerAccessible);

    EXPECT_TRUE(querySucceeded);
    EXPECT_FALSE(peerAccessible);

    delete failingDevice;
}
// Fabric stats succeed with a non-zero bandwidth and the command queue executes successfully.
// The test expects the query to complete (returns true) and to report peer access.
TEST_F(MultipleDevicesTest, givenQueryFabricStatsReturningBandwidthNonZeroAndDeviceDoesFailThenQueryPeerAccessReturnsTrue) {
    struct ConnectedDevice : public MockDeviceImp {
        // Command queue whose execution and destruction both succeed.
        struct SucceedingQueue : public Mock<CommandQueue> {
            ze_result_t executeCommandLists(uint32_t numCommandLists,
                                            ze_command_list_handle_t *phCommandLists,
                                            ze_fence_handle_t hFence, bool performMigration,
                                            NEO::LinearStream *parentImmediateCommandlistLinearStream,
                                            std::unique_lock<std::mutex> *outerLockForIndirect) override {
                return ZE_RESULT_SUCCESS;
            }
            ze_result_t destroy() override {
                return ZE_RESULT_SUCCESS;
            }
        };

        ConnectedDevice(L0::Device *device) : MockDeviceImp(device->getNEODevice()) {
            this->driverHandle = device->getDriverHandle();
            // Route lookups of the L0 device on the NEO device to this mock.
            this->neoDevice->setSpecializedDevice<L0::Device>(this);
        }

        // Fabric statistics are available and show a non-zero bandwidth to the peer.
        ze_result_t queryFabricStats(DeviceImp *pPeerDevice, uint32_t &latency, uint32_t &bandwidth) override {
            bandwidth = 100;
            return ZE_RESULT_SUCCESS;
        }

        // Every creation path returns the mock's own queue / list instead of allocating one.
        ze_result_t createInternalCommandList(const ze_command_list_desc_t *descriptor,
                                              ze_command_list_handle_t *phList) override {
            *phList = &this->commandList;
            return ZE_RESULT_SUCCESS;
        }
        ze_result_t createInternalCommandQueue(const ze_command_queue_desc_t *descriptor,
                                               ze_command_queue_handle_t *phQueue) override {
            *phQueue = &this->commandQueue;
            return ZE_RESULT_SUCCESS;
        }
        ze_result_t createCommandList(const ze_command_list_desc_t *descriptor,
                                      ze_command_list_handle_t *phList) override {
            *phList = &this->commandList;
            return ZE_RESULT_SUCCESS;
        }
        ze_result_t createCommandQueue(const ze_command_queue_desc_t *descriptor,
                                       ze_command_queue_handle_t *phQueue) override {
            *phQueue = &this->commandQueue;
            return ZE_RESULT_SUCCESS;
        }

        MockCommandList commandList;
        SucceedingQueue commandQueue;
    };

    ConnectedDevice *mockDevice = new ConnectedDevice(driverHandle->devices[0]);
    L0::Device *peer = driverHandle->devices[1];

    bool peerAccessible = false;
    const bool querySucceeded = MockDeviceImp::queryPeerAccess(*mockDevice->getNEODevice(), *peer->getNEODevice(), peerAccessible);

    EXPECT_TRUE(querySucceeded);
    EXPECT_TRUE(peerAccessible);

    delete mockDevice;
}
TEST_F(MultipleDevicesDisabledImplicitScalingTest, givenTopologyForTwoSubdevicesWhenGettingApiSliceIdWithRootDeviceThenCorrectMappingIsUsedAndApiSliceIdsForSubdeviceReturned) {
L0::Device *device0 = driverHandle->devices[0];
auto deviceImp0 = static_cast<DeviceImp *>(device0);
@@ -6878,5 +6443,18 @@ TEST_F(L0DeviceGetCmdlistCreateFunTest, GivenQueryDeviceRecordReplayGraphWhenRet
EXPECT_EQ(deviceRecordReplayGraphCapability, recordReplayGraphProperties.graphFlags);
}
using MultipleDeviceQueryPeerAccessTests = Test<MultiDeviceFixture>;
// Runs the handle-based peer-access query between the fixture's first two devices
// and expects it to report no access (per the test name: no OS interface is present).
TEST_F(MultipleDeviceQueryPeerAccessTests, givenDeviceQueryPeerAccessAndNoOsInterfaceThenReturnsFalse) {
    auto neoDevice0 = driverHandle->devices[0]->getNEODevice();
    auto neoDevice1 = driverHandle->devices[1]->getNEODevice();

    // Out-parameters of the query: an allocation pointer and a 64-bit handle that starts at its max value.
    void *allocationPtr = nullptr;
    uint64_t allocationHandle = std::numeric_limits<uint64_t>::max();

    const bool peerAccessible = MockDeviceImp::queryPeerAccess(*neoDevice0, *neoDevice1, &allocationPtr, &allocationHandle);
    EXPECT_FALSE(peerAccessible);
}
} // namespace ult
} // namespace L0

View File

@@ -0,0 +1,12 @@
#
# Copyright (C) 2025 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Add the WDDM device test sources only on Windows, or on UNIX builds where
# DISABLE_WDDM_LINUX is not set.
if(WIN32 OR(UNIX AND NOT DISABLE_WDDM_LINUX))
target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/test_l0_wddm_device.cpp
)
endif()

View File

@@ -0,0 +1,30 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/test_macros/test.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_device.h"
namespace L0 {
namespace ult {
using MultipleDeviceQueryPeerAccessWddmTests = Test<MultiDeviceFixture>;
// Runs the handle-based peer-access query between the fixture's first two devices
// and expects it to report no access.
TEST_F(MultipleDeviceQueryPeerAccessWddmTests, givenDeviceQueryPeerAccessThenReturnFalse) {
    auto neoDevice0 = driverHandle->devices[0]->getNEODevice();
    auto neoDevice1 = driverHandle->devices[1]->getNEODevice();

    // Out-parameters of the query: an allocation pointer and a 64-bit handle that starts at its max value.
    void *allocationPtr = nullptr;
    uint64_t allocationHandle = std::numeric_limits<uint64_t>::max();

    const bool peerAccessible = MockDeviceImp::queryPeerAccess(*neoDevice0, *neoDevice1, &allocationPtr, &allocationHandle);
    EXPECT_FALSE(peerAccessible);
}
} // namespace ult
} // namespace L0

View File

@@ -23,6 +23,7 @@
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/ray_tracing_helper.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/unified_memory_pooling.h"
#include "shared/source/os_interface/driver_info.h"
@@ -1370,57 +1371,70 @@ std::vector<DeviceVector> Device::groupDevices(DeviceVector devices) {
return outDevices;
}
bool Device::canAccessPeer(QueryPeerAccessFunc queryPeerAccess, Device *peerDevice, bool &canAccess) {
bool retVal = true;
bool Device::canAccessPeer(QueryPeerAccessFunc queryPeerAccess, FreeMemoryFunc freeMemory, Device *peerDevice) {
if (NEO::debugManager.flags.ForceZeDeviceCanAccessPerReturnValue.get() != -1) {
canAccess = !!NEO::debugManager.flags.ForceZeDeviceCanAccessPerReturnValue.get();
return retVal;
return !!NEO::debugManager.flags.ForceZeDeviceCanAccessPerReturnValue.get();
}
const uint32_t rootDeviceIndex = this->getRootDeviceIndex();
const uint32_t peerRootDeviceIndex = peerDevice->getRootDeviceIndex();
if (rootDeviceIndex == peerRootDeviceIndex) {
canAccess = true;
return retVal;
return true;
}
auto handle = std::numeric_limits<uint64_t>::max();
void *handlePtr = nullptr;
auto lock = executionEnvironment->obtainPeerAccessQueryLock();
if (this->crossAccessEnabledDevices.find(peerRootDeviceIndex) == this->crossAccessEnabledDevices.end()) {
retVal = queryPeerAccess(*this, *peerDevice, canAccess);
bool canAccess = queryPeerAccess(*this, *peerDevice, &handlePtr, &handle);
this->updatePeerAccessCache(peerDevice, canAccess);
}
canAccess = this->crossAccessEnabledDevices[peerRootDeviceIndex];
return retVal;
if (handlePtr) {
freeMemory(*this, handlePtr);
}
return this->crossAccessEnabledDevices[peerRootDeviceIndex];
}
void Device::initializePeerAccessForDevices(QueryPeerAccessFunc queryPeerAccess, const std::vector<NEO::Device *> &devices) {
void Device::initializePeerAccessForDevices(QueryPeerAccessFunc queryPeerAccess, FreeMemoryFunc freeMemory, const std::vector<NEO::Device *> &devices) {
for (auto &device : devices) {
if (device->getReleaseHelper() && device->getReleaseHelper()->shouldQueryPeerAccess()) {
device->hasPeerAccess = false;
auto rootDeviceIndex = device->getRootDeviceIndex();
auto releaseHelper = device->getReleaseHelper();
if (!releaseHelper || !releaseHelper->shouldQueryPeerAccess()) {
continue;
}
for (auto &peerDevice : devices) {
auto peerRootDeviceIndex = peerDevice->getRootDeviceIndex();
if (rootDeviceIndex == peerRootDeviceIndex) {
continue;
}
device->hasPeerAccess = false;
const auto deviceRootIndex = device->getRootDeviceIndex();
bool canAccess = false;
if (device->crossAccessEnabledDevices.find(peerRootDeviceIndex) == device->crossAccessEnabledDevices.end()) {
auto lock = device->getExecutionEnvironment()->obtainPeerAccessQueryLock();
queryPeerAccess(*device, *peerDevice, canAccess);
device->updatePeerAccessCache(peerDevice, canAccess);
} else {
canAccess = device->crossAccessEnabledDevices[peerRootDeviceIndex];
}
void *handlePtr = nullptr;
uint64_t handle = std::numeric_limits<uint64_t>::max();
if (canAccess) {
device->hasPeerAccess = true;
}
for (auto &peerDevice : devices) {
const auto peerRootIndex = peerDevice->getRootDeviceIndex();
if (deviceRootIndex == peerRootIndex) {
continue;
}
bool canAccess = false;
if (device->crossAccessEnabledDevices.find(peerRootIndex) == device->crossAccessEnabledDevices.end()) {
auto lock = device->getExecutionEnvironment()->obtainPeerAccessQueryLock();
canAccess = queryPeerAccess(*device, *peerDevice, &handlePtr, &handle);
device->updatePeerAccessCache(peerDevice, canAccess);
} else {
canAccess = device->crossAccessEnabledDevices[peerRootIndex];
}
if (canAccess) {
device->hasPeerAccess = true;
}
}
if (handlePtr) {
freeMemory(*device, handlePtr);
handlePtr = nullptr;
handle = std::numeric_limits<uint64_t>::max();
}
}
}

View File

@@ -91,7 +91,8 @@ struct RTDispatchGlobalsInfo {
std::vector<GraphicsAllocation *> rtStacks; // per tile
};
using QueryPeerAccessFunc = std::function<bool(Device &, Device &, bool &)>;
using QueryPeerAccessFunc = std::function<bool(Device &, Device &, void **, uint64_t *)>;
using FreeMemoryFunc = std::function<void(Device &, void *)>;
class Device : public ReferenceTrackedObject<Device>, NEO::NonCopyableAndNonMovableClass {
public:
@@ -274,8 +275,8 @@ class Device : public ReferenceTrackedObject<Device>, NEO::NonCopyableAndNonMova
void resetUsmGlobalSurfaceAllocPool(UsmMemAllocPool *usmMemAllocPool);
std::unordered_map<uint32_t, bool> crossAccessEnabledDevices;
bool canAccessPeer(QueryPeerAccessFunc queryPeerAccess, Device *peerDevice, bool &canAccess);
static void initializePeerAccessForDevices(QueryPeerAccessFunc queryPeerAccess, const std::vector<NEO::Device *> &devices);
bool canAccessPeer(QueryPeerAccessFunc queryPeerAccess, FreeMemoryFunc freeMemory, Device *peerDevice);
static void initializePeerAccessForDevices(QueryPeerAccessFunc queryPeerAccess, FreeMemoryFunc freeMemory, const std::vector<NEO::Device *> &devices);
std::optional<bool> hasAnyPeerAccess() const {
return hasPeerAccess;

View File

@@ -2857,14 +2857,11 @@ TEST(DeviceCanAccessPeerTest, givenTheSameDeviceThenCanAccessPeerReturnsTrue) {
UltDeviceFactory deviceFactory{2, 0};
auto rootDevice0 = deviceFactory.rootDevices[0];
auto queryPeerAccess = [](Device &peerDevice, Device &device, bool &canAccess) -> bool {
canAccess = false;
auto queryPeerAccess = [](Device &peerDevice, Device &device, void **memoryAllocation, uint64_t *memoryHandle) -> bool {
return false;
};
bool canAccess = false;
bool result = rootDevice0->canAccessPeer(queryPeerAccess, rootDevice0, canAccess);
EXPECT_TRUE(result);
bool canAccess = rootDevice0->canAccessPeer(queryPeerAccess, nullptr, rootDevice0);
EXPECT_TRUE(canAccess);
}
@@ -2877,27 +2874,21 @@ TEST(DeviceCanAccessPeerTest, givenTwoRootDevicesThenCanAccessPeerReturnsValueBa
{
uint32_t flagValue = 1;
bool canAccess = false;
auto queryPeerAccess = [&flagValue](Device &peerDevice, Device &device, bool &canAccess) -> bool {
canAccess = !!flagValue;
return false;
auto queryPeerAccess = [&flagValue](Device &peerDevice, Device &device, void **memoryAllocation, uint64_t *memoryHandle) -> bool {
return !!flagValue;
};
debugManager.flags.ForceZeDeviceCanAccessPerReturnValue.set(flagValue);
bool result = rootDevice0->canAccessPeer(queryPeerAccess, rootDevice1, canAccess);
EXPECT_TRUE(result);
bool canAccess = rootDevice0->canAccessPeer(queryPeerAccess, nullptr, rootDevice1);
EXPECT_TRUE(canAccess);
}
{
uint32_t flagValue = 0;
bool canAccess = true;
auto queryPeerAccess = [&flagValue](Device &peerDevice, Device &device, bool &canAccess) -> bool {
canAccess = !!flagValue;
return false;
auto queryPeerAccess = [&flagValue](Device &peerDevice, Device &device, void **memoryAllocation, uint64_t *memoryHandle) -> bool {
return !!flagValue;
};
debugManager.flags.ForceZeDeviceCanAccessPerReturnValue.set(flagValue);
bool result = rootDevice0->canAccessPeer(queryPeerAccess, rootDevice1, canAccess);
EXPECT_TRUE(result);
bool canAccess = rootDevice0->canAccessPeer(queryPeerAccess, nullptr, rootDevice1);
EXPECT_FALSE(canAccess);
}
}
@@ -2909,22 +2900,17 @@ TEST(DeviceCanAccessPeerTest, givenCanAccessPeerCalledTwiceThenCanAccessPeerCach
uint32_t queryCalled = 0;
auto queryPeerAccess = [&queryCalled](Device &peerDevice, Device &device, bool &canAccess) -> bool {
auto queryPeerAccess = [&queryCalled](Device &peerDevice, Device &device, void **memoryAllocation, uint64_t *memoryHandle) -> bool {
queryCalled++;
canAccess = true;
return true;
};
bool canAccess = false;
bool res = rootDevice0->canAccessPeer(queryPeerAccess, rootDevice1, canAccess);
bool canAccess = rootDevice0->canAccessPeer(queryPeerAccess, nullptr, rootDevice1);
EXPECT_EQ(1u, queryCalled);
EXPECT_TRUE(res);
EXPECT_TRUE(canAccess);
res = rootDevice0->canAccessPeer(queryPeerAccess, rootDevice1, canAccess);
canAccess = rootDevice0->canAccessPeer(queryPeerAccess, nullptr, rootDevice1);
EXPECT_EQ(1u, queryCalled);
EXPECT_TRUE(res);
EXPECT_TRUE(canAccess);
}
@@ -2935,17 +2921,38 @@ TEST(DeviceCanAccessPeerTest, givenTwoSubDevicesFromTheSameRootDeviceThenCanAcce
auto subDevice0 = rootDevice->getSubDevices()[0];
auto subDevice1 = rootDevice->getSubDevices()[1];
auto queryPeerAccess = [](Device &peerDevice, Device &device, bool &canAccess) -> bool {
canAccess = false;
auto queryPeerAccess = [](Device &peerDevice, Device &device, void **memoryAllocation, uint64_t *memoryHandle) -> bool {
return false;
};
bool canAccess = false;
bool res = subDevice0->canAccessPeer(queryPeerAccess, subDevice1, canAccess);
EXPECT_TRUE(res);
bool canAccess = subDevice0->canAccessPeer(queryPeerAccess, nullptr, subDevice1);
EXPECT_TRUE(canAccess);
}
// When the query callback hands back a non-null allocation pointer, canAccessPeer is
// expected to invoke the free callback exactly once for the single query it performs.
TEST(DeviceCanAccessPeerTest, givenMemoryAllocationWhenCanAccessPeerThenFreeAllocation) {
    UltDeviceFactory deviceFactory{2, 0};
    auto queryingDevice = deviceFactory.rootDevices[0];
    auto peer = deviceFactory.rootDevices[1];

    uint32_t queryCount = 0;
    uint32_t freeCount = 0;

    // Pretends the query produced an allocation by writing a dummy, non-null pointer.
    auto countingQuery = [&queryCount](Device &peerDevice, Device &device, void **memoryAllocation, uint64_t *memoryHandle) -> bool {
        queryCount++;
        *memoryAllocation = reinterpret_cast<void *>(0xABCD);
        return true;
    };
    // Only counts invocations; the dummy pointer is never dereferenced or released.
    auto countingFree = [&freeCount](Device &device, void *memoryAllocation) -> void {
        freeCount++;
    };

    queryingDevice->canAccessPeer(countingQuery, countingFree, peer);

    EXPECT_EQ(1u, queryCount);
    EXPECT_EQ(1u, freeCount);
}
TEST(DevicePeerAccessInitializationTest, givenDeviceListWhenInitializePeerAccessThenQueryOnlyRelevantPeers) {
UltDeviceFactory deviceFactory{3, 0};
std::vector<NEO::Device *> rootDevices = {deviceFactory.rootDevices[0], deviceFactory.rootDevices[1], deviceFactory.rootDevices[2]};
@@ -2963,13 +2970,12 @@ TEST(DevicePeerAccessInitializationTest, givenDeviceListWhenInitializePeerAccess
rootDevices[2]->getRootDeviceEnvironmentRef().releaseHelper = std::move(releaseHelper2);
uint32_t queryCalled = 0;
auto queryPeerAccess = [&queryCalled](Device &device, Device &peerDevice, bool &canAccess) -> bool {
auto queryPeerAccess = [&queryCalled](Device &device, Device &peerDevice, void **memoryAllocation, uint64_t *memoryHandle) -> bool {
queryCalled++;
canAccess = true;
return true;
};
MockDevice::initializePeerAccessForDevices(queryPeerAccess, rootDevices);
MockDevice::initializePeerAccessForDevices(queryPeerAccess, nullptr, rootDevices);
// Check device[0] with none
// Check device[1] with device[0] and device[2] - 2 calls
@@ -2991,13 +2997,12 @@ TEST(DevicePeerAccessInitializationTest, givenSubDevicesWhenInitializePeerAccess
subDevices[1]->getRootDeviceEnvironmentRef().releaseHelper = std::move(releaseHelper1);
uint32_t queryCalled = 0;
auto queryPeerAccess = [&queryCalled](Device &device, Device &peerDevice, bool &canAccess) -> bool {
auto queryPeerAccess = [&queryCalled](Device &device, Device &peerDevice, void **memoryAllocation, uint64_t *memoryHandle) -> bool {
queryCalled++;
canAccess = true;
return true;
};
MockDevice::initializePeerAccessForDevices(queryPeerAccess, subDevices);
MockDevice::initializePeerAccessForDevices(queryPeerAccess, nullptr, subDevices);
EXPECT_EQ(0u, queryCalled);
}
@@ -3017,13 +3022,12 @@ TEST(DevicePeerAccessInitializationTest, givenDevicesWithPeerAccessCachedWhenIni
rootDevices[0]->updatePeerAccessCache(rootDevices[1], true);
uint32_t queryCalled = 0;
auto queryPeerAccess = [&queryCalled](Device &device, Device &peerDevice, bool &canAccess) -> bool {
auto queryPeerAccess = [&queryCalled](Device &device, Device &peerDevice, void **memoryAllocation, uint64_t *memoryHandle) -> bool {
queryCalled++;
canAccess = false;
return true;
return false;
};
MockDevice::initializePeerAccessForDevices(queryPeerAccess, rootDevices);
MockDevice::initializePeerAccessForDevices(queryPeerAccess, nullptr, rootDevices);
EXPECT_EQ(0u, queryCalled);
}
@@ -3043,13 +3047,12 @@ TEST(DevicePeerAccessInitializationTest, givenDevicesWhenInitializePeerAccessFor
rootDevices[1]->getRootDeviceEnvironmentRef().releaseHelper = std::move(releaseHelper1);
uint32_t queryCalled = 0;
auto queryPeerAccess = [&queryCalled](Device &device, Device &peerDevice, bool &canAccess) -> bool {
auto queryPeerAccess = [&queryCalled](Device &device, Device &peerDevice, void **memoryAllocation, uint64_t *memoryHandle) -> bool {
queryCalled++;
canAccess = true;
return true;
};
MockDevice::initializePeerAccessForDevices(queryPeerAccess, rootDevices);
MockDevice::initializePeerAccessForDevices(queryPeerAccess, nullptr, rootDevices);
EXPECT_EQ(1u, queryCalled);
ASSERT_TRUE(rootDevices[0]->hasAnyPeerAccess().has_value());
@@ -3072,13 +3075,12 @@ TEST(DevicePeerAccessInitializationTest, givenDevicesWhenInitializePeerAccessFor
rootDevices[1]->getRootDeviceEnvironmentRef().releaseHelper = std::move(releaseHelper1);
uint32_t queryCalled = 0;
auto queryPeerAccess = [&queryCalled](Device &device, Device &peerDevice, bool &canAccess) -> bool {
auto queryPeerAccess = [&queryCalled](Device &device, Device &peerDevice, void **memoryAllocation, uint64_t *memoryHandle) -> bool {
queryCalled++;
canAccess = false;
return true;
return false;
};
MockDevice::initializePeerAccessForDevices(queryPeerAccess, rootDevices);
MockDevice::initializePeerAccessForDevices(queryPeerAccess, nullptr, rootDevices);
EXPECT_EQ(1u, queryCalled);
ASSERT_TRUE(rootDevices[0]->hasAnyPeerAccess().has_value());
@@ -3101,13 +3103,12 @@ TEST(DevicePeerAccessInitializationTest, givenDevicesThatDontRequirePeerAccessQu
rootDevices[1]->getRootDeviceEnvironmentRef().releaseHelper = std::move(releaseHelper1);
uint32_t queryCalled = 0;
auto queryPeerAccess = [&queryCalled](Device &device, Device &peerDevice, bool &canAccess) -> bool {
auto queryPeerAccess = [&queryCalled](Device &device, Device &peerDevice, void **memoryAllocation, uint64_t *memoryHandle) -> bool {
queryCalled++;
canAccess = true;
return true;
};
MockDevice::initializePeerAccessForDevices(queryPeerAccess, rootDevices);
MockDevice::initializePeerAccessForDevices(queryPeerAccess, nullptr, rootDevices);
EXPECT_EQ(0u, queryCalled);
@@ -3126,16 +3127,56 @@ TEST(DevicePeerAccessInitializationTest, givenDevicesWithoutReleaseHelperWhenIni
ASSERT_EQ(nullptr, rootDevices[1]->getRootDeviceEnvironmentRef().releaseHelper);
uint32_t queryCalled = 0;
auto queryPeerAccess = [&queryCalled](Device &device, Device &peerDevice, bool &canAccess) -> bool {
auto queryPeerAccess = [&queryCalled](Device &device, Device &peerDevice, void **memoryAllocation, uint64_t *memoryHandle) -> bool {
queryCalled++;
canAccess = true;
return true;
};
MockDevice::initializePeerAccessForDevices(queryPeerAccess, rootDevices);
MockDevice::initializePeerAccessForDevices(queryPeerAccess, nullptr, rootDevices);
EXPECT_EQ(0u, queryCalled);
EXPECT_FALSE(rootDevices[0]->hasAnyPeerAccess().has_value());
EXPECT_FALSE(rootDevices[1]->hasAnyPeerAccess().has_value());
}
// Verifies that initializePeerAccessForDevices releases the allocation produced by the
// query callback. The allocation pointer is kept per querying device and released once
// that device has iterated over all of its peers, so the free callback is expected once
// per device that issued at least one query - not once per query.
TEST(DevicePeerAccessInitializationTest, givenMemoryAllocationWhenInitializePeerAccessForDevicesThenFreeAllocation) {
    UltDeviceFactory deviceFactory{4, 0};
    std::vector<NEO::Device *> rootDevices = {deviceFactory.rootDevices[0], deviceFactory.rootDevices[1], deviceFactory.rootDevices[2], deviceFactory.rootDevices[3]};

    // Every device gets a release helper that asks for peer access to be queried.
    for (auto *rootDevice : rootDevices) {
        auto releaseHelper = std::make_unique<MockReleaseHelper>();
        releaseHelper->shouldQueryPeerAccessResult = true;
        rootDevice->getRootDeviceEnvironmentRef().releaseHelper = std::move(releaseHelper);
    }

    uint32_t queryCalled = 0;
    uint32_t freeMemoryCalled = 0;

    // Pretends each query produced an allocation by writing a dummy, non-null pointer.
    auto queryPeerAccess = [&queryCalled](Device &device, Device &peerDevice, void **memoryAllocation, uint64_t *memoryHandle) -> bool {
        queryCalled++;
        *memoryAllocation = reinterpret_cast<void *>(0xABCD);
        return true;
    };
    // Only counts invocations; the dummy pointer is never dereferenced or released.
    auto freeMemory = [&freeMemoryCalled](Device &device, void *memoryAllocation) -> void {
        freeMemoryCalled++;
    };

    MockDevice::initializePeerAccessForDevices(queryPeerAccess, freeMemory, rootDevices);

    // Each unique device pair is queried once (6 pairs for 4 devices).
    EXPECT_EQ(6u, queryCalled);
    // Memory is freed once per device that performed queries, not once per query.
    // Presumably these are devices 0-2: the last device finds every peer already cached
    // and therefore never receives an allocation to free.
    EXPECT_EQ(3u, freeMemoryCalled);
}