// Patch summary (leading commentary placed ahead of the first `diff --git` header).
//
// Purpose: factor the "which counter allocation should be made resident" logic out of
// CommandListCoreFamily::appendWaitOnInOrderDependency into a new member function,
// getDeviceCounterAllocForResidency, and add unit tests that call it directly.
//
// Files touched:
//   * level_zero/core/source/cmdlist/cmdlist_hw.h
//       Declares getDeviceCounterAllocForResidency(NEO::GraphicsAllocation *counterDeviceAlloc).
//   * level_zero/core/source/cmdlist/cmdlist_hw.inl
//       Defines the helper and replaces the equivalent inline code in
//       appendWaitOnInOrderDependency with a single call to it. The value returned by the
//       helper is still added to the residency container only when
//       skipAddingWaitEventsToResidency is false.
//   * level_zero/core/test/unit_tests/mocks/mock_cmdlist.h
//       Adds a using-declaration for the helper to MockCommandListCoreFamily so tests can call it.
//   * level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_8.cpp
//       Adds four HWTEST2_F cases in the CommandListCreate fixture.
//
// Behaviour of getDeviceCounterAllocForResidency as written in this patch:
//   * Returns the argument unchanged when it is null, or when its root device index equals
//     device->getRootDeviceIndex().
//   * Otherwise obtains the driver handle from the device and asks it for
//     getCounterPeerAllocation(device, allocation), then returns that result.
//   * Trips UNRECOVERABLE_IF when the peer allocation is null, or when its GPU address differs
//     from the GPU address of the allocation that was passed in.
//
// Added tests:
//   1. Allocation created with the device's own root device index: expects the same pointer back.
//   2. Allocation created with root device index + 1: expects the result's GPU address to equal
//      the input's GPU address.
//   3. Same as 2, but the allocation is first inserted into
//      deviceImp->peerCounterAllocations.allocations keyed by its GPU address, and removed
//      again after the call.
//   4. Allocation created with root device index + 1 and a nullptr buffer: expects the call to
//      throw (EXPECT_ANY_THROW).
//
// NOTE(review): in tests 2 and 3 `counterDeviceAlloc` is only used by the ASSERT_NE
// preconditions; the helper itself is called with `peerAllocation`. In test 4
// `baseDeviceIndex` is only used to derive `peerDeviceIndex`.
// NOTE(review): this copy of the patch appears to have lost its line breaks and its template
// argument lists (e.g. `template` with no parameter list, `static_cast(`,
// `std::make_unique>()`, `reinterpret_cast(`) -- presumably an extraction artifact; confirm
// against the original commit before applying.
diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 5a3dc6b44f..31b5caa0da 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -393,6 +393,7 @@ struct CommandListCoreFamily : public CommandListImp { bool isCopyOffloadAllowed(const NEO::GraphicsAllocation &srcAllocation, const NEO::GraphicsAllocation &dstAllocation) const; void setAdditionalKernelLaunchParams(CmdListKernelLaunchParams &launchParams, Kernel &kernel) const; void dispatchInOrderPostOperationBarrier(Event *signalOperation, bool dcFlushRequired, bool copyOperation); + NEO::GraphicsAllocation *getDeviceCounterAllocForResidency(NEO::GraphicsAllocation *counterDeviceAlloc); NEO::InOrderPatchCommandsContainer inOrderPatchCmds; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 99624f8b5a..bf9d812a38 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2647,6 +2647,20 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han return ZE_RESULT_SUCCESS; } +template +NEO::GraphicsAllocation *CommandListCoreFamily::getDeviceCounterAllocForResidency(NEO::GraphicsAllocation *counterDeviceAlloc) { + NEO::GraphicsAllocation *counterDeviceAllocForResidency = counterDeviceAlloc; + + if (counterDeviceAllocForResidency && (counterDeviceAllocForResidency->getRootDeviceIndex() != device->getRootDeviceIndex())) { + DriverHandleImp *driverHandle = static_cast(device->getDriverHandle()); + + counterDeviceAllocForResidency = driverHandle->getCounterPeerAllocation(device, *counterDeviceAllocForResidency); + UNRECOVERABLE_IF(!counterDeviceAllocForResidency); + UNRECOVERABLE_IF(counterDeviceAllocForResidency->getGpuAddress() != counterDeviceAlloc->getGpuAddress()); + } + return counterDeviceAllocForResidency; +} + template void 
CommandListCoreFamily::appendWaitOnInOrderDependency(std::shared_ptr &inOrderExecInfo, CommandToPatchContainer *outListCommands, uint64_t waitValue, uint32_t offset, bool relaxedOrderingAllowed, bool implicitDependency, bool skipAddingWaitEventsToResidency, @@ -2655,16 +2669,7 @@ void CommandListCoreFamily::appendWaitOnInOrderDependency(std::sh UNRECOVERABLE_IF(waitValue > static_cast(std::numeric_limits::max()) && !isQwordInOrderCounter()); - auto deviceAllocForResidency = inOrderExecInfo->getDeviceCounterAllocation(); - - if (deviceAllocForResidency && (deviceAllocForResidency->getRootDeviceIndex() != device->getRootDeviceIndex())) { - DriverHandleImp *driverHandle = static_cast(device->getDriverHandle()); - - deviceAllocForResidency = driverHandle->getCounterPeerAllocation(device, *deviceAllocForResidency); - UNRECOVERABLE_IF(!deviceAllocForResidency); - UNRECOVERABLE_IF(deviceAllocForResidency->getGpuAddress() != inOrderExecInfo->getDeviceCounterAllocation()->getGpuAddress()); - } - + auto deviceAllocForResidency = this->getDeviceCounterAllocForResidency(inOrderExecInfo->getDeviceCounterAllocation()); if (!skipAddingWaitEventsToResidency) { commandContainer.addToResidencyContainer(deviceAllocForResidency); } diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index c060f94097..1df3b8b0dc 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -616,6 +616,7 @@ class MockCommandListCoreFamily : public CommandListCoreFamily { using BaseClass::dummyBlitWa; using BaseClass::enableInOrderExecution; using BaseClass::encodeMiFlush; + using BaseClass::getDeviceCounterAllocForResidency; using BaseClass::ownedPrivateAllocations; using BaseClass::taskCountUpdateFenceRequired; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_8.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_8.cpp index 
aed77ce4f5..1f04968994 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_8.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_8.cpp @@ -1872,5 +1872,62 @@ HWTEST2_F(CommandListCreate, givenNewSizeDifferentThanSizesInMapWhenAllocatingPr neoDevice->getMemoryManager()->freeGraphicsMemory(commandList->commandContainer.getResidencyContainer()[0]); } +HWTEST2_F(CommandListCreate, givenCounterDeviceAllocWhenGetDeviceCounterAllocForResidencyThenReturnCorrectAllocation, MatchAny) { + auto commandList = std::make_unique>(); + commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u); + + MockGraphicsAllocation counterDeviceAlloc(this->device->getRootDeviceIndex(), nullptr, 0x0); + auto result = commandList->getDeviceCounterAllocForResidency(&counterDeviceAlloc); + EXPECT_EQ(result, &counterDeviceAlloc); +} + +HWTEST2_F(CommandListCreate, givenCounterDeviceAllocFromDifferentRootDeviceWhenGetDeviceCounterAllocForResidencyThenReturnPeerAllocation, MatchAny) { + auto commandList = std::make_unique>(); + commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u); + uint32_t baseDeviceIndex = this->device->getRootDeviceIndex(); + uint32_t peerDeviceIndex = baseDeviceIndex + 1; + + MockGraphicsAllocation counterDeviceAlloc(baseDeviceIndex, reinterpret_cast(0x1234), 0x0u); + MockGraphicsAllocation peerAllocation(peerDeviceIndex, reinterpret_cast(0x5678), 0x0u); + ASSERT_NE(counterDeviceAlloc.getRootDeviceIndex(), peerAllocation.getRootDeviceIndex()); + ASSERT_NE(counterDeviceAlloc.getGpuAddress(), peerAllocation.getGpuAddress()); + + auto result = commandList->getDeviceCounterAllocForResidency(&peerAllocation); + EXPECT_EQ(result->getGpuAddress(), peerAllocation.getGpuAddress()); +} + +HWTEST2_F(CommandListCreate, givenCounterDeviceAllocFromDifferentRootDeviceWhenGetDeviceCounterAllocForResidencyAndAllocIsAlreadyImportedThenReturnPeerAllocation, MatchAny) { + auto commandList = std::make_unique>(); + 
commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u); + uint32_t baseDeviceIndex = this->device->getRootDeviceIndex(); + uint32_t peerDeviceIndex = baseDeviceIndex + 1; + + MockGraphicsAllocation counterDeviceAlloc(baseDeviceIndex, reinterpret_cast(0x1234), 0x0u); + MockGraphicsAllocation peerAllocation(peerDeviceIndex, reinterpret_cast(0x5678), 0x0u); + ASSERT_NE(counterDeviceAlloc.getRootDeviceIndex(), peerAllocation.getRootDeviceIndex()); + ASSERT_NE(counterDeviceAlloc.getGpuAddress(), peerAllocation.getGpuAddress()); + + auto deviceImp = static_cast(this->device); + NEO::SvmAllocationData allocData(peerDeviceIndex); + allocData.gpuAllocations.addAllocation(&peerAllocation); + deviceImp->peerCounterAllocations.allocations.insert({reinterpret_cast(peerAllocation.getGpuAddress()), allocData}); + + auto result = commandList->getDeviceCounterAllocForResidency(&peerAllocation); + EXPECT_EQ(result->getGpuAddress(), peerAllocation.getGpuAddress()); + + deviceImp->peerCounterAllocations.remove(allocData); +} + +HWTEST2_F(CommandListCreate, givenNullptrPeerAllocationWhenGetDeviceCounterAllocForResidencyThenAbortIsThrown, MatchAny) { + auto commandList = std::make_unique>(); + commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u); + uint32_t baseDeviceIndex = this->device->getRootDeviceIndex(); + uint32_t peerDeviceIndex = baseDeviceIndex + 1; + + MockGraphicsAllocation counterDeviceAlloc(peerDeviceIndex, nullptr, 0x0); + + EXPECT_ANY_THROW(commandList->getDeviceCounterAllocForResidency(&counterDeviceAlloc)); +} + } // namespace ult } // namespace L0