diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.cpp b/level_zero/core/source/cmdlist/cmdlist_imp.cpp index ac32588c67..b34cb17a02 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.cpp +++ b/level_zero/core/source/cmdlist/cmdlist_imp.cpp @@ -374,6 +374,38 @@ void CommandListImp::clearInOrderExecCounterAllocation() { } } /* Added getter: when isInOrderExecutionEnabled() is true it returns inOrderExecInfo->getDeviceNodeWriteSize(); otherwise it returns 0. */ +size_t CommandListImp::getInOrderExecDeviceRequiredSize() const { + size_t size = 0; + if (isInOrderExecutionEnabled()) { + size = inOrderExecInfo->getDeviceNodeWriteSize(); + } + return size; +} + /* Added getter: when isInOrderExecutionEnabled() is true it returns inOrderExecInfo->getDeviceNodeGpuAddress(); otherwise it returns 0. */ +uint64_t CommandListImp::getInOrderExecDeviceGpuAddress() const { + uint64_t gpuAddress = 0; + if (isInOrderExecutionEnabled()) { + gpuAddress = inOrderExecInfo->getDeviceNodeGpuAddress(); + } + return gpuAddress; +} + /* Added getter: when isInOrderExecutionEnabled() is true it returns inOrderExecInfo->getHostNodeWriteSize(); otherwise it returns 0. */ +size_t CommandListImp::getInOrderExecHostRequiredSize() const { + size_t size = 0; + if (isInOrderExecutionEnabled()) { + size = inOrderExecInfo->getHostNodeWriteSize(); + } + return size; +} + /* Added getter: when isInOrderExecutionEnabled() is true it returns inOrderExecInfo->getHostNodeGpuAddress(); otherwise it returns 0. */ +uint64_t CommandListImp::getInOrderExecHostGpuAddress() const { + uint64_t gpuAddress = 0; + if (isInOrderExecutionEnabled()) { + gpuAddress = inOrderExecInfo->getHostNodeGpuAddress(); + } + return gpuAddress; +} + void CommandListImp::enableSynchronizedDispatch(NEO::SynchronizedDispatchMode mode) { if (!device->isImplicitScalingCapable() || this->synchronizedDispatchMode != NEO::SynchronizedDispatchMode::disabled) { return; diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.h b/level_zero/core/source/cmdlist/cmdlist_imp.h index cca628881a..9f10381009 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.h +++ b/level_zero/core/source/cmdlist/cmdlist_imp.h @@ -52,6 +52,10 @@ struct CommandListImp : public CommandList { void setInterruptEventsCsr(NEO::CommandStreamReceiver &csr); virtual bool kernelMemoryPrefetchEnabled() const = 0; std::shared_ptr &getInOrderExecInfo() { return inOrderExecInfo; } /* Added declarations of the four getInOrderExec* query getters defined in cmdlist_imp.cpp; they sit ahead of the protected: section of the struct. */ + size_t getInOrderExecDeviceRequiredSize() const; + uint64_t getInOrderExecDeviceGpuAddress() const; + size_t 
getInOrderExecHostRequiredSize() const; + uint64_t getInOrderExecHostGpuAddress() const; protected: std::shared_ptr inOrderExecInfo; diff --git a/level_zero/core/source/cmdqueue/cmdqueue.cpp b/level_zero/core/source/cmdqueue/cmdqueue.cpp index 4421d18502..300dddc9c4 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue.cpp +++ b/level_zero/core/source/cmdqueue/cmdqueue.cpp @@ -23,6 +23,7 @@ #include "shared/source/os_interface/os_context.h" #include "shared/source/os_interface/product_helper.h" /* New include: presumably required because the helper added below calls CommandListImp member functions - confirm. */ +#include "level_zero/core/source/cmdlist/cmdlist_imp.h" #include "level_zero/core/source/cmdqueue/cmdqueue_imp.h" #include "level_zero/core/source/device/device.h" #include "level_zero/core/source/device/device_imp.h" @@ -400,4 +401,13 @@ void CommandQueueImp::makeResidentForResidencyContainer(const NEO::ResidencyCont } } /* Added helper: if commandList->inOrderCmdsPatchingEnabled() is true it calls addRegularCmdListSubmissionCounter() and then patchInOrderCmds(); otherwise it calls clearInOrderExecCounterAllocation(). The same statements are removed from CommandQueueHw::setupCmdListsAndContextParams in the cmdqueue_hw.inl hunk of this patch. NOTE(review): commandList is dereferenced without a null check - presumably callers always pass a valid list; confirm. */ +void CommandQueueImp::prepareInOrderCommandList(CommandListImp *commandList) { + if (commandList->inOrderCmdsPatchingEnabled()) { + commandList->addRegularCmdListSubmissionCounter(); + commandList->patchInOrderCmds(); + } else { + commandList->clearInOrderExecCounterAllocation(); + } +} + } // namespace L0 diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index 6456674598..3de2d28239 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -788,12 +788,7 @@ ze_result_t CommandQueueHw::setupCmdListsAndContextParams( } commandList->storeReferenceTsToMappedEvents(false); /* The inline branch below is removed and replaced by one call to prepareInOrderCommandList(commandList); the call stays between storeReferenceTsToMappedEvents(false) and setInterruptEventsCsr. */ - if (commandList->inOrderCmdsPatchingEnabled()) { - commandList->addRegularCmdListSubmissionCounter(); - commandList->patchInOrderCmds(); - } else { - commandList->clearInOrderExecCounterAllocation(); - } + this->prepareInOrderCommandList(commandList); commandList->setInterruptEventsCsr(*this->csr); diff --git a/level_zero/core/source/cmdqueue/cmdqueue_imp.h b/level_zero/core/source/cmdqueue/cmdqueue_imp.h index eced42b232..9a44b84702 100644 --- 
a/level_zero/core/source/cmdqueue/cmdqueue_imp.h +++ b/level_zero/core/source/cmdqueue/cmdqueue_imp.h @@ -34,6 +34,7 @@ class MemoryManager; namespace L0 { struct CommandList; /* Added forward declaration; the new member declared further down only takes a CommandListImp pointer. */ +struct CommandListImp; struct Kernel; struct CommandQueueImp : public CommandQueue { class CommandBufferManager { @@ -126,6 +127,7 @@ struct CommandQueueImp : public CommandQueue { ze_result_t synchronizeByPollingForTaskCount(uint64_t timeoutNanoseconds); void postSyncOperations(bool hangDetected); /* Added declaration of the in-order preparation helper that is defined in cmdqueue.cpp. */ + void prepareInOrderCommandList(CommandListImp *commandList); static constexpr uint32_t defaultCommandListStateChangeListSize = 10; struct CommandListDirtyFlags { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp index d1f51dae3f..04b64b3c0b 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp @@ -1704,5 +1704,17 @@ HWTEST_F(ImmediateCommandListTest, EXPECT_TRUE(ultCsr.isMadeResident(cmdBufferAllocation)); } /* New test: creates a command list through CommandList::create(...) and expects all four getInOrderExec* getters to return 0. NOTE(review): returnValue is only checked with EXPECT_EQ before commandList.get() is dereferenced; ASSERT_EQ may be safer - confirm against the conventions of this test file. NOTE(review): the template arguments of std::unique_ptr and static_cast appear to have been lost when this patch text was extracted - verify against the real commit. */ +HWTEST_F(CommandListCreateTests, givenRegularOutOfOrderCommandListWhenGettingInOrderPropertiesThenReturnZeros) { + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::compute, 0u, returnValue, false)); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + + auto commandListImp = static_cast(commandList.get()); + EXPECT_EQ(0u, commandListImp->getInOrderExecDeviceRequiredSize()); + EXPECT_EQ(0u, commandListImp->getInOrderExecDeviceGpuAddress()); + EXPECT_EQ(0u, commandListImp->getInOrderExecHostRequiredSize()); + EXPECT_EQ(0u, commandListImp->getInOrderExecHostGpuAddress()); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_3.cpp index afc007fa19..a0209b4720 100644 --- 
a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_3.cpp @@ -318,5 +318,29 @@ HWTEST_F(InOrderIpcTests, givenIncorrectParamsWhenUsingIpcApisThenReturnError) { EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, zexCounterBasedEventOpenIpcHandle(context->toHandle(), zexIpcData, nullptr)); } /* New test: with InOrderDuplicatedCounterStorageEnabled set to 0 it expects a device write size of sizeof(uint64_t), a non-zero device address, and a host size and host address of 0; after setting the flag to 1 and recreating the list it expects a host write size of sizeof(uint64_t) and a non-zero host address. NOTE(review): no DebugManagerStateRestore is visible in this hunk; presumably InOrderCmdListFixture restores the debug flags - confirm. */ +using InOrderRegularCmdListTests = InOrderCmdListFixture; +HWTEST_F(InOrderRegularCmdListTests, givenInOrderCmdListWhenQueryingRequiredSizeThenExpectCorrectValues) { + debugManager.flags.InOrderDuplicatedCounterStorageEnabled.set(0); + + auto regularCmdList = createRegularCmdList(false); + + auto deviceRequiredSize = regularCmdList->getInOrderExecDeviceRequiredSize(); + EXPECT_EQ(sizeof(uint64_t), deviceRequiredSize); + auto deviceNodeAddress = regularCmdList->getInOrderExecDeviceGpuAddress(); + EXPECT_NE(0u, deviceNodeAddress); + auto hostRequiredSize = regularCmdList->getInOrderExecHostRequiredSize(); + EXPECT_EQ(0u, hostRequiredSize); + auto hostNodeAddress = regularCmdList->getInOrderExecHostGpuAddress(); + EXPECT_EQ(0u, hostNodeAddress); + + debugManager.flags.InOrderDuplicatedCounterStorageEnabled.set(1); + + regularCmdList = createRegularCmdList(false); + hostRequiredSize = regularCmdList->getInOrderExecHostRequiredSize(); + EXPECT_EQ(sizeof(uint64_t), hostRequiredSize); + hostNodeAddress = regularCmdList->getInOrderExecHostGpuAddress(); + EXPECT_NE(0u, hostNodeAddress); +} + } // namespace ult } // namespace L0 diff --git a/shared/source/helpers/in_order_cmd_helpers.cpp b/shared/source/helpers/in_order_cmd_helpers.cpp index 94103a9513..b48200b501 100644 --- a/shared/source/helpers/in_order_cmd_helpers.cpp +++ b/shared/source/helpers/in_order_cmd_helpers.cpp @@ -165,4 +165,18 @@ void InOrderExecInfo::releaseNotUsedTempTimestampNodes(bool forceReturn) { tempTimestampNodes.swap(tempVector); } /* Added: returns hostCounterNode->getGpuAddress() + allocationOffset when hostCounterNode is non-null, otherwise 0. */ +uint64_t InOrderExecInfo::getHostNodeGpuAddress() const { + if (hostCounterNode) { + 
return hostCounterNode->getGpuAddress() + allocationOffset; + } + return 0; +} + /* Added: returns deviceCounterNode->getGpuAddress() + allocationOffset when deviceCounterNode is non-null, otherwise 0. */ +uint64_t InOrderExecInfo::getDeviceNodeGpuAddress() const { + if (deviceCounterNode) { + return deviceCounterNode->getGpuAddress() + allocationOffset; + } + return 0; +} + } // namespace NEO diff --git a/shared/source/helpers/in_order_cmd_helpers.h b/shared/source/helpers/in_order_cmd_helpers.h index da8753e322..bec47b529b 100644 --- a/shared/source/helpers/in_order_cmd_helpers.h +++ b/shared/source/helpers/in_order_cmd_helpers.h @@ -63,6 +63,23 @@ class InOrderExecInfo : public NEO::NonCopyableClass { uint64_t getBaseDeviceAddress() const { return deviceAddress; } uint64_t getBaseHostGpuAddress() const; /* Added declarations of the two node address getters, plus two inline size queries: getDeviceNodeWriteSize() returns sizeof(uint64_t) * numDevicePartitionsToWait when deviceCounterNode is set and 0 otherwise; getHostNodeWriteSize() returns sizeof(uint64_t) * numHostPartitionsToWait when hostCounterNode is set and 0 otherwise. */ + uint64_t getDeviceNodeGpuAddress() const; + uint64_t getHostNodeGpuAddress() const; + size_t getDeviceNodeWriteSize() const { + if (deviceCounterNode) { + const size_t deviceAllocationWriteSize = sizeof(uint64_t) * numDevicePartitionsToWait; + return deviceAllocationWriteSize; + } + return 0; + } + size_t getHostNodeWriteSize() const { + if (hostCounterNode) { + const size_t hostAllocationWriteSize = sizeof(uint64_t) * numHostPartitionsToWait; + return hostAllocationWriteSize; + } + return 0; + } + uint64_t getCounterValue() const { return counterValue; } void addCounterValue(uint64_t addValue) { counterValue += addValue; } void resetCounterValue() { counterValue = 0; } diff --git a/shared/test/unit_test/command_container/command_encoder_tests.cpp b/shared/test/unit_test/command_container/command_encoder_tests.cpp index d3e4bb01b0..84700aa120 100644 --- a/shared/test/unit_test/command_container/command_encoder_tests.cpp +++ b/shared/test/unit_test/command_container/command_encoder_tests.cpp @@ -82,6 +82,10 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingStandaloneInO EXPECT_TRUE(inOrderExecInfo->isExternalMemoryExecInfo()); EXPECT_EQ(2u, inOrderExecInfo->getNumDevicePartitionsToWait()); EXPECT_EQ(3u, inOrderExecInfo->getNumHostPartitionsToWait()); /* Added assertions: in this test (name truncated in the hunk header) all four new node queries are expected to return 0 even though the partition counts are 2 and 3. */ + 
EXPECT_EQ(0u, inOrderExecInfo->getDeviceNodeWriteSize()); + EXPECT_EQ(0u, inOrderExecInfo->getHostNodeWriteSize()); + EXPECT_EQ(0u, inOrderExecInfo->getDeviceNodeGpuAddress()); + EXPECT_EQ(0u, inOrderExecInfo->getHostNodeGpuAddress()); inOrderExecInfo->reset(); @@ -215,11 +219,17 @@ HWTEST_F(CommandEncoderTests, givenDifferentInputParamsWhenCreatingInOrderExecIn DebugManagerStateRestore restore; debugManager.flags.InOrderDuplicatedCounterStorageEnabled.set(1); /* The literal partition count 2 is replaced by the named constant partitionCount so the added size assertions can reuse it. */ - auto inOrderExecInfo = InOrderExecInfo::create(deviceNode, hostNode, mockDevice, 2, false); + constexpr uint32_t partitionCount = 2u; + auto inOrderExecInfo = InOrderExecInfo::create(deviceNode, hostNode, mockDevice, partitionCount, false); EXPECT_EQ(inOrderExecInfo->getBaseHostGpuAddress(), hostNode->getGpuAddress()); EXPECT_NE(inOrderExecInfo->getDeviceCounterAllocation(), inOrderExecInfo->getHostCounterAllocation()); EXPECT_NE(deviceNode->getBaseGraphicsAllocation()->getGraphicsAllocation(0), inOrderExecInfo->getHostCounterAllocation()); /* Added assertions: both node addresses are non-zero; the expected device write size is sizeof(uint64_t) multiplied by 1 when inOrderAtomicSignallingEnabled(...) returns true and by partitionCount otherwise; the expected host write size is sizeof(uint64_t) multiplied by partitionCount. */ + EXPECT_NE(0u, inOrderExecInfo->getDeviceNodeGpuAddress()); + size_t deviceNodeSize = sizeof(uint64_t) * (mockDevice.getGfxCoreHelper().inOrderAtomicSignallingEnabled(mockDevice.getRootDeviceEnvironment()) ? 1u : partitionCount); + EXPECT_EQ(deviceNodeSize, inOrderExecInfo->getDeviceNodeWriteSize()); + EXPECT_NE(0u, inOrderExecInfo->getHostNodeGpuAddress()); + EXPECT_EQ(sizeof(uint64_t) * partitionCount, inOrderExecInfo->getHostNodeWriteSize()); EXPECT_NE(deviceNode->getCpuBase(), inOrderExecInfo->getBaseHostAddress()); EXPECT_EQ(ptrOffset(inOrderExecInfo->getHostCounterAllocation()->getUnderlyingBuffer(), offset), inOrderExecInfo->getBaseHostAddress());