From 23c08f4bcaf03dc5ada99549416a849fc83631ee Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Fri, 5 May 2023 09:14:00 +0000 Subject: [PATCH] feature: Experimental support of immediate cmd list in-order execution [4/n] Related-To: LOCI-4332 - Simplify CmdList-Event dependency - Add waiting on in-order dependency - Prepare Event for in-order synchronization - Adjust downloading sync allocation in TBX mode Signed-off-by: Dunajski, Bartosz --- level_zero/core/source/cmdlist/cmdlist_hw.h | 1 + level_zero/core/source/cmdlist/cmdlist_hw.inl | 32 ++++- .../source/cmdlist/cmdlist_hw_immediate.h | 2 - .../source/cmdlist/cmdlist_hw_immediate.inl | 7 +- .../core/source/cmdlist/cmdlist_imp.cpp | 7 - level_zero/core/source/cmdlist/cmdlist_imp.h | 3 - level_zero/core/source/event/event.cpp | 15 +- level_zero/core/source/event/event.h | 12 +- level_zero/core/source/event/event_impl.inl | 10 +- .../core/test/unit_tests/mocks/mock_cmdlist.h | 2 - .../sources/cmdlist/test_cmdlist_1.cpp | 58 ++++++++ .../test_cmdlist_append_launch_kernel_3.cpp | 132 ++++++++---------- .../unit_tests/sources/event/test_event.cpp | 32 +++++ .../relaxed_ordering_commands_helper.h | 2 +- 14 files changed, 200 insertions(+), 115 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index e276e42098..24ed6a6581 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -168,6 +168,7 @@ struct CommandListCoreFamily : CommandListImp { ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override; ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies) override; + void appendWaitOnInOrderDependency(uint32_t waitValue, bool relaxedOrderingAllowed); ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc, const void *srcptr, diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 99f50e4880..8462ee7014 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2024,12 +2024,12 @@ inline uint32_t CommandListCoreFamily::getRegionOffsetForAppendMe template inline ze_result_t CommandListCoreFamily::addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies) { - if (relaxedOrderingAllowed && (numWaitEvents > 0 || latestSentInOrderEvent)) { + if (relaxedOrderingAllowed && (numWaitEvents > 0 || inOrderDependencyCounter > 0)) { NEO::RelaxedOrderingHelper::encodeRegistersBeforeDependencyCheckers(*commandContainer.getCommandStream()); } - if (latestSentInOrderEvent) { - CommandListCoreFamily::appendWaitOnEvents(1, &latestSentInOrderEvent, relaxedOrderingAllowed, trackDependencies); + if (inOrderDependencyCounter > 0) { + CommandListCoreFamily::appendWaitOnInOrderDependency(inOrderDependencyCounter, relaxedOrderingAllowed); } if (numWaitEvents > 0) { @@ -2075,6 +2075,26 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han return ZE_RESULT_SUCCESS; } +template +void CommandListCoreFamily::appendWaitOnInOrderDependency(uint32_t waitValue, bool relaxedOrderingAllowed) { + using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; + + commandContainer.addToResidencyContainer(this->inOrderDependencyCounterAllocation); + + uint64_t gpuAddress = this->inOrderDependencyCounterAllocation->getGpuAddress(); + + if (relaxedOrderingAllowed) { + NEO::EncodeBatchBufferStartOrEnd::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, + NEO::CompareOperation::Less, true); + + } else { + NEO::EncodeSemaphore::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(), + gpuAddress, + waitValue, + COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); + } +} + template ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies) { using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; @@ -2113,6 +2133,12 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu for (uint32_t i = 0; i < numEvents; i++) { auto event = Event::fromHandle(phEvent[i]); + + if (event->isInOrderExecEvent()) { + CommandListCoreFamily::appendWaitOnInOrderDependency(event->getInOrderExecSignalValue(), relaxedOrderingAllowed); + continue; + } + commandContainer.addToResidencyContainer(&event->getAllocation(this->device)); gpuAddr = event->getCompletionFieldGpuAddress(this->device); uint32_t packetsToWait = event->getPacketsInUse(); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index 71aaa5ac34..95855b9d12 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -172,8 +172,6 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily::flushImmediate(ze_res if (isInOrderExecutionEnabled()) { inOrderDependencyCounter++; - latestInOrderOperationCompleted = false; - this->latestSentInOrderEvent = hSignalEvent; - if (hSignalEvent) { - Event::fromHandle(hSignalEvent)->setLatestUsedInOrderCmdList(this); + Event::fromHandle(hSignalEvent)->enableInOrderExecMode(*inOrderDependencyCounterAllocation, inOrderDependencyCounter); } } return inputRet; @@ -1024,7 +1021,7 @@ void CommandListCoreFamilyImmediate::checkAssert() { template bool CommandListCoreFamilyImmediate::isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const { - auto numEvents = numWaitEvents + (latestSentInOrderEvent ? 1 : 0); + auto numEvents = numWaitEvents + ((inOrderDependencyCounter > 0) ? 1 : 0); return NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numEvents); } diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.cpp b/level_zero/core/source/cmdlist/cmdlist_imp.cpp index 7b2ac63136..28516df6d8 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.cpp +++ b/level_zero/core/source/cmdlist/cmdlist_imp.cpp @@ -231,13 +231,6 @@ void CommandListImp::setStreamPropertiesDefaultSettings(NEO::StreamProperties &s streamProperties.stateBaseAddress.setPropertyGlobalAtomics(cmdListDefaultGlobalAtomics, true); } -void CommandListImp::unsetLastInOrderOutEvent(ze_event_handle_t outEvent) { - if (latestSentInOrderEvent == outEvent) { - latestSentInOrderEvent = nullptr; - latestInOrderOperationCompleted = true; - } -} - void CommandListImp::enableInOrderExecution() { UNRECOVERABLE_IF(inOrderDependencyCounterAllocation); diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.h b/level_zero/core/source/cmdlist/cmdlist_imp.h index 916bb510c0..87c3ea01ee 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.h +++ b/level_zero/core/source/cmdlist/cmdlist_imp.h @@ -34,14 +34,11 @@ struct CommandListImp : CommandList { void setStreamPropertiesDefaultSettings(NEO::StreamProperties &streamProperties); void enableInOrderExecution(); bool isInOrderExecutionEnabled() const { return inOrderExecutionEnabled; } - void unsetLastInOrderOutEvent(ze_event_handle_t outEvent); protected: std::unique_ptr nonImmediateLogicalStateHelper; NEO::GraphicsAllocation *inOrderDependencyCounterAllocation = nullptr; uint32_t inOrderDependencyCounter = 0; - ze_event_handle_t latestSentInOrderEvent = nullptr; - bool latestInOrderOperationCompleted = true; // If driver is able to detect that previous operation is already done, there is no need to track dependencies. bool inOrderExecutionEnabled = false; ~CommandListImp() override = default; diff --git a/level_zero/core/source/event/event.cpp b/level_zero/core/source/event/event.cpp index a47be17cc8..b9916e1e80 100644 --- a/level_zero/core/source/event/event.cpp +++ b/level_zero/core/source/event/event.cpp @@ -335,10 +335,6 @@ ze_result_t EventPool::openEventPoolIpcHandle(const ze_ipc_event_pool_handle_t & } ze_result_t Event::destroy() { - if (latestUsedInOrderCmdList) { - latestUsedInOrderCmdList->unsetLastInOrderOutEvent(this->toHandle()); - } - delete this; return ZE_RESULT_SUCCESS; } @@ -391,12 +387,13 @@ void Event::resetPackets(bool resetAllPackets) { void Event::setIsCompleted() { if (this->isCompleted.load() == STATE_CLEARED) { this->isCompleted = STATE_SIGNALED; - - if (latestUsedInOrderCmdList) { - latestUsedInOrderCmdList->unsetLastInOrderOutEvent(this->toHandle()); - latestUsedInOrderCmdList = nullptr; - } } } +void Event::enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint32_t signalValue) { + inOrderExecEvent = true; + inOrderExecSignalValue = signalValue; + inOrderExecDataAllocation = &inOrderDependenciesAllocation; +} + } // namespace L0 diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index a7c034b284..f9f29ca563 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -39,7 +39,6 @@ struct DriverHandle; struct DriverHandleImp; struct Device; struct Kernel; -struct CommandListImp; #pragma pack(1) struct IpcEventPoolData { @@ -204,10 +203,9 @@ struct Event : _ze_event_handle_t { void setMetricStreamer(MetricStreamer *metricStreamer) { this->metricStreamer = metricStreamer; } - - void setLatestUsedInOrderCmdList(CommandListImp *newCmdList) { - latestUsedInOrderCmdList = newCmdList; - } + void enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint32_t signalValue); + bool isInOrderExecEvent() const { return inOrderExecEvent; } + uint32_t getInOrderExecSignalValue() const { return inOrderExecSignalValue; } protected: Event(EventPool *eventPool, int index, Device *device) : device(device), eventPool(eventPool), index(index) {} @@ -239,12 +237,13 @@ struct Event : _ze_event_handle_t { Device *device = nullptr; EventPool *eventPool = nullptr; Kernel *kernelWithPrintf = nullptr; - CommandListImp *latestUsedInOrderCmdList = nullptr; + NEO::GraphicsAllocation *inOrderExecDataAllocation = nullptr; uint32_t maxKernelCount = 0; uint32_t kernelCount = 1u; uint32_t maxPacketCount = 0; uint32_t totalEventSize = 0; + uint32_t inOrderExecSignalValue = 0; ze_event_scope_flags_t signalScope = 0u; ze_event_scope_flags_t waitScope = 0u; @@ -257,6 +256,7 @@ struct Event : _ze_event_handle_t { bool usingContextEndOffset = false; bool signalAllEventPackets = false; bool isFromIpcPool = false; + bool inOrderExecEvent = false; }; struct EventPool : _ze_event_pool_handle_t { diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index 71fc00cd4c..5fc54799c6 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -186,6 +186,9 @@ ze_result_t EventImp::queryStatus() { if (this->downloadAllocationRequired) { for (auto &csr : csrs) { csr->downloadAllocation(this->getAllocation(this->device)); + if (inOrderExecEvent) { + csr->downloadAllocation(*this->inOrderExecDataAllocation); + } } } @@ -352,9 +355,10 @@ ze_result_t EventImp::hostSynchronize(uint64_t timeout) { template ze_result_t EventImp::reset() { - if (latestUsedInOrderCmdList) { - latestUsedInOrderCmdList->unsetLastInOrderOutEvent(this->toHandle()); - latestUsedInOrderCmdList = nullptr; + if (inOrderExecEvent) { + inOrderExecDataAllocation = nullptr; + inOrderExecSignalValue = 0; + inOrderExecEvent = false; } this->resetCompletionStatus(); this->resetDeviceCompletionData(false); diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 9d3c584a52..ca196d8b24 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -165,8 +165,6 @@ struct WhiteBox> using BaseClass::isFlushTaskSubmissionEnabled; using BaseClass::isSyncModeQueue; using BaseClass::isTbxMode; - using BaseClass::latestInOrderOperationCompleted; - using BaseClass::latestSentInOrderEvent; using BaseClass::partitionCount; using BaseClass::pipeControlMultiKernelEventSync; using BaseClass::pipelineSelectStateTracking; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index 3d8d36414a..6282d094e8 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -9,6 +9,7 @@ #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/indirect_heap/indirect_heap.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" +#include "shared/test/common/helpers/relaxed_ordering_commands_helper.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_command_stream_receiver.h" @@ -1273,6 +1274,63 @@ HWTEST2_F(CommandListCreate, givenInOrderExecutionWhenDispatchingRelaxedOrdering EXPECT_TRUE(ultCsr->latestFlushedBatchBuffer.hasRelaxedOrderingDependencies); } +HWTEST2_F(CommandListCreate, givenInOrderExecutionWhenDispatchingRelaxedOrderingThenProgramConditionalBbStart, IsAtLeastXeHpcCore) { + using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG; + + DebugManagerStateRestore restore; + DebugManager.flags.DirectSubmissionRelaxedOrdering.set(1); + + auto ultCsr = static_cast *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver); + + ze_command_queue_desc_t desc = {}; + + auto mockCmdQ = std::make_unique>(device, ultCsr, &desc); + + auto cmdList = makeZeUniquePtr>>(); + + cmdList->cmdQImmediate = mockCmdQ.get(); + cmdList->isFlushTaskSubmissionEnabled = true; + cmdList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE; + cmdList->csr = ultCsr; + cmdList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + cmdList->commandContainer.setImmediateCmdListCsr(ultCsr); + cmdList->enableInOrderExecution(); + + Mock<::L0::Kernel> kernel; + ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + + ultCsr->recordFlusheBatchBuffer = true; + + auto directSubmission = new MockDirectSubmissionHw>(*ultCsr); + ultCsr->directSubmission.reset(directSubmission); + ultCsr->registerClient(); + ultCsr->registerClient(); + + auto cmdStream = cmdList->getCmdContainer().getCommandStream(); + + cmdList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + cmdList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + + size_t offset = cmdStream->getUsed(); + + cmdList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + + GenCmdList genCmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + genCmdList, + ptrOffset(cmdStream->getCpuBase(), offset), + cmdStream->getUsed() - offset)); + + // init registers + auto lrrCmd = genCmdCast(*genCmdList.begin()); + ASSERT_NE(nullptr, lrrCmd); + lrrCmd++; + lrrCmd++; + + EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart(lrrCmd, 0, cmdList->inOrderDependencyCounterAllocation->getGpuAddress(), 2, NEO::CompareOperation::Less, true)); +} + TEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmCmdListWithSyncModeAndAppendBarrierThenAppendBarrierReturnsDeviceLost) { DebugManagerStateRestore restorer; DebugManager.flags.EnableFlushTaskSubmission.set(1); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 927a2f4452..5a55cbc409 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -663,7 +663,9 @@ HWTEST_F(CommandListAppendLaunchKernel, givenInvalidKernelWhenAppendingThenRetur struct InOrderCmdListTests : public CommandListAppendLaunchKernel { struct MockEvent : public EventImp { - using EventImp::latestUsedInOrderCmdList; + using EventImp::inOrderExecDataAllocation; + using EventImp::inOrderExecEvent; + using EventImp::inOrderExecSignalValue; }; void SetUp() override { @@ -721,98 +723,36 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel { std::vector> events; }; -HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenAppendCalledThenHandleEventAssignment, IsAtLeastSkl) { - auto immCmdList = createImmCmdList(); - - EXPECT_TRUE(immCmdList->isInOrderExecutionEnabled()); - - auto eventPool = createEvents(1); - - EXPECT_TRUE(immCmdList->latestInOrderOperationCompleted); - EXPECT_EQ(nullptr, immCmdList->latestSentInOrderEvent); - - immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false); - - EXPECT_FALSE(immCmdList->latestInOrderOperationCompleted); - EXPECT_EQ(events[0]->toHandle(), immCmdList->latestSentInOrderEvent); - - immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); - - EXPECT_FALSE(immCmdList->latestInOrderOperationCompleted); - EXPECT_EQ(nullptr, immCmdList->latestSentInOrderEvent); -} - -HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenResetEventCalledThenResetCmdList, IsAtLeastSkl) { +HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenResetEventCalledThenResetEventState, IsAtLeastSkl) { auto immCmdList = createImmCmdList(); auto eventPool = createEvents(3); - EXPECT_TRUE(immCmdList->latestInOrderOperationCompleted); - EXPECT_EQ(nullptr, immCmdList->latestSentInOrderEvent); - immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false); - EXPECT_FALSE(immCmdList->latestInOrderOperationCompleted); - EXPECT_EQ(events[0]->toHandle(), immCmdList->latestSentInOrderEvent); + EXPECT_TRUE(events[0]->inOrderExecEvent); + EXPECT_EQ(events[0]->inOrderExecSignalValue, immCmdList->inOrderDependencyCounter); + EXPECT_EQ(events[0]->inOrderExecDataAllocation, immCmdList->inOrderDependencyCounterAllocation); events[0]->reset(); - EXPECT_TRUE(immCmdList->latestInOrderOperationCompleted); - EXPECT_EQ(nullptr, immCmdList->latestSentInOrderEvent); - - immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[1]->toHandle(), 0, nullptr, launchParams, false); - immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[2]->toHandle(), 0, nullptr, launchParams, false); - - // reset unused event - events[1]->reset(); - EXPECT_FALSE(immCmdList->latestInOrderOperationCompleted); - EXPECT_EQ(events[2]->toHandle(), immCmdList->latestSentInOrderEvent); - - // destroy - events[2]->destroy(); - events[2].release(); - - EXPECT_TRUE(immCmdList->latestInOrderOperationCompleted); - EXPECT_EQ(nullptr, immCmdList->latestSentInOrderEvent); -} - -HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSynchronizeEventCalledThenResetCmdList, IsAtLeastSkl) { - auto immCmdList = createImmCmdList(); - - auto eventPool = createEvents(1); - - EXPECT_TRUE(immCmdList->latestInOrderOperationCompleted); - EXPECT_EQ(nullptr, immCmdList->latestSentInOrderEvent); - - immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false); - - EXPECT_FALSE(immCmdList->latestInOrderOperationCompleted); - EXPECT_EQ(events[0]->toHandle(), immCmdList->latestSentInOrderEvent); - - uint32_t *hostAddr = static_cast(events[0]->getHostAddress()); - *hostAddr = Event::STATE_SIGNALED; - events[0]->hostSynchronize(-1); - - EXPECT_TRUE(immCmdList->latestInOrderOperationCompleted); - EXPECT_EQ(nullptr, immCmdList->latestSentInOrderEvent); - EXPECT_EQ(nullptr, events[0]->latestUsedInOrderCmdList); + EXPECT_FALSE(events[0]->inOrderExecEvent); + EXPECT_EQ(events[0]->inOrderExecSignalValue, 0u); + EXPECT_EQ(events[0]->inOrderExecDataAllocation, nullptr); } HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphoreForPreviousDispatch, IsAtLeastSkl) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + auto immCmdList = createImmCmdList(); - auto eventPool = createEvents(1); - - EXPECT_TRUE(immCmdList->latestInOrderOperationCompleted); - EXPECT_EQ(nullptr, immCmdList->latestSentInOrderEvent); - auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); - immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false); + immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); auto offset = cmdStream->getUsed(); - immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false); + immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( @@ -823,6 +763,50 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphor auto itor = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itor); + + auto semaphoreCmd = genCmdCast(*itor); + + EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); + EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, semaphoreCmd->getCompareOperation()); +} + +HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenSubmittingThenProgramSemaphoreForEvent, IsAtLeastSkl) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + + auto immCmdList = createImmCmdList(); + + auto eventPool = createEvents(1); + + auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); + + auto event0Handle = events[0]->toHandle(); + + immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + + immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, event0Handle, 0, nullptr, launchParams, false); + + auto offset = cmdStream->getUsed(); + + immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 1, &event0Handle, launchParams, false); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, + ptrOffset(cmdStream->getCpuBase(), offset), + cmdStream->getUsed() - offset)); + + auto itor = find(cmdList.begin(), cmdList.end()); + + itor++; // skip implicit dependency + + ASSERT_NE(cmdList.end(), itor); + + auto semaphoreCmd = genCmdCast(*itor); + + EXPECT_EQ(2u, semaphoreCmd->getSemaphoreDataDword()); + EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, semaphoreCmd->getCompareOperation()); } HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDispatchingThenHandleDependencyCounter, IsAtLeastSkl) { diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index f95621599f..bd5803fd89 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -12,6 +12,7 @@ #include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/mocks/mock_csr.h" +#include "shared/test/common/mocks/mock_graphics_allocation.h" #include "shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/mock_memory_operations_handler.h" #include "shared/test/common/mocks/mock_ostime.h" @@ -30,6 +31,7 @@ #include "level_zero/core/test/unit_tests/mocks/mock_event.h" #include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" #include "level_zero/core/test/unit_tests/mocks/mock_module.h" +#include "level_zero/core/test/unit_tests/sources/helper/ze_object_utils.h" #include #include @@ -2843,6 +2845,36 @@ HWTEST_F(EventTests, event->destroy(); } +HWTEST_F(EventTests, givenInOrderEventWhenHostEventSyncThenExpectDownloadEventAllocationWithEachQuery) { + std::map downloadAllocationTrack; + + neoDevice->getUltCommandStreamReceiver().commandStreamReceiverType = CommandStreamReceiverType::CSR_TBX; + neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->memoryOperationsInterface = + std::make_unique(); + auto event = zeUniquePtr(whiteboxCast(getHelper().createEvent(eventPool.get(), &eventDesc, device))); + + ASSERT_NE(event, nullptr); + + TagAddressType *eventAddress = static_cast(event->getHostAddress()); + *eventAddress = Event::STATE_SIGNALED; + + auto ultCsr = static_cast *>(event->csrs[0]); + VariableBackup> backupCsrDownloadImpl(&ultCsr->downloadAllocationImpl); + ultCsr->downloadAllocationImpl = [&downloadAllocationTrack](GraphicsAllocation &gfxAllocation) { + downloadAllocationTrack[&gfxAllocation]++; + }; + + NEO::MockGraphicsAllocation allocation; + event->enableInOrderExecMode(allocation, 1); + + constexpr uint64_t timeout = std::numeric_limits::max(); + auto result = event->hostSynchronize(timeout); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_NE(0u, downloadAllocationTrack[&allocation]); + EXPECT_EQ(1u, ultCsr->downloadAllocationsCalledCount); +} + HWTEST_F(EventTests, GivenEventIsReadyToDownloadAllAlocationsWhenDownloadAllocationNotRequiredThenDontDownloadAllocations) { neoDevice->getUltCommandStreamReceiver().commandStreamReceiverType = CommandStreamReceiverType::CSR_HW; diff --git a/shared/test/common/helpers/relaxed_ordering_commands_helper.h b/shared/test/common/helpers/relaxed_ordering_commands_helper.h index 9d6eeac767..5ecd54ed0b 100644 --- a/shared/test/common/helpers/relaxed_ordering_commands_helper.h +++ b/shared/test/common/helpers/relaxed_ordering_commands_helper.h @@ -195,7 +195,7 @@ bool verifyBaseConditionalBbStart(void *cmd, CompareOperation compareOperation, } auto predicateCmd = reinterpret_cast(++lrrCmd); - if (compareOperation == CompareOperation::Equal) { + if ((compareOperation == CompareOperation::Equal) || (compareOperation == CompareOperation::Less)) { if (!verifyMiPredicate(predicateCmd, MiPredicateType::NoopOnResult2Clear)) { return false; }