diff --git a/level_zero/api/core/ze_event_api_entrypoints.h b/level_zero/api/core/ze_event_api_entrypoints.h index b533fed584..f5708382d0 100644 --- a/level_zero/api/core/ze_event_api_entrypoints.h +++ b/level_zero/api/core/ze_event_api_entrypoints.h @@ -58,7 +58,7 @@ ze_result_t zeEventPoolCloseIpcHandle( ze_result_t zeCommandListAppendSignalEvent( ze_command_list_handle_t hCommandList, ze_event_handle_t hEvent) { - return L0::CommandList::fromHandle(hCommandList)->appendSignalEvent(hEvent); + return L0::CommandList::fromHandle(hCommandList)->appendSignalEvent(hEvent, false); } ze_result_t zeCommandListAppendWaitOnEvents( diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index 0d05d38636..c44d43a4bf 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -133,7 +133,7 @@ struct CommandList : _ze_command_list_handle_t { size_t patternSize, size_t size, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0; virtual ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) = 0; - virtual ze_result_t appendSignalEvent(ze_event_handle_t hEvent) = 0; + virtual ze_result_t appendSignalEvent(ze_event_handle_t hEvent, bool relaxedOrderingDispatch) = 0; virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, CommandToPatchContainer *outWaitCmds, bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush, bool copyOffloadOperation) = 0; virtual ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent, diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 841d1009ec..f7d43933f4 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -179,7 +179,7 @@ struct CommandListCoreFamily 
: public CommandListImp { uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t hostSynchronize(uint64_t timeout) override; - ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override; + ze_result_t appendSignalEvent(ze_event_handle_t hEvent, bool relaxedOrderingDispatch) override; ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, CommandToPatchContainer *outWaitCmds, bool relaxedOrderingAllowed, bool trackDependencies, bool apiRequest, bool skipAddingWaitEventsToResidency, bool skipFlush, bool copyOffloadOperation) override; void appendWaitOnInOrderDependency(std::shared_ptr &inOrderExecInfo, CommandToPatchContainer *outListCommands, diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 012955a8a5..a267a5f8ce 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2466,9 +2466,9 @@ inline ze_result_t CommandListCoreFamily::addEventsToCmdList(uint } template -ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_handle_t hEvent) { +ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_handle_t hEvent, bool relaxedOrderingDispatch) { if (this->isInOrderExecutionEnabled()) { - handleInOrderImplicitDependencies(isRelaxedOrderingDispatchAllowed(0, false), false); + handleInOrderImplicitDependencies(relaxedOrderingDispatch, false); } auto event = Event::fromHandle(hEvent); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index afb0c33a14..bdbd3e225e 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -97,7 +97,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily::appendMemoryFill(void } template -ze_result_t CommandListCoreFamilyImmediate::appendSignalEvent(ze_event_handle_t hSignalEvent) { +ze_result_t 
CommandListCoreFamilyImmediate::appendSignalEvent(ze_event_handle_t hSignalEvent, bool relaxedOrderingDispatch) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; ze_result_t ret = ZE_RESULT_SUCCESS; + relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(0, false); + bool hasStallingCmds = !Event::fromHandle(hSignalEvent)->isCounterBased() || hasStallingCmdsForRelaxedOrdering(0, relaxedOrderingDispatch); + checkAvailableSpace(0, false, commonImmediateCommandSize); - ret = CommandListCoreFamily::appendSignalEvent(hSignalEvent); - return flushImmediate(ret, true, true, false, false, false, hSignalEvent, false); + ret = CommandListCoreFamily::appendSignalEvent(hSignalEvent, relaxedOrderingDispatch); + return flushImmediate(ret, true, hasStallingCmds, relaxedOrderingDispatch, false, false, hSignalEvent, false); } template @@ -763,7 +766,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendPageFaultCopy(N this->appendMemoryCopyBlit(dstAddressParam, dstAllocation, 0u, srcAddressParam, srcAllocation, 0u, sizeParam); - return CommandListCoreFamily::appendSignalEvent(hSignalEventParam); + return CommandListCoreFamily::appendSignalEvent(hSignalEventParam, false); }); } else { ret = CommandListCoreFamily::appendPageFaultCopy(dstAllocation, srcAllocation, size, flushHost); @@ -1527,7 +1530,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendCommandLists(ui bool relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false); if (hSignalEvent) { - ret = CommandListCoreFamily::appendSignalEvent(hSignalEvent); + ret = CommandListCoreFamily::appendSignalEvent(hSignalEvent, false); } if (ret != ZE_RESULT_SUCCESS) { diff --git a/level_zero/core/source/device/bcs_split.h b/level_zero/core/source/device/bcs_split.h index b0799e0f1e..a94731e078 100644 --- a/level_zero/core/source/device/bcs_split.h +++ b/level_zero/core/source/device/bcs_split.h @@ -85,7 +85,7 @@ struct BcsSplit { auto barrierRequired = !cmdList->isInOrderExecutionEnabled() && 
cmdList->isBarrierRequired(); if (barrierRequired) { - cmdList->appendSignalEvent(this->events.barrier[markerEventIndex]->toHandle()); + cmdList->appendSignalEvent(this->events.barrier[markerEventIndex]->toHandle(), false); } auto subcopyEventIndex = markerEventIndex * this->cmdQs.size(); diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp index ae790fa3a1..5790fb1de6 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp @@ -353,7 +353,7 @@ void ImmediateCmdListSharedHeapsFlushTaskFixtureInit::appendNonKernelOperation(L result = currentCmdList->appendBarrier(nullptr, 0, nullptr, false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } else if (operation == NonKernelOperation::SignalEvent) { - result = currentCmdList->appendSignalEvent(event->toHandle()); + result = currentCmdList->appendSignalEvent(event->toHandle(), false); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } else if (operation == NonKernelOperation::ResetEvent) { result = currentCmdList->appendEventReset(event->toHandle()); diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 719df45236..535f75bc3c 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -473,7 +473,7 @@ struct MockCommandList : public CommandList { ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch)); ADDMETHOD_NOBASE(appendSignalEvent, ze_result_t, ZE_RESULT_SUCCESS, - (ze_event_handle_t hEvent)); + (ze_event_handle_t hEvent, bool relaxedOrderingDispatch)); ADDMETHOD_NOBASE(appendWaitOnEvents, ze_result_t, ZE_RESULT_SUCCESS, (uint32_t numEvents, diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index 70c4e4a836..e1968d5901 100644 
--- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -1073,7 +1073,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithSyncModeAndAppendSignalEvent ASSERT_NE(nullptr, eventObject->csrs[0]); ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csrs[0]); - commandList->appendSignalEvent(event); + commandList->appendSignalEvent(event, false); auto result = eventObject->hostSignal(false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); @@ -1195,7 +1195,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh verifyFlags(commandList->appendEventReset(event), true, true); - verifyFlags(commandList->appendSignalEvent(event), true, true); + verifyFlags(commandList->appendSignalEvent(event, false), true, true); verifyFlags(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false), false, false); @@ -1597,7 +1597,7 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh verifyFlags(commandList->appendEventReset(event), false, false); - verifyFlags(commandList->appendSignalEvent(event), false, false); + verifyFlags(commandList->appendSignalEvent(event, false), false, false); verifyFlags(commandList->appendPageFaultCopy(kernel.getIsaAllocation(), kernel.getIsaAllocation(), 1, false), false, false); @@ -2006,7 +2006,7 @@ HWTEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmediateCommandListAndAppen const auto oldCsr = queue->csr; queue->csr = &mockCommandStreamReceiver; - returnValue = commandList->appendSignalEvent(event); + returnValue = commandList->appendSignalEvent(event, false); EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, returnValue); queue->csr = oldCsr; @@ -2113,7 +2113,7 @@ HWTEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmediateCommandListAndAppen returnValue = commandList->appendBarrier(nullptr, 1, &event, false); EXPECT_EQ(ZE_RESULT_SUCCESS, 
returnValue); - returnValue = commandList->appendSignalEvent(event); + returnValue = commandList->appendSignalEvent(event, false); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); returnValue = eventObject->hostSignal(false); @@ -2283,7 +2283,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendSignalEven ASSERT_NE(nullptr, eventObject->csrs[0]); ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csrs[0]); - commandList->appendSignalEvent(event); + commandList->appendSignalEvent(event, false); auto result = eventObject->hostSignal(false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp index adb41f2d5d..20dbe6ea30 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp @@ -1234,7 +1234,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendSignalEven ASSERT_NE(nullptr, eventObject->csrs[0]); ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, eventObject->csrs[0]); - commandList->appendSignalEvent(event); + commandList->appendSignalEvent(event, false); auto result = eventObject->hostSignal(false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp index 6854e5478f..d94aba26f2 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp @@ -547,7 +547,7 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::compute, 
returnValue)); - result = commandList->appendSignalEvent(event->toHandle()); + result = commandList->appendSignalEvent(event->toHandle(), false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); context->destroy(); @@ -579,7 +579,7 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::compute, returnValue)); - result = commandList->appendSignalEvent(event->toHandle()); + result = commandList->appendSignalEvent(event->toHandle(), false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); context->destroy(); @@ -611,7 +611,7 @@ HWTEST_F(CommandListImmediateFlushTaskComputeTests, givenUseCsrImmediateSubmissi ze_result_t returnValue = ZE_RESULT_SUCCESS; std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::compute, returnValue)); - result = commandList->appendSignalEvent(event->toHandle()); + result = commandList->appendSignalEvent(event->toHandle(), false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); context->destroy(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp index 5fb8d29a0b..fafbfecdf5 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp @@ -566,7 +566,7 @@ HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendSignalEventThe MockEvent event; event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST; event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST; - commandList->appendSignalEvent(event.toHandle()); + commandList->appendSignalEvent(event.toHandle(), false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), 
commandContainer.getCommandStream()->getUsed())); @@ -583,7 +583,7 @@ HWTEST_F(CommandListCreate, givenCommandListWhenAppendSignalEventWithScopeThenPi MockEvent event; event.waitScope = ZE_EVENT_SCOPE_FLAG_HOST; event.signalScope = ZE_EVENT_SCOPE_FLAG_HOST; - commandList->appendSignalEvent(event.toHandle()); + commandList->appendSignalEvent(event.toHandle(), false); GenCmdList cmdList; ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp index aa34265403..02952d0e35 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_2.cpp @@ -572,7 +572,7 @@ HWTEST_F(CommandListAppendLaunchKernelSWTags, givenEnableSWTagsWhenAppendSignalE eventPool->createEvent(&eventDesc, &hEvent); - auto result = commandList->appendSignalEvent(hEvent); + auto result = commandList->appendSignalEvent(hEvent, false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = cmdStream->getUsed(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp index 9d99dc9366..d421785bc7 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp @@ -33,7 +33,7 @@ HWTEST_F(CommandListAppendSignalEvent, WhenAppendingSignalEventWithoutScopeThenM using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; auto usedSpaceBefore = commandList->getCmdContainer().getCommandStream()->getUsed(); - auto result = 
commandList->appendSignalEvent(event->toHandle()); + auto result = commandList->appendSignalEvent(event->toHandle(), false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed(); @@ -51,7 +51,7 @@ HWTEST_F(CommandListAppendSignalEvent, WhenAppendingSignalEventWithoutScopeThenM } HWTEST_F(CommandListAppendSignalEvent, givenCmdlistWhenAppendingSignalEventThenEventPoolGraphicsAllocationIsAddedToResidencyContainer) { - auto result = commandList->appendSignalEvent(event->toHandle()); + auto result = commandList->appendSignalEvent(event->toHandle(), false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto &residencyContainer = commandList->getCmdContainer().getResidencyContainer(); @@ -81,7 +81,7 @@ HWTEST_F(CommandListAppendSignalEvent, givenEventWithScopeFlagDeviceWhenAppendin auto eventHostVisible = std::unique_ptr(Event::create(eventPoolHostVisible.get(), &eventDesc, device)); auto usedSpaceBefore = commandList->getCmdContainer().getCommandStream()->getUsed(); - result = commandList->appendSignalEvent(eventHostVisible->toHandle()); + result = commandList->appendSignalEvent(eventHostVisible->toHandle(), false); ASSERT_EQ(ZE_RESULT_SUCCESS, result); auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed(); @@ -365,7 +365,7 @@ HWTEST2_F(CommandListAppendSignalEvent, givenTimestampEventUsedInSignalThenPipeC EXPECT_EQ(ZE_RESULT_SUCCESS, result); auto event = std::unique_ptr(L0::Event::create(eventPool.get(), &eventDesc, device)); - commandList->appendSignalEvent(event->toHandle()); + commandList->appendSignalEvent(event->toHandle(), false); auto contextOffset = event->getContextEndOffset(); auto baseAddr = event->getGpuAddress(device); auto gpuAddress = ptrOffset(baseAddr, contextOffset); @@ -410,7 +410,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST; commandList->partitionCount = packets; - ze_result_t returnValue = 
commandList->appendSignalEvent(event->toHandle()); + ze_result_t returnValue = commandList->appendSignalEvent(event->toHandle(), false); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); EXPECT_EQ(packets, event->getPacketsInUse()); @@ -455,7 +455,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, givenMultiTileAndDynamicPostSy event->setEventTimestampFlag(true); commandList->partitionCount = 2; - EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendSignalEvent(event->toHandle())); + EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendSignalEvent(event->toHandle(), false)); size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), false); @@ -510,7 +510,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, givenMultiTileAndDynamicPostSy offset = cmdStream->getUsed(); - EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendSignalEvent(event->toHandle())); + EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendSignalEvent(event->toHandle(), false)); expectedSize = sizeof(MI_STORE_DATA_IMM); usedSize = cmdStream->getUsed() - offset; @@ -547,7 +547,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, event->signalScope = 0; commandList->partitionCount = packets; - ze_result_t returnValue = commandList->appendSignalEvent(event->toHandle()); + ze_result_t returnValue = commandList->appendSignalEvent(event->toHandle(), false); EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); EXPECT_EQ(packets, event->getPacketsInUse()); @@ -885,7 +885,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, event->setEventTimestampFlag(false); - commandList->appendSignalEvent(event->toHandle()); + commandList->appendSignalEvent(event->toHandle(), false); size_t usedAfterSize = cmdStream->getUsed(); GenCmdList cmdList; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp index 
ec95bc1b5b..16569c3961 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp @@ -261,12 +261,12 @@ class MockCommandListImmediateHwWithWaitEventFail : public WhiteBox<::L0::Comman return BaseClass::appendWaitOnEvents(numEvents, phEvent, outWaitCmds, relaxedOrderingAllowed, trackDependencies, apiRequest, skipAddingWaitEventsToResidency, skipFlush, copyOffloadOperation); }; - ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override { + ze_result_t appendSignalEvent(ze_event_handle_t hEvent, bool relaxedOrderingDispatch) override { if (forceSignalEventError) { return ZE_RESULT_ERROR_INVALID_ARGUMENT; } appendSignalEventCalled++; - return BaseClass::appendSignalEvent(hEvent); + return BaseClass::appendSignalEvent(hEvent, relaxedOrderingDispatch); } ze_result_t executeCommandListImmediate(bool performMigration) override { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp index 916b9db54a..60e5fa77df 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp @@ -737,7 +737,7 @@ struct CommandListSignalAllEventPacketFixture : public ModuleFixture { ASSERT_NE(nullptr, event.get()); size_t sizeBefore = cmdStream->getUsed(); - result = commandList->appendSignalEvent(event->toHandle()); + result = commandList->appendSignalEvent(event->toHandle(), false); size_t sizeAfter = cmdStream->getUsed(); EXPECT_EQ(ZE_RESULT_SUCCESS, result); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp index 59d12898cb..fce863628b 100644 --- 
a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_1.cpp @@ -1512,7 +1512,7 @@ HWTEST2_F(InOrderCmdListTests, givenImmediateCmdListWhenDispatchingWithRegularEv } events[0]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation()); - immCmdList->appendSignalEvent(eventHandle); + immCmdList->appendSignalEvent(eventHandle, false); if (dcFlushRequired) { EXPECT_EQ(Event::CounterBasedMode::initiallyDisabled, events[0]->counterBasedMode); } else { @@ -1639,7 +1639,7 @@ HWTEST2_F(InOrderCmdListTests, givenNonInOrderCmdListWhenPassingCounterBasedEven EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, copyOnlyCmdList->appendBlitFill(alloc, &copyData, 1, 16, events[0].get(), 0, nullptr, false)); - EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, immCmdList->appendSignalEvent(eventHandle)); + EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, immCmdList->appendSignalEvent(eventHandle, false)); EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, immCmdList->appendWriteGlobalTimestamp(reinterpret_cast(copyData), eventHandle, 0, nullptr)); @@ -1892,6 +1892,53 @@ HWTEST2_F(InOrderCmdListTests, givenCmdsChainingWhenDispatchingKernelWithRelaxed findConditionalBbStarts(1); // implicit dependency } +HWTEST2_F(InOrderCmdListTests, givenRelaxedOrderingEnabledWhenSignalEventCalledThenPassStallingCmdsInfo, IsAtLeastXeHpcCore) { + using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; + + debugManager.flags.DirectSubmissionRelaxedOrdering.set(1); + + auto ultCsr = static_cast *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver); + ultCsr->recordFlushedBatchBuffer = true; + + auto directSubmission = new MockDirectSubmissionHw>(*ultCsr); + ultCsr->directSubmission.reset(directSubmission); + + auto verifyFlags = [&ultCsr](bool relaxedOrderingExpected, bool stallingCmdsExpected) { + EXPECT_EQ(stallingCmdsExpected, ultCsr->recordedImmediateDispatchFlags.hasStallingCmds); + 
EXPECT_EQ(stallingCmdsExpected, ultCsr->latestFlushedBatchBuffer.hasStallingCmds); + + EXPECT_EQ(relaxedOrderingExpected, ultCsr->latestFlushedBatchBuffer.hasRelaxedOrderingDependencies); + }; + + auto immCmdList0 = createImmCmdList(); + immCmdList0->appendLaunchKernel(kernel->toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); // NP state init + + auto immCmdList1 = createImmCmdList(); + auto immCmdList2 = createImmCmdList(); + + auto eventPool = createEvents(2, false); + events[1]->makeCounterBasedInitiallyDisabled(eventPool->getAllocation()); + auto nonCbEvent = events[1]->toHandle(); + + immCmdList1->appendSignalEvent(events[0]->toHandle(), true); + verifyFlags(false, false); // no dependencies + + immCmdList2->appendSignalEvent(events[0]->toHandle(), false); + verifyFlags(false, false); // no dependencies + + immCmdList1->appendSignalEvent(events[0]->toHandle(), true); + verifyFlags(true, false); // relaxed ordering with implicit dependency + + immCmdList1->appendSignalEvent(nonCbEvent, true); + verifyFlags(true, true); // relaxed ordering with implicit dependency + + immCmdList1->cmdQImmediate->unregisterCsrClient(); + immCmdList2->cmdQImmediate->unregisterCsrClient(); + + immCmdList1->appendSignalEvent(events[0]->toHandle(), false); + verifyFlags(false, true); // relaxed ordering disabled == stalling semaphore +} + HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenWaitingForEventFromPreviousAppendThenSkip, IsAtLeastXeHpCore) { using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; @@ -2798,7 +2845,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendSignalEventT auto offset = cmdStream->getUsed(); - immCmdList->appendSignalEvent(events[0]->toHandle()); + immCmdList->appendSignalEvent(events[0]->toHandle(), false); auto inOrderExecInfo = immCmdList->inOrderExecInfo; uint64_t sdiSyncVa = 0; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp 
b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp index 872603c56f..70cce92aa8 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp @@ -1405,7 +1405,7 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL regularCmdList->appendMemoryFill(data, data, 1, size, nullptr, 0, nullptr, false); - regularCmdList->appendSignalEvent(eventHandle); + regularCmdList->appendSignalEvent(eventHandle, false); regularCmdList->appendBarrier(nullptr, 1, &eventHandle, false); @@ -1685,7 +1685,7 @@ HWTEST2_F(StandaloneInOrderTimestampAllocationTests, givenSignalScopeEventWhenSi size_t offset = cmdStream->getUsed(); { - cmdList->appendSignalEvent(events[1]->toHandle()); + cmdList->appendSignalEvent(events[1]->toHandle(), false); GenCmdList hwCmdList; EXPECT_TRUE(FamilyType::Parse::parseCommandBuffer(hwCmdList, ptrOffset(cmdStream->getCpuBase(), offset), (cmdStream->getUsed() - offset))); @@ -1697,7 +1697,7 @@ HWTEST2_F(StandaloneInOrderTimestampAllocationTests, givenSignalScopeEventWhenSi offset = cmdStream->getUsed(); { - cmdList->appendSignalEvent(events[0]->toHandle()); + cmdList->appendSignalEvent(events[0]->toHandle(), false); GenCmdList hwCmdList; EXPECT_TRUE(FamilyType::Parse::parseCommandBuffer(hwCmdList, ptrOffset(cmdStream->getCpuBase(), offset), (cmdStream->getUsed() - offset))); diff --git a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp index f8e0ddebd3..8a99677c47 100644 --- a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp +++ b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp @@ -383,7 +383,7 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledWithImmediat returnValue = commandList->appendBarrier(nullptr, 1, &event, false); 
EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); - returnValue = commandList->appendSignalEvent(event); + returnValue = commandList->appendSignalEvent(event, false); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); returnValue = eventObject->hostSignal(false); @@ -447,7 +447,7 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledWithImmedia returnValue = commandList->appendBarrier(nullptr, 1, &event, false); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); - returnValue = commandList->appendSignalEvent(event); + returnValue = commandList->appendSignalEvent(event, false); EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS); returnValue = eventObject->hostSignal(false); diff --git a/level_zero/tools/source/metrics/metric_oa_query_imp.cpp b/level_zero/tools/source/metrics/metric_oa_query_imp.cpp index a203d40170..3962082e18 100644 --- a/level_zero/tools/source/metrics/metric_oa_query_imp.cpp +++ b/level_zero/tools/source/metrics/metric_oa_query_imp.cpp @@ -901,7 +901,7 @@ ze_result_t OaMetricQueryImp::writeMetricQuery(CommandList &commandList, ze_even // Write completion event. if (result && writeCompletionEvent) { - result = commandList.appendSignalEvent(hSignalEvent) == ZE_RESULT_SUCCESS; + result = commandList.appendSignalEvent(hSignalEvent, false) == ZE_RESULT_SUCCESS; } return result ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN;