diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index be890d19e9..9c609dd6b0 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2131,10 +2131,6 @@ inline ze_result_t CommandListCoreFamily::addEventsToCmdList(uint if (hasInOrderDependencies) { CommandListCoreFamily::appendWaitOnInOrderDependency(this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset, relaxedOrderingAllowed); - - if (NEO::EncodeUserInterruptHelper::isOperationAllowed(NEO::EncodeUserInterruptHelper::afterSemaphoreMask)) { - NEO::EnodeUserInterrupt::encode(*commandContainer.getCommandStream()); - } } if (numWaitEvents > 0) { @@ -2210,6 +2206,10 @@ void CommandListCoreFamily::appendWaitOnInOrderDependency(NEO::Gr gpuAddress += sizeof(uint64_t); } + + if (NEO::EncodeUserInterruptHelper::isOperationAllowed(NEO::EncodeUserInterruptHelper::afterSemaphoreMask)) { + NEO::EnodeUserInterrupt::encode(*commandContainer.getCommandStream()); + } } template @@ -2232,6 +2232,10 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu callId = neoDevice->getRootDeviceEnvironment().tagsManager->currentCallCount; } + if (signalInOrderCompletion) { + CommandListCoreFamily::appendWaitOnInOrderDependency(this->inOrderDependencyCounterAllocation, this->inOrderDependencyCounter, this->inOrderAllocationOffset, relaxedOrderingAllowed); + } + bool dcFlushRequired = false; if (this->dcFlushSupport) { @@ -2395,12 +2399,9 @@ ze_result_t CommandListCoreFamily::appendWriteGlobalTimestamp( uint64_t *dstptr, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { - if (numWaitEvents > 0) { - if (phWaitEvents) { - CommandListCoreFamily::appendWaitOnEvents(numWaitEvents, phWaitEvents, false, true, false); - } else { - return ZE_RESULT_ERROR_INVALID_ARGUMENT; - } + ze_result_t ret = addEventsToCmdList(numWaitEvents, phWaitEvents, false, true); + if (ret != ZE_RESULT_SUCCESS) { + return ret; } Event *signalEvent = nullptr; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 67a066b73f..8c73943d88 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -1831,6 +1831,7 @@ HWTEST2_F(InOrderCmdListTests, givenCopyOnlyInOrderModeWhenProgrammingCopyRegion HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendWaitOnEventsThenSignalSyncAllocation, IsAtLeastXeHpCore) { using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; auto immCmdList = createImmCmdList(); @@ -1841,6 +1842,7 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendWaitOnEvents auto eventHandle = events[0]->toHandle(); immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, eventHandle, 0, nullptr, launchParams, false); + immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); auto offset = cmdStream->getUsed(); @@ -1851,14 +1853,23 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingAppendWaitOnEvents ptrOffset(cmdStream->getCpuBase(), offset), (cmdStream->getUsed() - offset))); - auto sdiItor = find(cmdList.begin(), cmdList.end()); + auto semaphoreItor = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), semaphoreItor); + + auto semaphoreCmd = genCmdCast(*semaphoreItor); + ASSERT_NE(nullptr, semaphoreCmd); + + EXPECT_EQ(2u, semaphoreCmd->getSemaphoreDataDword()); + EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), semaphoreCmd->getSemaphoreGraphicsAddress()); + + auto sdiItor = find(semaphoreItor, cmdList.end()); ASSERT_NE(cmdList.end(), sdiItor); auto sdiCmd = genCmdCast(*sdiItor); EXPECT_EQ(immCmdList->inOrderDependencyCounterAllocation->getGpuAddress(), sdiCmd->getAddress()); EXPECT_EQ(0u, sdiCmd->getStoreQword()); - EXPECT_EQ(2u, sdiCmd->getDataDword0()); + EXPECT_EQ(3u, sdiCmd->getDataDword0()); } HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenProgrammingCounterWithOverflowThenHandleItCorrectly, IsAtLeastXeHpCore) {