diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 53e6f85bb1..6c0f671aac 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2024,6 +2024,9 @@ inline uint32_t CommandListCoreFamily::getRegionOffsetForAppendMe template inline ze_result_t CommandListCoreFamily::addEventsToCmdList(uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingAllowed, bool trackDependencies) { + if (latestSentInOrderEvent) { + CommandListCoreFamily::appendWaitOnEvents(1, &latestSentInOrderEvent, relaxedOrderingAllowed, trackDependencies); + } if (numWaitEvents > 0) { if (phWaitEvents) { diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index 5f3d159769..baf117e51e 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -167,6 +167,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily::appendLaunchKernel( ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, const CmdListKernelLaunchParams &launchParams, bool relaxedOrderingDispatch) { - relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents); + relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); @@ -351,7 +351,7 @@ template ze_result_t CommandListCoreFamilyImmediate::appendLaunchKernelIndirect( ze_kernel_handle_t kernelHandle, const ze_group_count_t *pDispatchArgumentsBuffer, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { - relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents); + relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); @@ -388,7 +388,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopy( ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { - relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents); + relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); @@ -409,7 +409,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopy( NEO::TransferDirection direction; auto isSplitNeeded = this->isAppendSplitNeeded(dstptr, srcptr, size, direction); if (isSplitNeeded) { - relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, 1); // split generates more than 1 event + relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(1); // split generates more than 1 event ret = static_cast(this->device)->bcsSplit.appendSplitCall(this, dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents, true, relaxedOrderingDispatch, direction, [&](void *dstptrParam, const void *srcptrParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) { return CommandListCoreFamily::appendMemoryCopy(dstptrParam, srcptrParam, sizeParam, hSignalEventParam, 0u, nullptr, relaxedOrderingDispatch); }); @@ -433,7 +433,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopyRegio ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { - relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents); + relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); @@ -445,7 +445,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopyRegio NEO::TransferDirection direction; auto isSplitNeeded = this->isAppendSplitNeeded(dstPtr, srcPtr, this->getTotalSizeForCopyRegion(dstRegion, dstPitch, dstSlicePitch), direction); if (isSplitNeeded) { - relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, 1); // split generates more than 1 event + relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(1); // split generates more than 1 event ret = static_cast(this->device)->bcsSplit.appendSplitCall(this, dstRegion->originX, srcRegion->originX, dstRegion->width, hSignalEvent, numWaitEvents, phWaitEvents, true, relaxedOrderingDispatch, direction, [&](uint32_t dstOriginXParam, uint32_t srcOriginXParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) { ze_copy_region_t dstRegionLocal = {}; ze_copy_region_t srcRegionLocal = {}; @@ -474,7 +474,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryFill(void ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { - relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents); + relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); @@ -527,7 +527,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendPageFaultCopy(N bool relaxedOrdering = false; if (isSplitNeeded) { - relaxedOrdering = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, 1); // split generates more than 1 event + relaxedOrdering = isRelaxedOrderingDispatchAllowed(1); // split generates more than 1 event uintptr_t dstAddress = static_cast(dstAllocation->getGpuAddress()); uintptr_t srcAddress = static_cast(srcAllocation->getGpuAddress()); ret = static_cast(this->device)->bcsSplit.appendSplitCall(this, dstAddress, srcAddress, size, nullptr, 0u, nullptr, false, relaxedOrdering, direction, [&](uintptr_t dstAddressParam, uintptr_t srcAddressParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) { @@ -601,7 +601,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyRegion ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { - relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents); + relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); @@ -621,7 +621,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyFromMe ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { - relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents); + relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); @@ -642,7 +642,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyToMemo ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { - relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents); + relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); @@ -676,7 +676,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendLaunchCooperati ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *waitEventHandles, bool relaxedOrderingDispatch) { - relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents); + relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); @@ -1020,4 +1020,11 @@ void CommandListCoreFamilyImmediate::checkAssert() { } } +template +bool CommandListCoreFamilyImmediate::isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const { + auto numEvents = numWaitEvents + (latestSentInOrderEvent ? 1 : 0); + + return NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numEvents); +} + } // namespace L0 diff --git a/level_zero/core/source/event/event.cpp b/level_zero/core/source/event/event.cpp index 7c88816237..ec4b1d88e5 100644 --- a/level_zero/core/source/event/event.cpp +++ b/level_zero/core/source/event/event.cpp @@ -387,4 +387,15 @@ void Event::resetPackets(bool resetAllPackets) { this->csr = this->device->getNEODevice()->getDefaultEngine().commandStreamReceiver; } +void Event::setIsCompleted() { + if (this->isCompleted.load() == STATE_CLEARED) { + this->isCompleted = STATE_SIGNALED; + + if (latestUsedInOrderCmdList) { + latestUsedInOrderCmdList->unsetLastInOrderOutEvent(this->toHandle()); + latestUsedInOrderCmdList = nullptr; + } + } +} + } // namespace L0 diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index 1897e00e84..234f028413 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -162,11 +162,7 @@ struct Event : _ze_event_handle_t { this->isCompleted.store(disableFromRegularList ? HOST_CACHING_DISABLED_PERMANENT : HOST_CACHING_DISABLED); } - void setIsCompleted() { - if (this->isCompleted.load() == STATE_CLEARED) { - this->isCompleted = STATE_SIGNALED; - } - } + void setIsCompleted(); bool isAlreadyCompleted() { return this->isCompleted == STATE_SIGNALED; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index 56bee816cc..a2695e4c8d 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -1226,6 +1226,52 @@ HWTEST2_F(CommandListCreate, givenDirectSubmissionAndImmCmdListWhenDispatchingTh driverHandle->releaseImportedPointer(dstPtr); } +HWTEST2_F(CommandListCreate, givenInOrderExecutionWhenDispatchingRelaxedOrderingWithoutInputEventsThenCountPreviousEventAsWaitlist, IsAtLeastXeHpcCore) { + DebugManagerStateRestore restore; + DebugManager.flags.DirectSubmissionRelaxedOrdering.set(1); + + ze_command_queue_desc_t desc = {}; + desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); + ASSERT_NE(nullptr, commandList); + auto whiteBoxCmdList = static_cast(commandList.get()); + whiteBoxCmdList->setInOrderExecution(true); + + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.count = 1; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE | ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP; + + ze_event_desc_t eventDesc = {}; + eventDesc.wait = ZE_EVENT_SCOPE_FLAG_HOST; + + ze_event_handle_t event = nullptr; + + std::unique_ptr eventPool(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + + ASSERT_EQ(ZE_RESULT_SUCCESS, eventPool->createEvent(&eventDesc, &event)); + std::unique_ptr eventObject(L0::Event::fromHandle(event)); + + Mock<::L0::Kernel> kernel; + ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + + auto ultCsr = static_cast *>(whiteBoxCmdList->csr); + ultCsr->recordFlusheBatchBuffer = true; + + auto directSubmission = new MockDirectSubmissionHw>(*ultCsr); + ultCsr->directSubmission.reset(directSubmission); + ultCsr->registerClient(); + ultCsr->registerClient(); + + commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, event, 0, nullptr, launchParams, false); + + commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + EXPECT_TRUE(ultCsr->recordedDispatchFlags.hasRelaxedOrderingDependencies); + EXPECT_TRUE(ultCsr->latestFlushedBatchBuffer.hasRelaxedOrderingDependencies); +} + TEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmCmdListWithSyncModeAndAppendBarrierThenAppendBarrierReturnsDeviceLost) { DebugManagerStateRestore restorer; DebugManager.flags.EnableFlushTaskSubmission.set(1); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 89ef684a9e..5a617660a1 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -689,8 +689,30 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel { return eventPool; } + template + std::unique_ptr>> createImmCmdList() { + auto cmdList = std::make_unique>>(); + + auto csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver; + + ze_command_queue_desc_t desc = {}; + + mockCmdQ = std::make_unique>(device, csr, &desc); + + cmdList->cmdQImmediate = mockCmdQ.get(); + cmdList->isFlushTaskSubmissionEnabled = true; + cmdList->setInOrderExecution(true); + cmdList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE; + cmdList->csr = csr; + cmdList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); + cmdList->commandContainer.setImmediateCmdListCsr(csr); + + return cmdList; + } + DebugManagerStateRestore restorer; + std::unique_ptr> mockCmdQ; ze_result_t returnValue = ZE_RESULT_SUCCESS; ze_group_count_t groupCount = {3, 2, 1}; CmdListKernelLaunchParams launchParams = {}; @@ -698,14 +720,8 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel { std::vector> events; }; -HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenAppendCalledThenHandleEventAssignment, MatchAny) { - ze_command_list_handle_t cmdListHandle; - ze_command_queue_desc_t queueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; - queueDesc.ordinal = 0; - queueDesc.index = 0; - device->createCommandListImmediate(&queueDesc, &cmdListHandle); - auto cmdList = static_cast *>(CommandList::fromHandle(cmdListHandle)); - auto immCmdList = static_cast> *>(cmdList); +HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenAppendCalledThenHandleEventAssignment, IsAtLeastSkl) { + auto immCmdList = createImmCmdList(); EXPECT_TRUE(immCmdList->isInOrderExecutionEnabled()); @@ -723,18 +739,10 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenAppendCalledThenHandleEventAs EXPECT_FALSE(immCmdList->latestInOrderOperationCompleted); EXPECT_EQ(nullptr, immCmdList->latestSentInOrderEvent); - - CommandList::fromHandle(cmdListHandle)->destroy(); } -HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenResetEventCalledThenResetCmdList, MatchAny) { - ze_command_list_handle_t cmdListHandle; - ze_command_queue_desc_t queueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; - queueDesc.ordinal = 0; - queueDesc.index = 0; - device->createCommandListImmediate(&queueDesc, &cmdListHandle); - auto cmdList = static_cast *>(CommandList::fromHandle(cmdListHandle)); - auto immCmdList = static_cast> *>(cmdList); +HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenResetEventCalledThenResetCmdList, IsAtLeastSkl) { + auto immCmdList = createImmCmdList(); auto eventPool = createEvents(3); @@ -765,8 +773,55 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenResetEventCalledThenResetCmdL EXPECT_TRUE(immCmdList->latestInOrderOperationCompleted); EXPECT_EQ(nullptr, immCmdList->latestSentInOrderEvent); +} - CommandList::fromHandle(cmdListHandle)->destroy(); +HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSynchronizeEventCalledThenResetCmdList, IsAtLeastSkl) { + auto immCmdList = createImmCmdList(); + + auto eventPool = createEvents(1); + + EXPECT_TRUE(immCmdList->latestInOrderOperationCompleted); + EXPECT_EQ(nullptr, immCmdList->latestSentInOrderEvent); + + immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false); + + EXPECT_FALSE(immCmdList->latestInOrderOperationCompleted); + EXPECT_EQ(events[0]->toHandle(), immCmdList->latestSentInOrderEvent); + + uint32_t *hostAddr = static_cast(events[0]->getHostAddress()); + *hostAddr = Event::STATE_SIGNALED; + events[0]->hostSynchronize(-1); + + EXPECT_TRUE(immCmdList->latestInOrderOperationCompleted); + EXPECT_EQ(nullptr, immCmdList->latestSentInOrderEvent); + EXPECT_EQ(nullptr, events[0]->latestUsedInOrderCmdList); +} + +HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphoreForPreviousDispatch, IsAtLeastSkl) { + auto immCmdList = createImmCmdList(); + + auto eventPool = createEvents(1); + + EXPECT_TRUE(immCmdList->latestInOrderOperationCompleted); + EXPECT_EQ(nullptr, immCmdList->latestSentInOrderEvent); + + auto cmdStream = immCmdList->getCmdContainer().getCommandStream(); + + immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false); + + auto offset = cmdStream->getUsed(); + + immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, events[0]->toHandle(), 0, nullptr, launchParams, false); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, + ptrOffset(cmdStream->getCpuBase(), offset), + cmdStream->getUsed() - offset)); + + auto itor = find(cmdList.begin(), cmdList.end()); + + ASSERT_NE(cmdList.end(), itor); } struct CommandListAppendLaunchKernelWithImplicitArgs : CommandListAppendLaunchKernel {