From 36d3c652844bbb1592fcc8d96f73f3c24b537b63 Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Tue, 9 May 2023 10:06:53 +0000 Subject: [PATCH] feature: Use Event allocation for cross in-order CL synchronization Related-To: LOCI-4332 Signed-off-by: Dunajski, Bartosz --- level_zero/core/source/cmdlist/cmdlist_hw.h | 2 +- level_zero/core/source/cmdlist/cmdlist_hw.inl | 10 ++-- level_zero/core/source/event/event.h | 1 + .../test_cmdlist_append_launch_kernel_3.cpp | 48 +++++++++++++++++-- 4 files changed, 52 insertions(+), 9 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 24ed6a6581..c0faa201d4 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -168,7 +168,7 @@ struct CommandListCoreFamily : CommandListImp { ze_result_t appendSignalEvent(ze_event_handle_t hEvent) override; ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, bool relaxedOrderingAllowed, bool trackDependencies) override; - void appendWaitOnInOrderDependency(uint32_t waitValue, bool relaxedOrderingAllowed); + void appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint32_t waitValue, bool relaxedOrderingAllowed); ze_result_t appendWriteGlobalTimestamp(uint64_t *dstptr, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override; ze_result_t appendMemoryCopyFromContext(void *dstptr, ze_context_handle_t hContextSrc, const void *srcptr, diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 8462ee7014..a8e38c567e 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2029,7 +2029,7 @@ inline ze_result_t CommandListCoreFamily::addEventsToCmdList(uint } if (inOrderDependencyCounter > 0) { - CommandListCoreFamily::appendWaitOnInOrderDependency(inOrderDependencyCounter, relaxedOrderingAllowed); + CommandListCoreFamily::appendWaitOnInOrderDependency(this->inOrderDependencyCounterAllocation, inOrderDependencyCounter, relaxedOrderingAllowed); } if (numWaitEvents > 0) { @@ -2076,12 +2076,12 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han } template -void CommandListCoreFamily::appendWaitOnInOrderDependency(uint32_t waitValue, bool relaxedOrderingAllowed) { +void CommandListCoreFamily::appendWaitOnInOrderDependency(NEO::GraphicsAllocation *dependencyCounterAllocation, uint32_t waitValue, bool relaxedOrderingAllowed) { using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; - commandContainer.addToResidencyContainer(this->inOrderDependencyCounterAllocation); + commandContainer.addToResidencyContainer(dependencyCounterAllocation); - uint64_t gpuAddress = this->inOrderDependencyCounterAllocation->getGpuAddress(); + uint64_t gpuAddress = dependencyCounterAllocation->getGpuAddress(); if (relaxedOrderingAllowed) { NEO::EncodeBatchBufferStartOrEnd::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, @@ -2135,7 +2135,7 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu auto event = Event::fromHandle(phEvent[i]); if (event->isInOrderExecEvent()) { - CommandListCoreFamily::appendWaitOnInOrderDependency(event->getInOrderExecSignalValue(), relaxedOrderingAllowed); + CommandListCoreFamily::appendWaitOnInOrderDependency(event->getInOrderExecDataAllocation(), event->getInOrderExecSignalValue(), relaxedOrderingAllowed); continue; } diff --git a/level_zero/core/source/event/event.h b/level_zero/core/source/event/event.h index f9f29ca563..abdfa40bf9 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -206,6 +206,7 @@ struct Event : _ze_event_handle_t { void enableInOrderExecMode(NEO::GraphicsAllocation &inOrderDependenciesAllocation, uint32_t signalValue); bool isInOrderExecEvent() const { return inOrderExecEvent; } uint32_t getInOrderExecSignalValue() const { return inOrderExecSignalValue; } + NEO::GraphicsAllocation *getInOrderExecDataAllocation() const { return inOrderExecDataAllocation; } protected: Event(EventPool *eventPool, int index, Device *device) : device(device), eventPool(eventPool), index(index) {} diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index e05f984ed5..955caa8188 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -18,6 +18,7 @@ #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" +#include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/test_macros/hw_test.h" #include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h" @@ -702,9 +703,9 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel { ze_command_queue_desc_t desc = {}; - mockCmdQ = std::make_unique>(device, csr, &desc); + mockCmdQs.emplace_back(std::make_unique>(device, csr, &desc)); - cmdList->cmdQImmediate = mockCmdQ.get(); + cmdList->cmdQImmediate = mockCmdQs[createdCmdLists].get(); cmdList->isFlushTaskSubmissionEnabled = true; cmdList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE; cmdList->csr = csr; @@ -712,12 +713,15 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel { cmdList->commandContainer.setImmediateCmdListCsr(csr); cmdList->enableInOrderExecution(); + createdCmdLists++; + return cmdList; } DebugManagerStateRestore restorer; - std::unique_ptr> mockCmdQ; + uint32_t createdCmdLists = 0; + std::vector>> mockCmdQs; ze_result_t returnValue = ZE_RESULT_SUCCESS; ze_group_count_t groupCount = {3, 2, 1}; CmdListKernelLaunchParams launchParams = {}; @@ -811,6 +815,44 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenSubmittingThenProgramSem EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, semaphoreCmd->getCompareOperation()); } +HWTEST2_F(InOrderCmdListTests, givenInOrderEventModeWhenSubmittingFromDifferentCmdListThenProgramSemaphoreForEvent, IsAtLeastSkl) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + + auto immCmdList1 = createImmCmdList(); + auto immCmdList2 = createImmCmdList(); + + auto eventPool = createEvents(1); + + auto cmdStream = immCmdList2->getCmdContainer().getCommandStream(); + + auto event0Handle = events[0]->toHandle(); + + auto ultCsr = static_cast *>(device->getNEODevice()->getDefaultEngine().commandStreamReceiver); + ultCsr->storeMakeResidentAllocations = true; + + immCmdList1->appendLaunchKernel(kernel->toHandle(), &groupCount, event0Handle, 0, nullptr, launchParams, false); + + EXPECT_EQ(1u, ultCsr->makeResidentAllocations[immCmdList1->inOrderDependencyCounterAllocation]); + + immCmdList2->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 1, &event0Handle, launchParams, false); + + EXPECT_EQ(2u, ultCsr->makeResidentAllocations[immCmdList1->inOrderDependencyCounterAllocation]); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream->getCpuBase(), cmdStream->getUsed())); + + auto itor = find(cmdList.begin(), cmdList.end()); + + ASSERT_NE(cmdList.end(), itor); + + auto semaphoreCmd = genCmdCast(*itor); + + EXPECT_EQ(1u, semaphoreCmd->getSemaphoreDataDword()); + EXPECT_NE(immCmdList1->inOrderDependencyCounterAllocation->getGpuAddress(), immCmdList2->inOrderDependencyCounterAllocation->getGpuAddress()); + EXPECT_EQ(immCmdList1->inOrderDependencyCounterAllocation->getGpuAddress(), semaphoreCmd->getSemaphoreGraphicsAddress()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD, semaphoreCmd->getCompareOperation()); +} + HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDispatchingThenHandleDependencyCounter, IsAtLeastSkl) { auto immCmdList = createImmCmdList();