From ef10c9849758a55eea741660ea0205920891b75a Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Fri, 28 Apr 2023 10:21:20 +0000 Subject: [PATCH] feature: Experimental support of immediate cmd list in-order execution [3/n] New allocation to track dependencies counter Related-To: LOCI-4332 Signed-off-by: Dunajski, Bartosz --- .../source/cmdlist/cmdlist_hw_immediate.h | 2 + .../source/cmdlist/cmdlist_hw_immediate.inl | 3 ++ .../core/source/cmdlist/cmdlist_imp.cpp | 30 +++++++++++-- level_zero/core/source/cmdlist/cmdlist_imp.h | 4 +- .../core/test/unit_tests/mocks/mock_cmdlist.h | 2 + .../sources/cmdlist/test_cmdlist_1.cpp | 8 ++-- .../test_cmdlist_append_launch_kernel_3.cpp | 43 +++++++++++++++++-- 7 files changed, 81 insertions(+), 11 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index baf117e51e..71aaa5ac34 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -170,6 +170,8 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily::flushImmediate(ze_res } if (isInOrderExecutionEnabled()) { + inOrderDependencyCounter++; + latestInOrderOperationCompleted = false; this->latestSentInOrderEvent = hSignalEvent; + if (hSignalEvent) { Event::fromHandle(hSignalEvent)->setLatestUsedInOrderCmdList(this); } diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.cpp b/level_zero/core/source/cmdlist/cmdlist_imp.cpp index 7203f011ce..7b2ac63136 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.cpp +++ b/level_zero/core/source/cmdlist/cmdlist_imp.cpp @@ -16,6 +16,7 @@ #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/logical_state_helper.h" #include "shared/source/indirect_heap/indirect_heap.h" +#include "shared/source/memory_manager/allocation_properties.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/os_interface/sys_calls_common.h" @@ -74,6 +75,8 @@ ze_result_t CommandListImp::destroy() { } } + device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(inOrderDependencyCounterAllocation); + delete this; return ZE_RESULT_SUCCESS; } @@ -166,10 +169,6 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device commandList->cmdListType = CommandListType::TYPE_IMMEDIATE; commandList->isSyncModeQueue = (desc->mode == ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS); - if (NEO::DebugManager.flags.ForceInOrderImmediateCmdListExecution.get() == 1) { - commandList->setInOrderExecution(true); - } - if (!internalUsage) { auto &productHelper = device->getProductHelper(); commandList->isFlushTaskSubmissionEnabled = gfxCoreHelper.isPlatformFlushTaskEnabled(productHelper); @@ -185,6 +184,11 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device csr->initializeResources(); csr->initDirectSubmission(); returnValue = commandList->initialize(device, engineGroupType, desc->flags); + + if (NEO::DebugManager.flags.ForceInOrderImmediateCmdListExecution.get() == 1) { + commandList->enableInOrderExecution(); + } + if (returnValue != ZE_RESULT_SUCCESS) { commandList->destroy(); commandList = nullptr; @@ -234,4 +238,22 @@ void CommandListImp::unsetLastInOrderOutEvent(ze_event_handle_t outEvent) { } } +void CommandListImp::enableInOrderExecution() { + UNRECOVERABLE_IF(inOrderDependencyCounterAllocation); + + auto device = this->device->getNEODevice(); + + NEO::AllocationProperties allocationProperties{device->getRootDeviceIndex(), sizeof(uint32_t), NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, device->getDeviceBitfield()}; + + inOrderDependencyCounterAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties); + + UNRECOVERABLE_IF(!inOrderDependencyCounterAllocation); + + commandContainer.addToResidencyContainer(inOrderDependencyCounterAllocation); + + memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize()); + + inOrderExecutionEnabled = true; +} + } // namespace L0 diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.h b/level_zero/core/source/cmdlist/cmdlist_imp.h index b414b23e86..916bb510c0 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.h +++ b/level_zero/core/source/cmdlist/cmdlist_imp.h @@ -32,12 +32,14 @@ struct CommandListImp : CommandList { virtual NEO::LogicalStateHelper *getLogicalStateHelper() const { return nonImmediateLogicalStateHelper.get(); } void setStreamPropertiesDefaultSettings(NEO::StreamProperties &streamProperties); - void setInOrderExecution(bool enabled) { inOrderExecutionEnabled = enabled; } + void enableInOrderExecution(); bool isInOrderExecutionEnabled() const { return inOrderExecutionEnabled; } void unsetLastInOrderOutEvent(ze_event_handle_t outEvent); protected: std::unique_ptr nonImmediateLogicalStateHelper; + NEO::GraphicsAllocation *inOrderDependencyCounterAllocation = nullptr; + uint32_t inOrderDependencyCounter = 0; ze_event_handle_t latestSentInOrderEvent = nullptr; bool latestInOrderOperationCompleted = true; // If driver is able to detect that previous operation is already done, there is no need to track dependencies. bool inOrderExecutionEnabled = false; diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index a3fbef8e15..9d3c584a52 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -160,6 +160,8 @@ struct WhiteBox> using BaseClass::getDcFlushRequired; using BaseClass::getHostPtrAlloc; using BaseClass::immediateCmdListHeapSharing; + using BaseClass::inOrderDependencyCounter; + using BaseClass::inOrderDependencyCounterAllocation; using BaseClass::isFlushTaskSubmissionEnabled; using BaseClass::isSyncModeQueue; using BaseClass::isTbxMode; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp index 7ce48da651..b275460997 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp @@ -1233,10 +1233,10 @@ HWTEST2_F(CommandListCreate, givenInOrderExecutionWhenDispatchingRelaxedOrdering ze_command_queue_desc_t desc = {}; desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; ze_result_t returnValue; - std::unique_ptr commandList(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue)); + auto commandList = CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::RenderCompute, returnValue); ASSERT_NE(nullptr, commandList); - auto whiteBoxCmdList = static_cast(commandList.get()); - whiteBoxCmdList->setInOrderExecution(true); + auto whiteBoxCmdList = static_cast(commandList); + whiteBoxCmdList->enableInOrderExecution(); ze_event_pool_desc_t eventPoolDesc = {}; eventPoolDesc.count = 1; @@ -1270,6 +1270,8 @@ HWTEST2_F(CommandListCreate, givenInOrderExecutionWhenDispatchingRelaxedOrdering commandList->appendLaunchKernel(kernel.toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); EXPECT_TRUE(ultCsr->recordedDispatchFlags.hasRelaxedOrderingDependencies); EXPECT_TRUE(ultCsr->latestFlushedBatchBuffer.hasRelaxedOrderingDependencies); + + commandList->destroy(); } TEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmCmdListWithSyncModeAndAppendBarrierThenAppendBarrierReturnsDeviceLost) { diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index ea76c52c13..715bd4db85 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -665,6 +665,23 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel { using EventImp::latestUsedInOrderCmdList; }; + template + struct DestroyObject { + void operator()(T *t) { + if (t) { + t->destroy(); + } + } + }; + + template + using DestructableUniquePtr = std::unique_ptr>; + + template + DestructableUniquePtr createDestructableUniqePtr(T *object) { + return DestructableUniquePtr{object}; + } + void SetUp() override { NEO::DebugManager.flags.ForceInOrderImmediateCmdListExecution.set(1); @@ -690,8 +707,8 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel { } template - std::unique_ptr>> createImmCmdList() { - auto cmdList = std::make_unique>>(); + DestructableUniquePtr>> createImmCmdList() { + auto cmdList = createDestructableUniqePtr(new WhiteBox>()); auto csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver; @@ -701,11 +718,11 @@ struct InOrderCmdListTests : public CommandListAppendLaunchKernel { cmdList->cmdQImmediate = mockCmdQ.get(); cmdList->isFlushTaskSubmissionEnabled = true; - cmdList->setInOrderExecution(true); cmdList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE; cmdList->csr = csr; cmdList->initialize(device, NEO::EngineGroupType::RenderCompute, 0u); cmdList->commandContainer.setImmediateCmdListCsr(csr); + cmdList->enableInOrderExecution(); return cmdList; } @@ -824,6 +841,26 @@ HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenSubmittingThenProgramSemaphor ASSERT_NE(cmdList.end(), itor); } +HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenDispatchingThenHandleDependencyCounter, IsAtLeastSkl) { + auto immCmdList = createImmCmdList(); + + EXPECT_NE(nullptr, immCmdList->inOrderDependencyCounterAllocation); + EXPECT_EQ(AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, immCmdList->inOrderDependencyCounterAllocation->getAllocationType()); + + EXPECT_EQ(0u, immCmdList->inOrderDependencyCounter); + + auto itorAlloc = std::find(immCmdList->getCmdContainer().getResidencyContainer().begin(), + immCmdList->getCmdContainer().getResidencyContainer().end(), + immCmdList->inOrderDependencyCounterAllocation); + EXPECT_NE(itorAlloc, immCmdList->getCmdContainer().getResidencyContainer().end()); + + immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + EXPECT_EQ(1u, immCmdList->inOrderDependencyCounter); + + immCmdList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + EXPECT_EQ(2u, immCmdList->inOrderDependencyCounter); +} + HWTEST2_F(InOrderCmdListTests, givenInOrderModeWhenAddingRelaxedOrderingEventsThenConfigureRegistersFirst, IsAtLeastSkl) { auto immCmdList = createImmCmdList();