From b09357470ae274180883f9ddb5e054987569d882 Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Tue, 8 Aug 2023 14:49:34 +0000 Subject: [PATCH] feature: extend in order execution support on regular command lists Related-To: NEO-7966 Signed-off-by: Dunajski, Bartosz --- level_zero/core/source/cmdlist/cmdlist_hw.inl | 13 +++-- .../core/source/cmdlist/cmdlist_imp.cpp | 12 ++-- .../core/test/unit_tests/mocks/mock_cmdlist.h | 1 + .../test_cmdlist_append_launch_kernel_3.cpp | 56 +++++++++++++++++++ 4 files changed, 72 insertions(+), 10 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 354c195c8c..f0defc5309 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -1484,7 +1484,7 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyRegion(void *d addToMappedEventList(signalEvent); addFlushRequiredCommand(dstAllocationStruct.needsFlush, signalEvent); - if (this->inOrderExecutionEnabled && isCopyOnly() && inOrderCopyOnlySignalingAllowed) { + if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode() && isCopyOnly() && inOrderCopyOnlySignalingAllowed) { appendSignalInOrderDependencyCounter(); } @@ -1901,7 +1901,7 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, appendEventForProfilingAllWalkers(signalEvent, false, singlePipeControlPacket); addFlushRequiredCommand(hostPointerNeedsFlush, signalEvent); - if (this->inOrderExecutionEnabled && launchParams.isKernelSplitOperation) { + if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode() && launchParams.isKernelSplitOperation) { if (!signalEvent) { NEO::PipeControlArgs args; NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); @@ -1967,7 +1967,7 @@ ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, appendSignalEventPostWalker(signalEvent); - if (isInOrderExecutionEnabled()) { + if 
(isInOrderExecutionEnabled() && useCounterAllocationForInOrderMode()) { appendSignalInOrderDependencyCounter(); } } @@ -2181,7 +2181,7 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han bool appendPipeControlWithPostSync = (!isCopyOnly()) && (event->isSignalScope() || event->isEventTimestampFlagSet()); dispatchEventPostSyncOperation(event, Event::STATE_SIGNALED, false, false, appendPipeControlWithPostSync); - if (this->inOrderExecutionEnabled) { + if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) { appendSignalInOrderDependencyCounter(); } @@ -2201,6 +2201,7 @@ void CommandListCoreFamily::appendWaitOnInOrderDependency(NEO::Gr using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; UNRECOVERABLE_IF(waitValue > std::numeric_limits::max()); + UNRECOVERABLE_IF(!useCounterAllocationForInOrderMode()); commandContainer.addToResidencyContainer(dependencyCounterAllocation); @@ -2313,6 +2314,8 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu template void CommandListCoreFamily::appendSignalInOrderDependencyCounter() { + UNRECOVERABLE_IF(!useCounterAllocationForInOrderMode()); + uint32_t signalValue = this->inOrderDependencyCounter + 1; uint64_t gpuVa = this->inOrderDependencyCounterAllocation->getGpuAddress() + this->inOrderAllocationOffset; @@ -2942,7 +2945,7 @@ ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_ appendEventForProfiling(signalEvent, true); - if (this->inOrderExecutionEnabled) { + if (this->inOrderExecutionEnabled && useCounterAllocationForInOrderMode()) { appendSignalInOrderDependencyCounter(); } else if (isCopyOnly()) { NEO::MiFlushArgs args{this->dummyBlitWa}; diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.cpp b/level_zero/core/source/cmdlist/cmdlist_imp.cpp index 5f0c352d17..1d416e1f0f 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.cpp +++ b/level_zero/core/source/cmdlist/cmdlist_imp.cpp @@ -229,15 +229,17 @@ void 
CommandListImp::setStreamPropertiesDefaultSettings(NEO::StreamProperties &s void CommandListImp::enableInOrderExecution() { UNRECOVERABLE_IF(inOrderExecutionEnabled); - auto device = this->device->getNEODevice(); + if (this->cmdListType == TYPE_IMMEDIATE) { + auto device = this->device->getNEODevice(); - NEO::AllocationProperties allocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize64k, NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, device->getDeviceBitfield()}; + NEO::AllocationProperties allocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize64k, NEO::AllocationType::TIMESTAMP_PACKET_TAG_BUFFER, device->getDeviceBitfield()}; - inOrderDependencyCounterAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties); + inOrderDependencyCounterAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(allocationProperties); - UNRECOVERABLE_IF(!inOrderDependencyCounterAllocation); + UNRECOVERABLE_IF(!inOrderDependencyCounterAllocation); - memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize()); + memset(inOrderDependencyCounterAllocation->getUnderlyingBuffer(), 0, inOrderDependencyCounterAllocation->getUnderlyingBufferSize()); + } inOrderExecutionEnabled = true; } diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 2ac3204fb2..dbf707f5ca 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -77,6 +77,7 @@ struct WhiteBox<::L0::CommandListCoreFamily> using BaseClass::initialize; using BaseClass::inOrderAllocationOffset; using BaseClass::inOrderDependencyCounter; + using BaseClass::inOrderDependencyCounterAllocation; using BaseClass::isFlushTaskSubmissionEnabled; using BaseClass::isRelaxedOrderingDispatchAllowed; using BaseClass::isSyncModeQueue; 
diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 8a3d2bfd59..eb91cb0582 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -2758,6 +2758,62 @@ HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdL EXPECT_EQ(0u, regularCmdList->inOrderAllocationOffset); } +HWTEST2_F(InOrderRegularCmdListTests, givenInOrderModeWhenDispatchingRegularCmdListThenDontUpdateCounterAllocation, IsAtLeastXeHpCore) { + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + + auto eventPool = createEvents(1, true); + auto eventHandle = events[0]->toHandle(); + + auto regularCmdList = createRegularCmdList(false); + auto regularCopyOnlyCmdList = createRegularCmdList(true); + + auto cmdStream = regularCmdList->getCmdContainer().getCommandStream(); + auto copyOnlyCmdStream = regularCopyOnlyCmdList->getCmdContainer().getCommandStream(); + + size_t offset = cmdStream->getUsed(); + + EXPECT_EQ(0u, regularCmdList->inOrderDependencyCounter); + EXPECT_EQ(nullptr, regularCmdList->inOrderDependencyCounterAllocation); + + constexpr size_t size = 128 * sizeof(uint32_t); + auto data = allocHostMem(size); + + ze_copy_region_t region = {0, 0, 0, 1, 1, 1}; + + regularCmdList->appendMemoryCopyRegion(data, &region, 1, 1, data, &region, 1, 1, nullptr, 0, nullptr, false, false); + + regularCmdList->appendMemoryFill(data, data, 1, size, nullptr, 0, nullptr, false); + + regularCmdList->appendSignalEvent(eventHandle); + + regularCmdList->appendBarrier(nullptr, 1, &eventHandle); + + { + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, + ptrOffset(cmdStream->getCpuBase(), offset), + (cmdStream->getUsed() - offset))); + + auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), 
cmdList.end()); + EXPECT_EQ(cmdList.end(), sdiItor); + } + + offset = copyOnlyCmdStream->getUsed(); + regularCopyOnlyCmdList->appendMemoryFill(data, data, 1, size, nullptr, 0, nullptr, false); + + { + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, + ptrOffset(copyOnlyCmdStream->getCpuBase(), offset), + (copyOnlyCmdStream->getUsed() - offset))); + + auto sdiItor = find<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end()); + EXPECT_EQ(cmdList.end(), sdiItor); + } + + context->freeMem(data); +} + using InOrderRegularCopyOnlyCmdListTests = InOrderCmdListTests; HWTEST2_F(InOrderRegularCopyOnlyCmdListTests, givenInOrderModeWhenDispatchingRegularCmdListThenDontProgramBarriers, IsAtLeastXeHpCore) {