From 6437c1a91efdc0a5e8d47c7d60f22f1b247ae071 Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Mon, 27 Mar 2023 21:37:18 +0000 Subject: [PATCH] Flush state caches after command list is destroyed When state base address tracking is enabled and the command list uses private heaps, the command list must, at destroy time, call all compute CSRs that were using that heap to invalidate their state caches. This allows a new command list to reuse the same heap allocation for different surface states, because cached states are invalidated before the new use. Related-To: NEO-5055 Signed-off-by: Zbigniew Zdanowicz --- .../core/source/cmdlist/cmdlist_imp.cpp | 24 ++++++++++++++ .../unit_tests/fixtures/cmdlist_fixture.cpp | 2 ++ .../unit_tests/fixtures/cmdlist_fixture.h | 1 + .../sources/cmdlist/test_cmdlist_5.cpp | 26 ++++++++++++++++ .../sources/cmdlist/test_cmdlist_6.cpp | 26 ++++++++++++++++ opencl/test/unit_test/kernel/kernel_tests.cpp | 4 +++ .../command_stream/command_stream_receiver.h | 1 + .../command_stream_receiver_hw.h | 3 +- .../command_stream_receiver_hw_base.inl | 15 +++++++-- shared/source/helpers/engine_node_helper.cpp | 4 +++ shared/source/helpers/engine_node_helper.h | 1 + shared/test/common/helpers/unit_test_helper.h | 2 ++ .../test/common/helpers/unit_test_helper.inl | 26 ++++++++++++++++ .../mocks/mock_command_stream_receiver.h | 4 +++ .../command_stream_receiver_tests.cpp | 20 ++++++++++-- .../helpers/engine_node_helper_tests.cpp | 31 +++++++++++++++++++ 16 files changed, 185 insertions(+), 5 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.cpp b/level_zero/core/source/cmdlist/cmdlist_imp.cpp index 39e8ca4dba..bcb3fa674d 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.cpp +++ b/level_zero/core/source/cmdlist/cmdlist_imp.cpp @@ -11,10 +11,13 @@ #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/device/device.h" +#include "shared/source/helpers/engine_control.h" 
#include "shared/source/helpers/engine_node_helper.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/logical_state_helper.h" #include "shared/source/indirect_heap/indirect_heap.h" +#include "shared/source/memory_manager/memory_manager.h" +#include "shared/source/os_interface/os_context.h" #include "shared/source/os_interface/sys_calls_common.h" #include "level_zero/core/source/cmdqueue/cmdqueue.h" @@ -44,6 +47,27 @@ ze_result_t CommandListImp::destroy() { auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout; this->csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, this->csr->peekTaskCount()); } + + if (this->cmdListType == CommandListType::TYPE_REGULAR && + !isCopyOnly() && + this->stateBaseAddressTracking && + this->cmdListHeapAddressModel == NEO::HeapAddressModel::PrivateHeaps) { + + auto memoryManager = device->getNEODevice()->getMemoryManager(); + + auto heapAllocation = this->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getGraphicsAllocation(); + for (auto &engine : memoryManager->getRegisteredEngines()) { + if (NEO::EngineHelpers::isComputeEngine(engine.getEngineType())) { + auto contextId = engine.osContext->getContextId(); + + if (heapAllocation->isUsedByOsContext(contextId)) { + engine.commandStreamReceiver->sendRenderStateCacheFlush(); + engine.commandStreamReceiver->waitForCompletionWithTimeout(NEO::WaitParams{false, false, NEO::TimeoutControls::maxTimeout}, engine.commandStreamReceiver->peekTaskCount()); + } + } + } + } + delete this; return ZE_RESULT_SUCCESS; } diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp index 1c02e5454e..b67e566aca 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp @@ -113,6 +113,8 @@ void ModuleMutableCommandListFixture::setUpImpl() { } void 
ModuleMutableCommandListFixture::setUp(uint32_t revision) { + backupHwInfo = std::make_unique>(defaultHwInfo.get()); + defaultHwInfo->capabilityTable.blitterOperationsSupported = true; if (revision != 0) { DebugManager.flags.OverrideRevision.set(revision); } diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h index 2a186f1e4a..b57ee554ba 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h @@ -79,6 +79,7 @@ struct ModuleMutableCommandListFixture : public ModuleImmutableDataFixture { std::unique_ptr commandList; std::unique_ptr commandListImmediate; std::unique_ptr kernel; + std::unique_ptr> backupHwInfo; L0::ult::CommandQueue *commandQueue; NEO::EngineGroupType engineGroupType; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp index 4a8e57c38d..f891c36b4e 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp @@ -2558,5 +2558,31 @@ HWTEST2_F(CommandListStateBaseAddressPrivateHeapTest, ASSERT_EQ(0u, sbaCmds.size()); } +HWTEST2_F(CommandListStateBaseAddressPrivateHeapTest, + givenCommandListUsingPrivateSurfaceHeapWhenCommandListDestroyedThenCsrDispatchesStateCacheFlush, + IsAtLeastSkl) { + auto &csr = neoDevice->getUltCommandStreamReceiver(); + auto &csrStream = csr.commandStream; + + ze_result_t returnValue; + L0::ult::CommandList *cmdListObject = whiteboxCast(CommandList::create(productFamily, device, engineGroupType, 0u, returnValue)); + + ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + cmdListObject->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + + returnValue = cmdListObject->close(); + EXPECT_EQ(ZE_RESULT_SUCCESS, 
returnValue); + + auto cmdListHandle = cmdListObject->toHandle(); + returnValue = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, true); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + + returnValue = cmdListObject->destroy(); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + + EXPECT_TRUE(NEO::UnitTestHelper::findStateCacheFlushPipeControl(csrStream)); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp index 05c4866e4e..57e6417565 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp @@ -2275,5 +2275,31 @@ HWTEST2_F(CommandListStateBaseAddressGlobalStatelessTest, EXPECT_EQ(scratchAllocation->getGpuAddress(), scratchSurfaceState->getSurfaceBaseAddress()); } +HWTEST2_F(CommandListStateBaseAddressGlobalStatelessTest, + givenCommandListNotUsingPrivateSurfaceHeapWhenCommandListDestroyedThenCsrDoesNotDispatchStateCacheFlush, + IsAtLeastSkl) { + auto &csr = neoDevice->getUltCommandStreamReceiver(); + auto &csrStream = csr.commandStream; + + ze_result_t returnValue; + L0::ult::CommandList *cmdListObject = whiteboxCast(CommandList::create(productFamily, device, engineGroupType, 0u, returnValue)); + + ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + cmdListObject->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false); + + returnValue = cmdListObject->close(); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + + auto cmdListHandle = cmdListObject->toHandle(); + returnValue = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, true); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + + returnValue = cmdListObject->destroy(); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + + EXPECT_EQ(0u, csrStream.getUsed()); +} + } // namespace ult } // namespace L0 
diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 8637eaa551..52f97b23ec 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -567,6 +567,10 @@ class CommandStreamReceiverMock : public CommandStreamReceiver { return cs; } + SubmissionStatus sendRenderStateCacheFlush() override { + return SubmissionStatus::SUCCESS; + } + bool flushBatchedSubmissions() override { return true; } CommandStreamReceiverType getType() const override { diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index dd033890ed..3354ffe919 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -93,6 +93,7 @@ class CommandStreamReceiver { const IndirectHeap *dsh, const IndirectHeap *ioh, const IndirectHeap *ssh, TaskCountType taskLevel, DispatchFlags &dispatchFlags, Device &device) = 0; virtual CompletionStamp flushBcsTask(LinearStream &commandStream, size_t commandStreamStart, const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) = 0; + virtual SubmissionStatus sendRenderStateCacheFlush() = 0; virtual bool flushBatchedSubmissions() = 0; MOCKABLE_VIRTUAL SubmissionStatus submitBatchBuffer(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency); diff --git a/shared/source/command_stream/command_stream_receiver_hw.h b/shared/source/command_stream/command_stream_receiver_hw.h index 3c84c7d831..0f8546e04f 100644 --- a/shared/source/command_stream/command_stream_receiver_hw.h +++ b/shared/source/command_stream/command_stream_receiver_hw.h @@ -100,10 +100,11 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { SubmissionStatus flushTagUpdate() override; SubmissionStatus flushMiFlushDW(); - SubmissionStatus flushPipeControl(); + SubmissionStatus flushPipeControl(bool stateCacheFlush); 
SubmissionStatus flushSmallTask(LinearStream &commandStreamTask, size_t commandStreamStartTask); SubmissionStatus flushHandler(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency); + SubmissionStatus sendRenderStateCacheFlush() override; bool isUpdateTagFromWaitEnabled() override; void updateTagFromWait() override; diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 8ebe885643..864c9ad977 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -1365,7 +1365,7 @@ inline SubmissionStatus CommandStreamReceiverHw::flushTagUpdate() { if (EngineHelpers::isBcs(this->osContext->getEngineType())) { return this->flushMiFlushDW(); } else { - return this->flushPipeControl(); + return this->flushPipeControl(false); } } return SubmissionStatus::DEVICE_UNINITIALIZED; @@ -1393,7 +1393,7 @@ inline SubmissionStatus CommandStreamReceiverHw::flushMiFlushDW() { } template -SubmissionStatus CommandStreamReceiverHw::flushPipeControl() { +SubmissionStatus CommandStreamReceiverHw::flushPipeControl(bool stateCacheFlush) { auto lock = obtainUniqueOwnership(); PipeControlArgs args; @@ -1401,6 +1401,12 @@ SubmissionStatus CommandStreamReceiverHw::flushPipeControl() { args.notifyEnable = isUsedNotifyEnableForPostSync(); args.workloadPartitionOffset = isMultiTileOperationEnabled(); + if (stateCacheFlush) { + args.textureCacheInvalidationEnable = true; + args.renderTargetCacheFlushEnable = true; + args.stateCacheInvalidationEnable = true; + } + auto dispatchSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), args.tlbInvalidation) + this->getCmdSizeForPrologue(); auto &commandStream = getCS(dispatchSize); @@ -1454,6 +1460,11 @@ SubmissionStatus CommandStreamReceiverHw::flushSmallTask(LinearStream return submissionStatus; } +template 
+SubmissionStatus CommandStreamReceiverHw::sendRenderStateCacheFlush() { + return this->flushPipeControl(true); +} + template inline SubmissionStatus CommandStreamReceiverHw::flushHandler(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) { auto status = flush(batchBuffer, allocationsForResidency); diff --git a/shared/source/helpers/engine_node_helper.cpp b/shared/source/helpers/engine_node_helper.cpp index b68c597132..2bf243baf4 100644 --- a/shared/source/helpers/engine_node_helper.cpp +++ b/shared/source/helpers/engine_node_helper.cpp @@ -75,6 +75,10 @@ bool isCcs(aub_stream::EngineType engineType) { return engineType >= aub_stream::ENGINE_CCS && engineType <= aub_stream::ENGINE_CCS3; } +bool isComputeEngine(aub_stream::EngineType engineType) { + return isCcs(engineType) || engineType == aub_stream::ENGINE_RCS || engineType == aub_stream::ENGINE_CCCS; +} + bool isBcs(aub_stream::EngineType engineType) { return engineType == aub_stream::ENGINE_BCS || (engineType >= aub_stream::ENGINE_BCS1 && engineType <= aub_stream::ENGINE_BCS8); } diff --git a/shared/source/helpers/engine_node_helper.h b/shared/source/helpers/engine_node_helper.h index 0404db5b39..f8c7587f8b 100644 --- a/shared/source/helpers/engine_node_helper.h +++ b/shared/source/helpers/engine_node_helper.h @@ -45,6 +45,7 @@ struct EngineDescriptor { namespace EngineHelpers { bool isCcs(aub_stream::EngineType engineType); +bool isComputeEngine(aub_stream::EngineType engineType); bool isBcs(aub_stream::EngineType engineType); bool isBcsVirtualEngineEnabled(aub_stream::EngineType engineType); aub_stream::EngineType getBcsEngineType(const RootDeviceEnvironment &rootDeviceEnvironment, const DeviceBitfield &deviceBitfield, SelectorCopyEngine &selectorCopyEngine, bool internalUsage); diff --git a/shared/test/common/helpers/unit_test_helper.h b/shared/test/common/helpers/unit_test_helper.h index 91e4b93557..9ae283d40f 100644 --- a/shared/test/common/helpers/unit_test_helper.h +++ 
b/shared/test/common/helpers/unit_test_helper.h @@ -93,6 +93,8 @@ struct UnitTestHelper { static bool getSystolicFlagValueFromPipelineSelectCommand(const typename GfxFamily::PIPELINE_SELECT &pipelineSelectCmd); static size_t getAdditionalDshSize(uint32_t iddCount); static bool expectNullDsh(const DeviceInfo &deviceInfo); + + static bool findStateCacheFlushPipeControl(LinearStream &csrStream); }; } // namespace NEO diff --git a/shared/test/common/helpers/unit_test_helper.inl b/shared/test/common/helpers/unit_test_helper.inl index 9929d642e8..4050aec7c0 100644 --- a/shared/test/common/helpers/unit_test_helper.inl +++ b/shared/test/common/helpers/unit_test_helper.inl @@ -5,6 +5,7 @@ * */ +#include "shared/source/command_stream/linear_stream.h" #include "shared/source/device/device_info.h" #include "shared/source/helpers/hw_info.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" @@ -113,4 +114,29 @@ bool UnitTestHelper::expectNullDsh(const DeviceInfo &deviceInfo) { return true; } +template +bool UnitTestHelper::findStateCacheFlushPipeControl(LinearStream &csrStream) { + using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; + + HardwareParse hwParserCsr; + hwParserCsr.parsePipeControl = true; + hwParserCsr.parseCommands(csrStream, 0); + hwParserCsr.findHardwareCommands(); + + bool stateCacheFlushFound = false; + auto itorPipeControl = hwParserCsr.pipeControlList.begin(); + while (itorPipeControl != hwParserCsr.pipeControlList.end()) { + auto pipeControl = reinterpret_cast(*itorPipeControl); + + if (pipeControl->getRenderTargetCacheFlushEnable() && + pipeControl->getStateCacheInvalidationEnable() && + pipeControl->getTextureCacheInvalidationEnable()) { + stateCacheFlushFound = true; + break; + } + itorPipeControl++; + } + return stateCacheFlushFound; +} + } // namespace NEO diff --git a/shared/test/common/mocks/mock_command_stream_receiver.h b/shared/test/common/mocks/mock_command_stream_receiver.h index d52a9c13d7..bc7c528025 100644 --- 
a/shared/test/common/mocks/mock_command_stream_receiver.h +++ b/shared/test/common/mocks/mock_command_stream_receiver.h @@ -114,6 +114,10 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { CompletionStamp flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart, const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) override; + SubmissionStatus sendRenderStateCacheFlush() override { + return SubmissionStatus::SUCCESS; + } + bool flushBatchedSubmissions() override { if (flushBatchedSubmissionsCallCounter) { (*flushBatchedSubmissionsCallCounter)++; diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 1b5d566943..811d9e2ee2 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -2777,10 +2777,10 @@ HWTEST_F(CommandStreamReceiverHwTest, givenOutOfMemoryFailureOnFlushWhenFlushing commandStreamReceiver.flushReturnValue = SubmissionStatus::OUT_OF_MEMORY; - EXPECT_EQ(SubmissionStatus::OUT_OF_MEMORY, commandStreamReceiver.flushPipeControl()); + EXPECT_EQ(SubmissionStatus::OUT_OF_MEMORY, commandStreamReceiver.flushPipeControl(false)); commandStreamReceiver.flushReturnValue = SubmissionStatus::OUT_OF_HOST_MEMORY; - EXPECT_EQ(SubmissionStatus::OUT_OF_HOST_MEMORY, commandStreamReceiver.flushPipeControl()); + EXPECT_EQ(SubmissionStatus::OUT_OF_HOST_MEMORY, commandStreamReceiver.flushPipeControl(false)); } HWTEST_F(CommandStreamReceiverHwTest, givenOutOfMemoryFailureOnFlushWhenFlushingTagUpdateThenErrorIsPropagated) { @@ -3097,3 +3097,19 @@ HWTEST2_F(CommandStreamReceiverHwTest, auto scmCmd = hwParserCsr.getCommand(); EXPECT_NE(nullptr, scmCmd); } + +HWTEST_F(CommandStreamReceiverHwTest, givenFlushPipeControlWhenFlushWithoutStateCacheFlushThenExpectNoStateCacheFlushFlagsSet) { + auto &commandStreamReceiver = 
pDevice->getUltCommandStreamReceiver(); + + commandStreamReceiver.flushPipeControl(false); + + EXPECT_FALSE(UnitTestHelper::findStateCacheFlushPipeControl(commandStreamReceiver.commandStream)); +} + +HWTEST_F(CommandStreamReceiverHwTest, givenFlushPipeControlWhenFlushWithStateCacheFlushThenExpectStateCacheFlushFlagsSet) { + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + + commandStreamReceiver.sendRenderStateCacheFlush(); + + EXPECT_TRUE(UnitTestHelper::findStateCacheFlushPipeControl(commandStreamReceiver.commandStream)); +} diff --git a/shared/test/unit_test/helpers/engine_node_helper_tests.cpp b/shared/test/unit_test/helpers/engine_node_helper_tests.cpp index ac27fbbf40..c500ab1729 100644 --- a/shared/test/unit_test/helpers/engine_node_helper_tests.cpp +++ b/shared/test/unit_test/helpers/engine_node_helper_tests.cpp @@ -149,3 +149,34 @@ TEST(EngineNodeHelperTest, givenLinkCopyEnginesAndInternalUsageEnabledWhenGettin EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS3, EngineHelpers::getBcsEngineType(rootDeviceEnvironment, deviceBitfield, selectorCopyEngine, isInternalUsage)); EXPECT_EQ(aub_stream::EngineType::ENGINE_BCS3, EngineHelpers::getBcsEngineType(rootDeviceEnvironment, deviceBitfield, selectorCopyEngine, isInternalUsage)); } + +TEST(EngineNodeHelperTest, givenAllEnginesWhenCheckingEngineIsComputeCapableThenReturnTrueOnlyForCompute) { + struct EngineProperties { + aub_stream::EngineType engineType; + bool isCompute; + }; + + const EngineProperties engines[] = { + {aub_stream::ENGINE_RCS, true}, + {aub_stream::ENGINE_CCS, true}, + {aub_stream::ENGINE_CCS1, true}, + {aub_stream::ENGINE_CCS2, true}, + {aub_stream::ENGINE_CCS3, true}, + {aub_stream::ENGINE_CCCS, true}, + + {aub_stream::ENGINE_BCS, false}, + {aub_stream::ENGINE_BCS1, false}, + {aub_stream::ENGINE_BCS2, false}, + {aub_stream::ENGINE_BCS3, false}, + {aub_stream::ENGINE_BCS4, false}, + {aub_stream::ENGINE_BCS5, false}, + {aub_stream::ENGINE_BCS6, false}, + 
{aub_stream::ENGINE_BCS7, false}, + {aub_stream::ENGINE_BCS8, false}}; + + const size_t numEngines = sizeof(engines) / sizeof(EngineProperties); + + for (size_t i = 0; i < numEngines; i++) { + EXPECT_EQ(engines[i].isCompute, EngineHelpers::isComputeEngine(engines[i].engineType)); + } +}