From a61b39c47b8029a3b647375fa385335cfad393f3 Mon Sep 17 00:00:00 2001 From: Lukasz Jobczyk Date: Wed, 21 Jun 2023 11:40:18 +0000 Subject: [PATCH] fix: Invalidate constant cache when ULLS enabled Related-To: NEO-8067 Signed-off-by: Lukasz Jobczyk --- level_zero/core/source/cmdlist/cmdlist_hw.inl | 9 +++ .../core/test/unit_tests/mocks/mock_cmdlist.h | 2 + .../test_cmdlist_append_signal_event.cpp | 63 +++++++++++++++++++ shared/source/os_interface/product_helper.h | 1 + shared/source/os_interface/product_helper.inl | 5 ++ .../source/os_interface/product_helper_hw.h | 1 + .../mtl/os_agnostic_product_helper_mtl.inl | 5 ++ .../test/common/mocks/mock_product_helper.cpp | 5 ++ .../pvc/test_product_helper_pvc.cpp | 5 ++ .../mtl/product_helper_tests_mtl.cpp | 5 ++ 10 files changed, 101 insertions(+) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index db9ca2cea7..1cdb2d65f3 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -3141,6 +3141,15 @@ void CommandListCoreFamily::dispatchPostSyncCommands(const CmdLis pipeControlArgs.dcFlushEnable = getDcFlushRequired(signalScope); pipeControlArgs.workloadPartitionOffset = eventOperations.workPartitionOperation; + const auto &productHelper = this->device->getNEODevice()->getRootDeviceEnvironment().template getHelper(); + if (productHelper.isDirectSubmissionConstantCacheInvalidationNeeded(this->device->getHwInfo())) { + if (this->cmdListType == CommandListType::TYPE_IMMEDIATE) { + pipeControlArgs.constantCacheInvalidationEnable = this->csr->isDirectSubmissionEnabled(); + } else { + pipeControlArgs.constantCacheInvalidationEnable = productHelper.isDirectSubmissionSupported(this->device->getHwInfo()); + } + } + NEO::MemorySynchronizationCommands::addBarrierWithPostSyncOperation( *commandContainer.getCommandStream(), NEO::PostSyncMode::ImmediateData, diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index 9b469d1a08..c9fd9f20bc 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -198,6 +198,7 @@ struct MockCommandListImmediate : public CommandListCoreFamilyImmediate>; MockCommandListImmediateHw() : BaseClass() {} + using BaseClass::appendSignalEventPostWalker; using BaseClass::applyMemoryRangesBarrier; using BaseClass::cmdListType; using BaseClass::copyThroughLockedPtrEnabled; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp index 95997d59b0..bc106de9d3 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp @@ -343,6 +343,69 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); EXPECT_EQ(gpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, device->getNEODevice()->getRootDeviceEnvironment()), cmd->getDcFlushEnable()); + auto &productHelper = device->getNEODevice()->getRootDeviceEnvironment().getHelper(); + EXPECT_EQ(productHelper.isDirectSubmissionConstantCacheInvalidationNeeded(device->getHwInfo()) && productHelper.isDirectSubmissionSupported(device->getHwInfo()), cmd->getConstantCacheInvalidationEnable()); + EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable()); + postSyncFound++; + gpuAddress += event->getSinglePacketSize(); + } + } + EXPECT_EQ(1u, postSyncFound); +} + +HWTEST2_F(CommandListAppendUsedPacketSignalEvent, + givenMultiTileImmediateCommandListWhenAppendingScopeEventSignalAfterWalkerThenExpectPartitionedPipeControl, IsAtLeastXeHpCore) { + using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END; + + auto commandList = std::make_unique<::L0::ult::MockCommandListImmediateHw>(); + ASSERT_NE(nullptr, commandList); + ze_result_t returnValue = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u); + EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue); + commandList->csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver; + commandList->cmdListType = CommandList::CommandListType::TYPE_IMMEDIATE; + + auto cmdStream = commandList->getCmdContainer().getCommandStream(); + + size_t useSize = cmdStream->getAvailableSpace(); + useSize -= sizeof(MI_BATCH_BUFFER_END); + cmdStream->getSpace(useSize); + + constexpr uint32_t packets = 2u; + + event->setEventTimestampFlag(false); + event->signalScope = ZE_EVENT_SCOPE_FLAG_HOST; + + commandList->partitionCount = packets; + commandList->appendSignalEventPostWalker(event.get()); + EXPECT_EQ(packets, event->getPacketsInUse()); + + auto gpuAddress = event->getCompletionFieldGpuAddress(device); + + size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), false); + size_t usedSize = cmdStream->getUsed(); + EXPECT_EQ(expectedSize, usedSize); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, + cmdStream->getCpuBase(), + usedSize)); + + auto pipeControlList = findAll(cmdList.begin(), cmdList.end()); + ASSERT_NE(0u, pipeControlList.size()); + uint32_t postSyncFound = 0; + for (auto &it : pipeControlList) { + auto cmd = genCmdCast(*it); + if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { + EXPECT_EQ(Event::STATE_SIGNALED, cmd->getImmediateData()); + EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); + EXPECT_EQ(gpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); + EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, device->getNEODevice()->getRootDeviceEnvironment()), cmd->getDcFlushEnable()); + auto &productHelper = device->getNEODevice()->getRootDeviceEnvironment().getHelper(); + EXPECT_EQ(productHelper.isDirectSubmissionConstantCacheInvalidationNeeded(device->getHwInfo()) && commandList->csr->isDirectSubmissionEnabled(), cmd->getConstantCacheInvalidationEnable()); EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable()); postSyncFound++; gpuAddress += event->getSinglePacketSize(); diff --git a/shared/source/os_interface/product_helper.h b/shared/source/os_interface/product_helper.h index 6a969f2542..c6c403f0ec 100644 --- a/shared/source/os_interface/product_helper.h +++ b/shared/source/os_interface/product_helper.h @@ -103,6 +103,7 @@ class ProductHelper { virtual bool isAllocationSizeAdjustmentRequired(const HardwareInfo &hwInfo) const = 0; virtual bool isNewResidencyModelSupported() const = 0; virtual bool isDirectSubmissionSupported(const HardwareInfo &hwInfo) const = 0; + virtual bool isDirectSubmissionConstantCacheInvalidationNeeded(const HardwareInfo &hwInfo) const = 0; virtual std::pair isPipeControlPriorToNonPipelinedStateCommandsWARequired(const HardwareInfo &hwInfo, bool isRcs, const ReleaseHelper *releaseHelper) const = 0; virtual bool heapInLocalMem(const HardwareInfo &hwInfo) const = 0; virtual void setCapabilityCoherencyFlag(const HardwareInfo &hwInfo, bool &coherencyFlag) = 0; diff --git a/shared/source/os_interface/product_helper.inl b/shared/source/os_interface/product_helper.inl index 96bd84312f..f6c7417df3 100644 --- a/shared/source/os_interface/product_helper.inl +++ b/shared/source/os_interface/product_helper.inl @@ -322,6 +322,11 @@ bool ProductHelperHw::isDirectSubmissionSupported(const HardwareInfo return false; } +template +bool ProductHelperHw::isDirectSubmissionConstantCacheInvalidationNeeded(const HardwareInfo &hwInfo) const { + return false; +} + template bool ProductHelperHw::isForceEmuInt32DivRemSPWARequired(const HardwareInfo &hwInfo) const { return false; diff --git a/shared/source/os_interface/product_helper_hw.h b/shared/source/os_interface/product_helper_hw.h index 9c707bb5ca..47c6298b85 100644 --- a/shared/source/os_interface/product_helper_hw.h +++ b/shared/source/os_interface/product_helper_hw.h @@ -56,6 +56,7 @@ class ProductHelperHw : public ProductHelper { bool isAllocationSizeAdjustmentRequired(const HardwareInfo &hwInfo) const override; bool isNewResidencyModelSupported() const override; bool isDirectSubmissionSupported(const HardwareInfo &hwInfo) const override; + bool isDirectSubmissionConstantCacheInvalidationNeeded(const HardwareInfo &hwInfo) const override; std::pair isPipeControlPriorToNonPipelinedStateCommandsWARequired(const HardwareInfo &hwInfo, bool isRcs, const ReleaseHelper *releaseHelper) const override; bool heapInLocalMem(const HardwareInfo &hwInfo) const override; void setCapabilityCoherencyFlag(const HardwareInfo &hwInfo, bool &coherencyFlag) override; diff --git a/shared/source/xe_hpg_core/mtl/os_agnostic_product_helper_mtl.inl b/shared/source/xe_hpg_core/mtl/os_agnostic_product_helper_mtl.inl index 43b82969f1..5f6724b7fc 100644 --- a/shared/source/xe_hpg_core/mtl/os_agnostic_product_helper_mtl.inl +++ b/shared/source/xe_hpg_core/mtl/os_agnostic_product_helper_mtl.inl @@ -41,6 +41,11 @@ uint32_t ProductHelperHw::getSteppingFromHwRevId(const HardwareInfo return CommonConstants::invalidStepping; } +template <> +bool ProductHelperHw::isDirectSubmissionConstantCacheInvalidationNeeded(const HardwareInfo &hwInfo) const { + return true; +} + template <> std::pair ProductHelperHw::isPipeControlPriorToNonPipelinedStateCommandsWARequired(const HardwareInfo &hwInfo, bool isRcs, const ReleaseHelper *releaseHelper) const { auto isBasicWARequired = true; diff --git a/shared/test/common/mocks/mock_product_helper.cpp b/shared/test/common/mocks/mock_product_helper.cpp index 4b69c96f00..05ce0a6840 100644 --- a/shared/test/common/mocks/mock_product_helper.cpp +++ b/shared/test/common/mocks/mock_product_helper.cpp @@ -165,6 +165,11 @@ bool ProductHelperHw::isDirectSubmissionSupported(const HardwareIn return false; } +template <> +bool ProductHelperHw::isDirectSubmissionConstantCacheInvalidationNeeded(const HardwareInfo &hwInfo) const { + return false; +} + template <> LocalMemoryAccessMode ProductHelperHw::getDefaultLocalMemoryAccessMode(const HardwareInfo &hwInfo) const { return LocalMemoryAccessMode::Default; diff --git a/shared/test/unit_test/xe_hpc_core/pvc/test_product_helper_pvc.cpp b/shared/test/unit_test/xe_hpc_core/pvc/test_product_helper_pvc.cpp index 2334f43ece..850dc21496 100644 --- a/shared/test/unit_test/xe_hpc_core/pvc/test_product_helper_pvc.cpp +++ b/shared/test/unit_test/xe_hpc_core/pvc/test_product_helper_pvc.cpp @@ -173,6 +173,11 @@ PVCTEST_F(PvcProductHelper, givenPvcProductHelperWhenCheckDirectSubmissionSuppor EXPECT_TRUE(productHelper->isDirectSubmissionSupported(hwInfo)); } +PVCTEST_F(PvcProductHelper, givenPvcProductHelperWhenCheckDirectSubmissionConstantCacheInvalidationNeededThenFalseIsReturned) { + auto hwInfo = *defaultHwInfo; + EXPECT_FALSE(productHelper->isDirectSubmissionConstantCacheInvalidationNeeded(hwInfo)); +} + PVCTEST_F(PvcProductHelper, givenPvcProductHelperWhenCheckCopyEngineSelectorEnabledThenFalseIsReturned) { auto hwInfo = *defaultHwInfo; EXPECT_FALSE(productHelper->isCopyEngineSelectorEnabled(hwInfo)); diff --git a/shared/test/unit_test/xe_hpg_core/mtl/product_helper_tests_mtl.cpp b/shared/test/unit_test/xe_hpg_core/mtl/product_helper_tests_mtl.cpp index e0679c109a..463eac6af5 100644 --- a/shared/test/unit_test/xe_hpg_core/mtl/product_helper_tests_mtl.cpp +++ b/shared/test/unit_test/xe_hpg_core/mtl/product_helper_tests_mtl.cpp @@ -67,6 +67,11 @@ MTLTEST_F(MtlProductHelper, whenGettingAubstreamProductFamilyThenProperEnumValue EXPECT_EQ(aub_stream::ProductFamily::Mtl, productHelper->getAubStreamProductFamily()); } +MTLTEST_F(MtlProductHelper, givenMtlProductHelperWhenCheckDirectSubmissionConstantCacheInvalidationNeededThenTrueIsReturned) { + auto hwInfo = *defaultHwInfo; + EXPECT_TRUE(productHelper->isDirectSubmissionConstantCacheInvalidationNeeded(hwInfo)); +} + MTLTEST_F(MtlProductHelper, givenMtlProductHelperWhenIsInitBuiltinAsyncSupportedThenReturnFalse) { EXPECT_FALSE(productHelper->isInitBuiltinAsyncSupported(*defaultHwInfo)); }