diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.h b/level_zero/core/source/cmdlist/cmdlist_hw.h index 3eeb8526db..52dae8f537 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw.h @@ -223,6 +223,7 @@ struct CommandListCoreFamily : CommandListImp { ze_result_t appendLaunchKernelSplit(ze_kernel_handle_t hKernel, const ze_group_count_t *pThreadGroupDimensions, ze_event_handle_t hEvent); ze_result_t prepareIndirectParams(const ze_group_count_t *pThreadGroupDimensions); void updateStreamProperties(Kernel &kernel, bool isMultiOsContextCapable, bool isCooperative); + void clearComputeModePropertiesIfNeeded(bool requiresCoherency, uint32_t numGrfRequired, uint32_t threadArbitrationPolicy); void clearCommandsToPatch(); void applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index d7895d07a4..8f738eac68 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2185,11 +2185,15 @@ void CommandListCoreFamily::updateStreamProperties(Kernel &kernel finalStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, threadArbitrationPolicy); if (finalStreamState.stateComputeMode.isDirty()) { + clearComputeModePropertiesIfNeeded(false, kernelAttributes.numGrfRequired, threadArbitrationPolicy); NEO::EncodeWA::encodeAdditionalPipelineSelect(neoDevice, *commandContainer.getCommandStream(), true); NEO::EncodeComputeMode::adjustComputeMode(*commandContainer.getCommandStream(), nullptr, finalStreamState.stateComputeMode, hwInfo); NEO::EncodeWA::encodeAdditionalPipelineSelect(neoDevice, *commandContainer.getCommandStream(), false); } } +template +void CommandListCoreFamily::clearComputeModePropertiesIfNeeded(bool requiresCoherency, uint32_t numGrfRequired, uint32_t threadArbitrationPolicy) { +} template void CommandListCoreFamily::clearCommandsToPatch() { diff --git a/level_zero/core/source/xe_hp_core/xehp/cmdlist_xehp.cpp b/level_zero/core/source/xe_hp_core/xehp/cmdlist_xehp.cpp index 09c52ebb7c..0717c2eedf 100644 --- a/level_zero/core/source/xe_hp_core/xehp/cmdlist_xehp.cpp +++ b/level_zero/core/source/xe_hp_core/xehp/cmdlist_xehp.cpp @@ -13,6 +13,12 @@ namespace L0 { +template <> +void CommandListCoreFamily::clearComputeModePropertiesIfNeeded(bool requiresCoherency, uint32_t numGrfRequired, uint32_t threadArbitrationPolicy) { + finalStreamState.stateComputeMode = {}; + finalStreamState.stateComputeMode.setProperties(requiresCoherency, numGrfRequired, threadArbitrationPolicy); +} + template struct CommandListCoreFamily; template <> diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp index c78f64831c..129b26e02c 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_4.cpp @@ -11,8 +11,10 @@ #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/fixtures/host_pointer_manager_fixture.h" +#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" +#include "level_zero/core/test/unit_tests/mocks/mock_module.h" namespace L0 { namespace ult { @@ -161,6 +163,56 @@ HWTEST2_F(CommandListCreate, whenCommandListIsCreatedThenFlagsAreCorrectlySet, P EXPECT_EQ(flag, pCommandListCoreFamily->flags); } } +using CommandListAppendLaunchKernel = Test; +HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModePropertiesWhenUpdateStreamPropertiesIsCalledTwiceThenChangedFieldsAreDirty, IsAtLeastGen12lp) { + using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE; + DebugManagerStateRestore restorer; + + Mock<::L0::Kernel> kernel; + auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); + kernel.module = pMockModule.get(); + + auto pCommandList = std::make_unique>>(); + auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + const_cast(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100; + pCommandList->updateStreamProperties(kernel, false, false); + EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); + EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); + + const_cast(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x80; + pCommandList->updateStreamProperties(kernel, false, false); + EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); + if (IsXEHP::isMatched()) { + EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); + } else { + EXPECT_FALSE(pCommandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); + } +} + +HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModePropertiesWhenPropertesNotChangedThenAllFieldsAreNotDirty, IsAtLeastGen12lp) { + using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE; + DebugManagerStateRestore restorer; + + Mock<::L0::Kernel> kernel; + auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); + kernel.module = pMockModule.get(); + + auto pCommandList = std::make_unique>>(); + auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + const_cast(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100; + pCommandList->updateStreamProperties(kernel, false, false); + EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); + EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); + + pCommandList->updateStreamProperties(kernel, false, false); + EXPECT_FALSE(pCommandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty); + + EXPECT_FALSE(pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty); +} using HostPointerManagerCommandListTest = Test; HWTEST2_F(HostPointerManagerCommandListTest,