From cd955724435663e0036af8ad52cfde8e1ad8b5bc Mon Sep 17 00:00:00 2001 From: Filip Hazubski Date: Tue, 8 Mar 2022 14:18:31 +0000 Subject: [PATCH] Reuse common logic of programming SCM fields for gen 9 and gen 11 Logic related to programming non coherent and thread arbitration policy for gens 9 and 11 has been moved to EncodeComputeMode object, where similar logic for gens gen12lp and newer is located. Functions PreambleHelper::programThreadArbitration and PreambleHelper::getThreadArbitrationCommandsSize have been removed. Redundant setForceNonCoherent call has been removed from XE HPG Related-To: NEO-6728 Signed-off-by: Filip Hazubski --- level_zero/core/source/cmdlist/cmdlist_hw.inl | 5 +- .../sources/cmdlist/test_cmdlist_3.cpp | 135 +++--------------- .../command_stream_receiver_hw_1_tests.cpp | 3 +- .../unit_test/gen11/coherency_tests_gen11.cpp | 9 +- .../unit_test/gen9/coherency_tests_gen9.cpp | 4 - .../helpers/test_preamble_xehp_and_later.cpp | 8 +- .../xe_hpc_core/excludes_ocl_xe_hpc_core.cpp | 1 - .../command_encoder_bdw_and_later.inl | 4 - .../encode_compute_mode_bdw_and_later.inl | 7 +- .../command_stream_receiver_hw_base.inl | 18 +-- ...and_stream_receiver_hw_tgllp_and_later.inl | 8 -- shared/source/gen11/command_encoder_gen11.cpp | 47 ++++-- .../command_stream_receiver_hw_gen11.cpp | 9 -- shared/source/gen11/preamble_gen11.cpp | 20 --- shared/source/gen12lp/preamble_gen12lp.cpp | 2 - shared/source/gen8/command_encoder_gen8.cpp | 13 +- .../gen8/command_stream_receiver_hw_gen8.cpp | 4 - shared/source/gen9/command_encoder_gen9.cpp | 25 +++- .../gen9/command_stream_receiver_hw_gen9.cpp | 4 - shared/source/gen9/preamble_gen9.cpp | 20 --- shared/source/helpers/preamble.h | 2 - shared/source/helpers/preamble_base.inl | 9 -- .../command_encoder_xe_hpg_core.cpp | 2 - .../test/common/gen11/test_preamble_gen11.cpp | 8 +- shared/test/common/gen11/test_traits_gen11.h | 2 + .../common/gen12lp/test_preamble_gen12lp.cpp | 7 +- .../test/common/gen8/test_preamble_gen8.cpp | 5 +- shared/test/common/gen8/test_traits_gen8.h | 2 + .../test/common/gen9/preamble_tests_gen9.cpp | 8 +- shared/test/common/gen9/test_traits_gen9.h | 2 + .../unit_test/encoders/test_encode_states.cpp | 70 ++++++++- 31 files changed, 190 insertions(+), 273 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 4cb076fc02..9d0c3a42fb 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -78,7 +78,10 @@ void CommandListCoreFamily::programThreadArbitrationPolicy(Device if (NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) { threadArbitrationPolicy = static_cast(NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get()); } - NEO::PreambleHelper::programThreadArbitration(commandContainer.getCommandStream(), threadArbitrationPolicy); + NEO::StreamProperties streamProperties{}; + streamProperties.stateComputeMode.threadArbitrationPolicy.set(threadArbitrationPolicy); + NEO::EncodeComputeMode::programComputeModeCommand(*commandContainer.getCommandStream(), streamProperties.stateComputeMode, + this->device->getHwInfo()); } template diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp index 847533773d..3a06abbac3 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp @@ -283,29 +283,8 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyRegionHavingHostMemor ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); - auto itor = find(cmdList.begin(), cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_EQ(cmdList.end(), itor); + auto allPcCommands = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(8u, allPcCommands.size()); } HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyRegionHavingDeviceMemoryWithSignalAndWaitEventsUsingRenderEngineThenPipeControlIsNotFound, PlatformSupport) { @@ -351,26 +330,8 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyRegionHavingDeviceMem ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); - auto itor = find(cmdList.begin(), cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_EQ(cmdList.end(), itor); + auto allPcCommands = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(7u, allPcCommands.size()); context->freeMem(srcBuffer); context->freeMem(dstBuffer); @@ -412,26 +373,8 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingDeviceMemoryWit ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); - auto itor = find(cmdList.begin(), cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_EQ(cmdList.end(), itor); + auto allPcCommands = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(7u, allPcCommands.size()); context->freeMem(dstBuffer); } @@ -473,29 +416,8 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingSharedMemoryWit ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); - auto itor = find(cmdList.begin(), cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_EQ(cmdList.end(), itor); + auto allPcCommands = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(8u, allPcCommands.size()); context->freeMem(dstBuffer); } @@ -537,26 +459,8 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingHostMemoryWithS ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); - auto itor = find(cmdList.begin(), cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_EQ(cmdList.end(), itor); + auto allPcCommands = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(7u, allPcCommands.size()); context->freeMem(dstBuffer); } @@ -601,16 +505,9 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingEventsWithDevic auto itor = find(cmdList.begin(), cmdList.end()); EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - itor++; - itor = find(itor, cmdList.end()); - EXPECT_NE(cmdList.end(), itor); - auto cmd = genCmdCast(*itor); + auto allPcCommands = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(7u, allPcCommands.size()); + auto cmd = genCmdCast(*allPcCommands.back()); EXPECT_TRUE(cmd->getDcFlushEnable()); context->freeMem(dstBuffer); @@ -662,6 +559,12 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryFillHavingEventsWithDevic itor++; itor = find(itor, cmdList.end()); EXPECT_NE(cmdList.end(), itor); + itor++; + itor = find(itor, cmdList.end()); + EXPECT_NE(cmdList.end(), itor); + itor++; + itor = find(itor, cmdList.end()); + EXPECT_NE(cmdList.end(), itor); auto cmd = genCmdCast(*itor); EXPECT_FALSE(cmd->getDcFlushEnable()); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp index 2030594a43..12e6eab3b4 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp @@ -154,8 +154,7 @@ HWTEST_F(UltCommandStreamReceiverTest, givenPreambleSentAndThreadArbitrationPoli auto actualDifferenceForPreamble = policyChangedPreamble - policyNotChangedPreamble; auto actualDifferenceForFlush = policyChangedFlush - policyNotChangedFlush; - auto expectedDifference = PreambleHelper::getThreadArbitrationCommandsSize() + - EncodeComputeMode::getCmdSizeForComputeMode(*defaultHwInfo, false, commandStreamReceiver.isRcs()); + auto expectedDifference = EncodeComputeMode::getCmdSizeForComputeMode(*defaultHwInfo, false, commandStreamReceiver.isRcs()); EXPECT_EQ(0u, actualDifferenceForPreamble); EXPECT_EQ(expectedDifference, actualDifferenceForFlush); } diff --git a/opencl/test/unit_test/gen11/coherency_tests_gen11.cpp b/opencl/test/unit_test/gen11/coherency_tests_gen11.cpp index 27fcad133e..2074d07a2e 100644 --- a/opencl/test/unit_test/gen11/coherency_tests_gen11.cpp +++ b/opencl/test/unit_test/gen11/coherency_tests_gen11.cpp @@ -44,7 +44,6 @@ struct Gen11CoherencyRequirements : public ::testing::Test { }; GEN11TEST_F(Gen11CoherencyRequirements, GivenSettingsWhenCoherencyRequestedThenProgrammingIsCorrect) { - auto lriSize = sizeof(MI_LOAD_REGISTER_IMM); overrideCoherencyRequest(false, false); EXPECT_FALSE(csr->streamProperties.stateComputeMode.isDirty()); @@ -52,14 +51,10 @@ GEN11TEST_F(Gen11CoherencyRequirements, GivenSettingsWhenCoherencyRequestedThenP EXPECT_FALSE(csr->streamProperties.stateComputeMode.isDirty()); overrideCoherencyRequest(true, true); - auto retSize = csr->getCmdSizeForComputeMode(); EXPECT_TRUE(csr->streamProperties.stateComputeMode.isDirty()); - EXPECT_EQ(lriSize, retSize); overrideCoherencyRequest(true, false); - retSize = csr->getCmdSizeForComputeMode(); EXPECT_TRUE(csr->streamProperties.stateComputeMode.isDirty()); - EXPECT_EQ(lriSize, retSize); } GEN11TEST_F(Gen11CoherencyRequirements, GivenSettingsWhenCoherencyRequestedThenHdcModeCmdValuesAreCorrect) { @@ -73,14 +68,14 @@ GEN11TEST_F(Gen11CoherencyRequirements, GivenSettingsWhenCoherencyRequestedThenH overrideCoherencyRequest(true, false); csr->programComputeMode(stream, flags, *defaultHwInfo); - EXPECT_EQ(csr->getCmdSizeForComputeMode(), stream.getUsed()); + EXPECT_EQ(lriSize, stream.getUsed()); auto cmd = reinterpret_cast(stream.getCpuBase()); EXPECT_TRUE(memcmp(&expectedCmd, cmd, lriSize) == 0); overrideCoherencyRequest(true, true); csr->programComputeMode(stream, flags, *defaultHwInfo); - EXPECT_EQ(csr->getCmdSizeForComputeMode() * 2, stream.getUsed()); + EXPECT_EQ(lriSize * 2, stream.getUsed()); cmd = reinterpret_cast(ptrOffset(stream.getCpuBase(), lriSize)); expectedCmd.setDataDword(DwordBuilder::build(gen11HdcModeRegister::forceNonCoherentEnableBit, true, false)); diff --git a/opencl/test/unit_test/gen9/coherency_tests_gen9.cpp b/opencl/test/unit_test/gen9/coherency_tests_gen9.cpp index 187bbdd784..b0db7b1a16 100644 --- a/opencl/test/unit_test/gen9/coherency_tests_gen9.cpp +++ b/opencl/test/unit_test/gen9/coherency_tests_gen9.cpp @@ -22,14 +22,10 @@ GEN9TEST_F(Gen9CoherencyRequirements, WhenMemoryManagerIsInitializedThenNoCohere DispatchFlags flags = DispatchFlagsHelper::createDefaultDispatchFlags(); auto &csr = deviceFactory.rootDevices[0]->getUltCommandStreamReceiver(); - auto retSize = csr.getCmdSizeForComputeMode(); - EXPECT_EQ(0u, retSize); csr.programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(0u, stream.getUsed()); flags.requiresCoherency = true; - retSize = csr.getCmdSizeForComputeMode(); - EXPECT_EQ(0u, retSize); csr.programComputeMode(stream, flags, *defaultHwInfo); EXPECT_EQ(0u, stream.getUsed()); } diff --git a/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp b/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp index 459ae69dcd..982c73a174 100644 --- a/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp +++ b/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp @@ -23,12 +23,8 @@ using namespace NEO; using ThreadArbitrationXeHPAndLater = PreambleFixture; -HWCMDTEST_F(IGFX_XE_HP_CORE, ThreadArbitrationXeHPAndLater, givenPolicyWhenThreadArbitrationProgrammedThenDoNothing) { - LinearStream &cs = linearStream; - - PreambleHelper::programThreadArbitration(&cs, ThreadArbitrationPolicy::RoundRobin); - - EXPECT_EQ(0u, cs.getUsed()); +using Platforms = IsWithinGfxCore; +HWTEST2_F(ThreadArbitrationXeHPAndLater, whenGetDefaultThreadArbitrationPolicyIsCalledThenCorrectPolicyIsReturned, Platforms) { EXPECT_EQ(ThreadArbitrationPolicy::AgeBased, HwHelperHw::get().getDefaultThreadArbitrationPolicy()); } diff --git a/opencl/test/unit_test/xe_hpc_core/excludes_ocl_xe_hpc_core.cpp b/opencl/test/unit_test/xe_hpc_core/excludes_ocl_xe_hpc_core.cpp index 319bbafb18..009b73d551 100644 --- a/opencl/test/unit_test/xe_hpc_core/excludes_ocl_xe_hpc_core.cpp +++ b/opencl/test/unit_test/xe_hpc_core/excludes_ocl_xe_hpc_core.cpp @@ -10,7 +10,6 @@ HWTEST_EXCLUDE_PRODUCT(BufferSetSurfaceTests, givenAlignedCacheableReadOnlyBufferThenChoseOclBufferPolicy, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(BufferSetSurfaceTests, givenBufferSetSurfaceThatMemoryIsUnalignedToCachelineButReadOnlyThenL3CacheShouldBeStillOn, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(HwHelperTestXeHPAndLater, givenVariousCachesRequestProperMOCSIndexesAreBeingReturned, IGFX_XE_HPC_CORE); -HWTEST_EXCLUDE_PRODUCT(ThreadArbitrationXeHPAndLater, givenPolicyWhenThreadArbitrationProgrammedThenDoNothing, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(PipeControlHelperTestsXeHPAndLater, WhenAddingPipeControlWAThenCorrectCommandsAreProgrammed, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(QueueFamilyNameTest, givenRcsWhenGettingQueueFamilyNameThenReturnProperValue, IGFX_XE_HPC_CORE); HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenVariousValuesWhenConvertingHwRevIdAndSteppingThenConversionIsCorrect, IGFX_XE_HPC_CORE); diff --git a/shared/source/command_container/command_encoder_bdw_and_later.inl b/shared/source/command_container/command_encoder_bdw_and_later.inl index 696286b0e1..64a68b3a2e 100644 --- a/shared/source/command_container/command_encoder_bdw_and_later.inl +++ b/shared/source/command_container/command_encoder_bdw_and_later.inl @@ -333,10 +333,6 @@ inline void EncodeDispatchKernel::encodeAdditionalWalkerFields(const Har template void EncodeDispatchKernel::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {} -template -inline void EncodeComputeMode::programComputeModeCommand(LinearStream &csr, StateComputeModeProperties &properties, const HardwareInfo &hwInfo) { -} - template inline void EncodeComputeMode::adjustPipelineSelect(CommandContainer &container, const NEO::KernelDescriptor &kernelDescriptor) { } diff --git a/shared/source/command_container/encode_compute_mode_bdw_and_later.inl b/shared/source/command_container/encode_compute_mode_bdw_and_later.inl index 6649af77a9..c2e3ebc6a4 100644 --- a/shared/source/command_container/encode_compute_mode_bdw_and_later.inl +++ b/shared/source/command_container/encode_compute_mode_bdw_and_later.inl @@ -11,15 +11,12 @@ namespace NEO { -template -size_t EncodeComputeMode::getCmdSizeForComputeMode(const HardwareInfo &hwInfo, bool hasSharedHandles, bool isRcs) { - return 0u; -} - template inline void EncodeComputeMode::programComputeModeCommandWithSynchronization( LinearStream &csr, StateComputeModeProperties &properties, const PipelineSelectArgs &args, bool hasSharedHandles, const HardwareInfo &hwInfo, bool isRcs) { + + EncodeComputeMode::programComputeModeCommand(csr, properties, hwInfo); } template diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index fdebbc5399..fe2436f029 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -340,11 +340,6 @@ CompletionStamp CommandStreamReceiverHw::flushTask( addPipeControlBefore3dState(commandStreamCSR, dispatchFlags); programPerDssBackedBuffer(commandStreamCSR, device, dispatchFlags); - if (this->streamProperties.stateComputeMode.threadArbitrationPolicy.isDirty) { - auto threadArbitrationPolicy = this->streamProperties.stateComputeMode.threadArbitrationPolicy.value; - PreambleHelper::programThreadArbitration(&commandStreamCSR, threadArbitrationPolicy); - } - stateBaseAddressDirty |= ((GSBAFor32BitProgrammed ^ dispatchFlags.gsba32BitRequired) && force32BitAllocations); programVFEState(commandStreamCSR, dispatchFlags, device.getDeviceInfo().maxFrontEndThreads); @@ -654,6 +649,15 @@ void CommandStreamReceiverHw::forcePipeControl(NEO::LinearStream &com MemorySynchronizationCommands::addPipeControl(commandStreamCSR, args); } +template +void CommandStreamReceiverHw::programComputeMode(LinearStream &stream, DispatchFlags &dispatchFlags, const HardwareInfo &hwInfo) { + if (this->streamProperties.stateComputeMode.isDirty()) { + EncodeComputeMode::programComputeModeCommandWithSynchronization( + stream, this->streamProperties.stateComputeMode, dispatchFlags.pipelineSelectArgs, + hasSharedHandles(), hwInfo, isRcs()); + } +} + template inline void CommandStreamReceiverHw::programStallingCommandsForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags) { stallingCommandsOnNextFlushRequired = false; @@ -829,10 +833,6 @@ size_t CommandStreamReceiverHw::getRequiredCmdStreamSize(const Dispat size += TimestampPacketHelper::getRequiredCmdStreamSize(dispatchFlags.csrDependencies); size += TimestampPacketHelper::getRequiredCmdStreamSizeForTaskCountContainer(dispatchFlags.csrDependencies); - if (this->streamProperties.stateComputeMode.threadArbitrationPolicy.isDirty) { - size += PreambleHelper::getThreadArbitrationCommandsSize(); - } - if (stallingCommandsOnNextFlushRequired) { size += getCmdSizeForStallingCommands(dispatchFlags); } diff --git a/shared/source/command_stream/command_stream_receiver_hw_tgllp_and_later.inl b/shared/source/command_stream/command_stream_receiver_hw_tgllp_and_later.inl index e577322fb5..f46d653f65 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_tgllp_and_later.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_tgllp_and_later.inl @@ -12,14 +12,6 @@ #include "shared/source/os_interface/hw_info_config.h" namespace NEO { -template -void CommandStreamReceiverHw::programComputeMode(LinearStream &stream, DispatchFlags &dispatchFlags, const HardwareInfo &hwInfo) { - if (this->streamProperties.stateComputeMode.isDirty()) { - EncodeComputeMode::programComputeModeCommandWithSynchronization( - stream, this->streamProperties.stateComputeMode, dispatchFlags.pipelineSelectArgs, - hasSharedHandles(), hwInfo, isRcs()); - } -} template <> inline void CommandStreamReceiverHw::addPipeControlBeforeStateBaseAddress(LinearStream &commandStream) { diff --git a/shared/source/gen11/command_encoder_gen11.cpp b/shared/source/gen11/command_encoder_gen11.cpp index aec1f3bee8..203f453b1b 100644 --- a/shared/source/gen11/command_encoder_gen11.cpp +++ b/shared/source/gen11/command_encoder_gen11.cpp @@ -9,6 +9,8 @@ #include "shared/source/gen11/hw_cmds_base.h" #include "shared/source/gen11/reg_configs.h" +#include "reg_configs_common.h" + using Family = NEO::ICLFamily; #include "shared/source/command_container/command_encoder.inl" @@ -18,21 +20,6 @@ using Family = NEO::ICLFamily; namespace NEO { -template <> -size_t EncodeComputeMode::getCmdSizeForComputeMode(const HardwareInfo &hwInfo, bool hasSharedHandles, bool isRcs) { - return sizeof(typename Family::MI_LOAD_REGISTER_IMM); -} - -template <> -void EncodeComputeMode::programComputeModeCommandWithSynchronization( - LinearStream &csr, StateComputeModeProperties &properties, const PipelineSelectArgs &args, - bool hasSharedHandles, const HardwareInfo &hwInfo, bool isRcs) { - LriHelper::program(&csr, - gen11HdcModeRegister::address, - DwordBuilder::build(gen11HdcModeRegister::forceNonCoherentEnableBit, true, !properties.isCoherencyRequired.value), - false); -} - template <> bool EncodeSurfaceState::doBindingTablePrefetch() { return false; @@ -53,6 +40,36 @@ void EncodeSurfaceState::setFlagsForMediaCompression(R_SURFACE_STATE *su } } +template +size_t EncodeComputeMode::getCmdSizeForComputeMode(const HardwareInfo &hwInfo, bool hasSharedHandles, bool isRcs) { + return sizeof(typename Family::PIPE_CONTROL) + 2u * sizeof(typename Family::MI_LOAD_REGISTER_IMM); +} + +template <> +void EncodeComputeMode::programComputeModeCommand(LinearStream &csr, StateComputeModeProperties &properties, + const HardwareInfo &hwInfo) { + using PIPE_CONTROL = typename Family::PIPE_CONTROL; + + if (properties.threadArbitrationPolicy.isDirty) { + auto pipeControl = csr.getSpaceForCmd(); + PIPE_CONTROL cmd = Family::cmdInitPipeControl; + cmd.setCommandStreamerStallEnable(true); + *pipeControl = cmd; + + LriHelper::program(&csr, + RowChickenReg4::address, + RowChickenReg4::regDataForArbitrationPolicy[properties.threadArbitrationPolicy.value], + false); + } + if (properties.isCoherencyRequired.isDirty) { + auto nonCoherentEnable = !properties.isCoherencyRequired.value; + LriHelper::program(&csr, + gen11HdcModeRegister::address, + DwordBuilder::build(gen11HdcModeRegister::forceNonCoherentEnableBit, true, nonCoherentEnable), + false); + } +} + template struct EncodeDispatchKernel; template struct EncodeStates; template struct EncodeMath; diff --git a/shared/source/gen11/command_stream_receiver_hw_gen11.cpp b/shared/source/gen11/command_stream_receiver_hw_gen11.cpp index 1b1f4f3dd2..8baee87c09 100644 --- a/shared/source/gen11/command_stream_receiver_hw_gen11.cpp +++ b/shared/source/gen11/command_stream_receiver_hw_gen11.cpp @@ -17,15 +17,6 @@ namespace NEO { typedef ICLFamily Family; static auto gfxCore = IGFX_GEN11_CORE; -template <> -void CommandStreamReceiverHw::programComputeMode(LinearStream &stream, DispatchFlags &dispatchFlags, const HardwareInfo &hwInfo) { - if (this->streamProperties.stateComputeMode.isCoherencyRequired.isDirty) { - EncodeComputeMode::programComputeModeCommandWithSynchronization( - stream, this->streamProperties.stateComputeMode, dispatchFlags.pipelineSelectArgs, - hasSharedHandles(), hwInfo, isRcs()); - } -} - template <> void CommandStreamReceiverHw::programMediaSampler(LinearStream &stream, DispatchFlags &dispatchFlags) { using PWR_CLK_STATE_REGISTER = Family::PWR_CLK_STATE_REGISTER; diff --git a/shared/source/gen11/preamble_gen11.cpp b/shared/source/gen11/preamble_gen11.cpp index b8186c52f7..006f2b6727 100644 --- a/shared/source/gen11/preamble_gen11.cpp +++ b/shared/source/gen11/preamble_gen11.cpp @@ -63,26 +63,6 @@ void PreambleHelper::addPipeControlBeforeVfeCmd(LinearStream *pComman *pipeControl = cmd; } -template <> -void PreambleHelper::programThreadArbitration(LinearStream *pCommandStream, int32_t requiredThreadArbitrationPolicy) { - UNRECOVERABLE_IF(requiredThreadArbitrationPolicy == ThreadArbitrationPolicy::NotPresent); - - auto pipeControl = pCommandStream->getSpaceForCmd(); - PIPE_CONTROL cmd = ICLFamily::cmdInitPipeControl; - cmd.setCommandStreamerStallEnable(true); - *pipeControl = cmd; - - LriHelper::program(pCommandStream, - RowChickenReg4::address, - RowChickenReg4::regDataForArbitrationPolicy[requiredThreadArbitrationPolicy], - false); -} - -template <> -size_t PreambleHelper::getThreadArbitrationCommandsSize() { - return sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL); -} - template <> std::vector PreambleHelper::getSupportedThreadArbitrationPolicies() { std::vector retVal; diff --git a/shared/source/gen12lp/preamble_gen12lp.cpp b/shared/source/gen12lp/preamble_gen12lp.cpp index c97dfb69cf..1530c36ca6 100644 --- a/shared/source/gen12lp/preamble_gen12lp.cpp +++ b/shared/source/gen12lp/preamble_gen12lp.cpp @@ -11,8 +11,6 @@ #include "shared/source/helpers/preamble_bdw_and_later.inl" #include "shared/source/os_interface/hw_info_config.h" -#include "reg_configs_common.h" - namespace NEO { template <> diff --git a/shared/source/gen8/command_encoder_gen8.cpp b/shared/source/gen8/command_encoder_gen8.cpp index 9ba956ed85..ea9ed9a2ec 100644 --- a/shared/source/gen8/command_encoder_gen8.cpp +++ b/shared/source/gen8/command_encoder_gen8.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -17,6 +17,7 @@ using Family = NEO::BDWFamily; #include "shared/source/command_container/image_surface_state/compression_params_bdw_and_later.inl" namespace NEO { + template <> void EncodeSurfaceState::setAuxParamsForMCSCCS(R_SURFACE_STATE *surfaceState) { } @@ -32,6 +33,16 @@ void EncodeSurfaceState::setFlagsForMediaCompression(R_SURFACE_STATE *su } } +template +size_t EncodeComputeMode::getCmdSizeForComputeMode(const HardwareInfo &hwInfo, bool hasSharedHandles, bool isRcs) { + return 0u; +} + +template +void EncodeComputeMode::programComputeModeCommand(LinearStream &csr, StateComputeModeProperties &properties, + const HardwareInfo &hwInfo) { +} + template struct EncodeDispatchKernel; template struct EncodeStates; template struct EncodeMath; diff --git a/shared/source/gen8/command_stream_receiver_hw_gen8.cpp b/shared/source/gen8/command_stream_receiver_hw_gen8.cpp index f593c4321a..1fdd5a3247 100644 --- a/shared/source/gen8/command_stream_receiver_hw_gen8.cpp +++ b/shared/source/gen8/command_stream_receiver_hw_gen8.cpp @@ -15,10 +15,6 @@ namespace NEO { typedef BDWFamily Family; static auto gfxCore = IGFX_GEN8_CORE; -template <> -void CommandStreamReceiverHw::programComputeMode(LinearStream &stream, DispatchFlags &dispatchFlags, const HardwareInfo &hwInfo) { -} - template <> void populateFactoryTable>() { extern CommandStreamReceiverCreateFunc commandStreamReceiverFactory[2 * IGFX_MAX_CORE]; diff --git a/shared/source/gen9/command_encoder_gen9.cpp b/shared/source/gen9/command_encoder_gen9.cpp index 9a2ea65ee5..7da62558c1 100644 --- a/shared/source/gen9/command_encoder_gen9.cpp +++ b/shared/source/gen9/command_encoder_gen9.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -31,6 +31,29 @@ void EncodeSurfaceState::setFlagsForMediaCompression(R_SURFACE_STATE *su surfaceState->setAuxiliarySurfaceMode(Family::RENDER_SURFACE_STATE::AUXILIARY_SURFACE_MODE::AUXILIARY_SURFACE_MODE_AUX_NONE); } } +template +size_t EncodeComputeMode::getCmdSizeForComputeMode(const HardwareInfo &hwInfo, bool hasSharedHandles, bool isRcs) { + return sizeof(typename Family::PIPE_CONTROL) + sizeof(typename Family::MI_LOAD_REGISTER_IMM); +} + +template +void EncodeComputeMode::programComputeModeCommand(LinearStream &csr, StateComputeModeProperties &properties, + const HardwareInfo &hwInfo) { + using PIPE_CONTROL = typename Family::PIPE_CONTROL; + UNRECOVERABLE_IF(properties.threadArbitrationPolicy.value == ThreadArbitrationPolicy::NotPresent); + + if (properties.threadArbitrationPolicy.isDirty) { + auto pipeControl = csr.getSpaceForCmd(); + PIPE_CONTROL cmd = SKLFamily::cmdInitPipeControl; + cmd.setCommandStreamerStallEnable(true); + *pipeControl = cmd; + + LriHelper::program(&csr, + DebugControlReg2::address, + DebugControlReg2::getRegData(properties.threadArbitrationPolicy.value), + false); + } +} template struct EncodeDispatchKernel; template struct EncodeStates; diff --git a/shared/source/gen9/command_stream_receiver_hw_gen9.cpp b/shared/source/gen9/command_stream_receiver_hw_gen9.cpp index bfb7c0083d..e54241aeee 100644 --- a/shared/source/gen9/command_stream_receiver_hw_gen9.cpp +++ b/shared/source/gen9/command_stream_receiver_hw_gen9.cpp @@ -15,10 +15,6 @@ namespace NEO { typedef SKLFamily Family; static auto gfxCore = IGFX_GEN9_CORE; -template <> -void CommandStreamReceiverHw::programComputeMode(LinearStream &stream, DispatchFlags &dispatchFlags, const HardwareInfo &hwInfo) { -} - template <> void populateFactoryTable>() { extern CommandStreamReceiverCreateFunc commandStreamReceiverFactory[2 * IGFX_MAX_CORE]; diff --git a/shared/source/gen9/preamble_gen9.cpp b/shared/source/gen9/preamble_gen9.cpp index 5fe5f0cac2..86ecdf09f5 100644 --- a/shared/source/gen9/preamble_gen9.cpp +++ b/shared/source/gen9/preamble_gen9.cpp @@ -63,26 +63,6 @@ void PreambleHelper::addPipeControlBeforeVfeCmd(LinearStream *pComman *pipeControl = cmd; } -template <> -void PreambleHelper::programThreadArbitration(LinearStream *pCommandStream, int32_t requiredThreadArbitrationPolicy) { - UNRECOVERABLE_IF(requiredThreadArbitrationPolicy == ThreadArbitrationPolicy::NotPresent); - - auto pipeControl = pCommandStream->getSpaceForCmd(); - PIPE_CONTROL cmd = SKLFamily::cmdInitPipeControl; - cmd.setCommandStreamerStallEnable(true); - *pipeControl = cmd; - - LriHelper::program(pCommandStream, - DebugControlReg2::address, - DebugControlReg2::getRegData(requiredThreadArbitrationPolicy), - false); -} - -template <> -size_t PreambleHelper::getThreadArbitrationCommandsSize() { - return sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL); -} - template <> std::vector PreambleHelper::getSupportedThreadArbitrationPolicies() { std::vector retVal; diff --git a/shared/source/helpers/preamble.h b/shared/source/helpers/preamble.h index c9c0b12e1c..5b0180b218 100644 --- a/shared/source/helpers/preamble.h +++ b/shared/source/helpers/preamble.h @@ -38,7 +38,6 @@ struct PreambleHelper { const PipelineSelectArgs &pipelineSelectArgs, const HardwareInfo &hwInfo); static void appendProgramPipelineSelect(void *cmd, bool isSpecialModeSelected, const HardwareInfo &hwInfo); - static void programThreadArbitration(LinearStream *pCommandStream, int32_t requiredThreadArbitrationPolicy); static void programPreemption(LinearStream *pCommandStream, Device &device, GraphicsAllocation *preemptionCsr); static void addPipeControlBeforeVfeCmd(LinearStream *pCommandStream, const HardwareInfo *hwInfo, EngineGroupType engineGroupType); static void appendProgramVFEState(const HardwareInfo &hwInfo, const StreamProperties &streamProperties, void *cmd); @@ -63,7 +62,6 @@ struct PreambleHelper { static bool isSpecialPipelineSelectModeChanged(bool lastSpecialPipelineSelectMode, bool newSpecialPipelineSelectMode, const HardwareInfo &hwInfo); static size_t getAdditionalCommandsSize(const Device &device); - static size_t getThreadArbitrationCommandsSize(); static std::vector getSupportedThreadArbitrationPolicies(); static size_t getVFECommandsSize(); static size_t getKernelDebuggingCommandsSize(bool debuggingActive); diff --git a/shared/source/helpers/preamble_base.inl b/shared/source/helpers/preamble_base.inl index 24e1c7b0ee..42a8751b6a 100644 --- a/shared/source/helpers/preamble_base.inl +++ b/shared/source/helpers/preamble_base.inl @@ -20,15 +20,6 @@ namespace NEO { -template -void PreambleHelper::programThreadArbitration(LinearStream *pCommandStream, int32_t requiredThreadArbitrationPolicy) { -} - -template -size_t PreambleHelper::getThreadArbitrationCommandsSize() { - return 0; -} - template std::vector PreambleHelper::getSupportedThreadArbitrationPolicies() { return {}; diff --git a/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp b/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp index 7e295662f4..ddcddba5ea 100644 --- a/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp +++ b/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp @@ -153,8 +153,6 @@ void EncodeComputeMode::programComputeModeCommand(LinearStream &csr, Sta hwInfoConfig.setForceNonCoherent(&stateComputeMode, properties); - HwInfoConfig::get(hwInfo.platform.eProductFamily)->setForceNonCoherent(&stateComputeMode, properties); - auto buffer = csr.getSpaceForCmd(); *buffer = stateComputeMode; } diff --git a/shared/test/common/gen11/test_preamble_gen11.cpp b/shared/test/common/gen11/test_preamble_gen11.cpp index 1bb2351cd6..c5c373b724 100644 --- a/shared/test/common/gen11/test_preamble_gen11.cpp +++ b/shared/test/common/gen11/test_preamble_gen11.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2021 Intel Corporation + * Copyright (C) 2019-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -137,7 +137,6 @@ GEN11TEST_F(ThreadArbitrationGen11, givenPreambleWhenItIsProgrammedThenThreadArb MockDevice device; EXPECT_EQ(0u, PreambleHelper::getAdditionalCommandsSize(device)); - EXPECT_EQ(sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL), PreambleHelper::getThreadArbitrationCommandsSize()); } GEN11TEST_F(ThreadArbitrationGen11, whenThreadArbitrationPolicyIsProgrammedThenCorrectValuesAreSet) { @@ -147,7 +146,9 @@ GEN11TEST_F(ThreadArbitrationGen11, whenThreadArbitrationPolicyIsProgrammedThenC typedef ICLFamily::PIPE_CONTROL PIPE_CONTROL; LinearStream &cs = linearStream; MockDevice mockDevice; - PreambleHelper::programThreadArbitration(&linearStream, ThreadArbitrationPolicy::RoundRobin); + StreamProperties streamProperties{}; + streamProperties.stateComputeMode.threadArbitrationPolicy.set(ThreadArbitrationPolicy::RoundRobin); + EncodeComputeMode::programComputeModeCommand(linearStream, streamProperties.stateComputeMode, *defaultHwInfo); parseCommands(cs); @@ -161,7 +162,6 @@ GEN11TEST_F(ThreadArbitrationGen11, whenThreadArbitrationPolicyIsProgrammedThenC MockDevice device; EXPECT_EQ(0u, PreambleHelper::getAdditionalCommandsSize(device)); - EXPECT_EQ(sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL), PreambleHelper::getThreadArbitrationCommandsSize()); } GEN11TEST_F(ThreadArbitrationGen11, GivenDefaultWhenProgrammingPreambleThenArbitrationPolicyIsRoundRobin) { diff --git a/shared/test/common/gen11/test_traits_gen11.h b/shared/test/common/gen11/test_traits_gen11.h index dc8135cb30..151112aca3 100644 --- a/shared/test/common/gen11/test_traits_gen11.h +++ b/shared/test/common/gen11/test_traits_gen11.h @@ -19,4 +19,6 @@ struct TestTraits { static constexpr bool implementsPreambleThreadArbitration = true; static constexpr bool forceGpuNonCoherent = false; static constexpr bool imagesSupported = true; + static constexpr bool programComputeModeCommandProgramsThreadArbitrationPolicy = true; + static constexpr bool programComputeModeCommandProgramsNonCoherent = true; }; diff --git a/shared/test/common/gen12lp/test_preamble_gen12lp.cpp b/shared/test/common/gen12lp/test_preamble_gen12lp.cpp index c35b9b88bb..bcd64127a4 100644 --- a/shared/test/common/gen12lp/test_preamble_gen12lp.cpp +++ b/shared/test/common/gen12lp/test_preamble_gen12lp.cpp @@ -180,12 +180,7 @@ HWTEST2_F(Gen12LpPreambleVfeState, givenDisableEUFusionWhenProgramAdditionalFiel } typedef PreambleFixture ThreadArbitrationGen12Lp; -GEN12LPTEST_F(ThreadArbitrationGen12Lp, givenPolicyWhenThreadArbitrationProgrammedThenDoNothing) { - LinearStream &cs = linearStream; - - PreambleHelper::programThreadArbitration(&cs, ThreadArbitrationPolicy::RoundRobin); - - EXPECT_EQ(0u, cs.getUsed()); +GEN12LPTEST_F(ThreadArbitrationGen12Lp, whenGetDefaultThreadArbitrationPolicyIsCalledThenCorrectPolicyIsReturned) { EXPECT_EQ(ThreadArbitrationPolicy::AgeBased, HwHelperHw::get().getDefaultThreadArbitrationPolicy()); } diff --git a/shared/test/common/gen8/test_preamble_gen8.cpp b/shared/test/common/gen8/test_preamble_gen8.cpp index 1650e9ee63..b8c5ff4e6f 100644 --- a/shared/test/common/gen8/test_preamble_gen8.cpp +++ b/shared/test/common/gen8/test_preamble_gen8.cpp @@ -74,13 +74,14 @@ BDWTEST_F(ThreadArbitrationGen8, givenPolicyWhenThreadArbitrationProgrammedThenD typedef BDWFamily::MI_LOAD_REGISTER_IMM MI_LOAD_REGISTER_IMM; LinearStream &cs = linearStream; - PreambleHelper::programThreadArbitration(&cs, ThreadArbitrationPolicy::RoundRobin); + StreamProperties streamProperties{}; + streamProperties.stateComputeMode.threadArbitrationPolicy.set(ThreadArbitrationPolicy::RoundRobin); + EncodeComputeMode::programComputeModeCommand(linearStream, streamProperties.stateComputeMode, *defaultHwInfo); EXPECT_EQ(0u, cs.getUsed()); MockDevice device; EXPECT_EQ(0u, PreambleHelper::getAdditionalCommandsSize(device)); - EXPECT_EQ(0u, PreambleHelper::getThreadArbitrationCommandsSize()); EXPECT_EQ(ThreadArbitrationPolicy::AgeBased, HwHelperHw::get().getDefaultThreadArbitrationPolicy()); } diff --git a/shared/test/common/gen8/test_traits_gen8.h b/shared/test/common/gen8/test_traits_gen8.h index b96ea1ef4f..558659d651 100644 --- a/shared/test/common/gen8/test_traits_gen8.h +++ b/shared/test/common/gen8/test_traits_gen8.h @@ -18,4 +18,6 @@ struct TestTraits { static constexpr bool implementsPreambleThreadArbitration = false; static constexpr bool forceGpuNonCoherent = false; static constexpr bool imagesSupported = true; + static constexpr bool programComputeModeCommandProgramsThreadArbitrationPolicy = false; + static constexpr bool programComputeModeCommandProgramsNonCoherent = false; }; diff --git a/shared/test/common/gen9/preamble_tests_gen9.cpp b/shared/test/common/gen9/preamble_tests_gen9.cpp index 656e0720ea..fac58bae94 100644 --- a/shared/test/common/gen9/preamble_tests_gen9.cpp +++ b/shared/test/common/gen9/preamble_tests_gen9.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -90,7 +90,6 @@ GEN9TEST_F(ThreadArbitrationGen9, givenPreambleWhenItIsProgrammedThenThreadArbit MockDevice device; EXPECT_EQ(0u, PreambleHelper::getAdditionalCommandsSize(device)); - EXPECT_EQ(sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL), PreambleHelper::getThreadArbitrationCommandsSize()); } GEN9TEST_F(ThreadArbitrationGen9, whenThreadArbitrationPolicyIsProgrammedThenCorrectValuesAreSet) { @@ -100,7 +99,9 @@ GEN9TEST_F(ThreadArbitrationGen9, whenThreadArbitrationPolicyIsProgrammedThenCor typedef SKLFamily::PIPE_CONTROL PIPE_CONTROL; LinearStream &cs = linearStream; MockDevice mockDevice; - PreambleHelper::programThreadArbitration(&linearStream, ThreadArbitrationPolicy::RoundRobin); + StreamProperties streamProperties{}; + streamProperties.stateComputeMode.threadArbitrationPolicy.set(ThreadArbitrationPolicy::RoundRobin); + EncodeComputeMode::programComputeModeCommand(linearStream, streamProperties.stateComputeMode, *defaultHwInfo); parseCommands(cs); @@ -116,5 +117,4 @@ GEN9TEST_F(ThreadArbitrationGen9, whenThreadArbitrationPolicyIsProgrammedThenCor MockDevice device; EXPECT_EQ(0u, PreambleHelper::getAdditionalCommandsSize(device)); - EXPECT_EQ(sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL), PreambleHelper::getThreadArbitrationCommandsSize()); } diff --git a/shared/test/common/gen9/test_traits_gen9.h b/shared/test/common/gen9/test_traits_gen9.h index f0c13fe490..5c5748dd17 100644 --- a/shared/test/common/gen9/test_traits_gen9.h +++ b/shared/test/common/gen9/test_traits_gen9.h @@ -18,4 +18,6 @@ struct TestTraits { static constexpr bool implementsPreambleThreadArbitration = true; static constexpr bool forceGpuNonCoherent = false; static constexpr bool imagesSupported = true; + static constexpr bool programComputeModeCommandProgramsThreadArbitrationPolicy = true; + static constexpr bool programComputeModeCommandProgramsNonCoherent = false; }; diff --git a/shared/test/unit_test/encoders/test_encode_states.cpp b/shared/test/unit_test/encoders/test_encode_states.cpp index 8b7dfe1631..32770a61df 100644 --- a/shared/test/unit_test/encoders/test_encode_states.cpp +++ b/shared/test/unit_test/encoders/test_encode_states.cpp @@ -13,7 +13,9 @@ #include "shared/test/common/fixtures/front_window_fixture.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/unit_test_helper.h" +#include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_device.h" +#include "shared/test/common/mocks/ult_device_factory.h" #include "test_traits_common.h" @@ -358,11 +360,69 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, whenAdjustPipelineSelectIsC EXPECT_EQ(initialUsed, cmdContainer->getCommandStream()->getUsed()); } -HWTEST2_F(CommandEncodeStatesTest, whenAdjustStateComputeModeIsCalledThenNothingHappens, IsAtMostGen11) { - using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; +HWTEST2_F(CommandEncodeStatesTest, whenProgramComputeModeCommandModeIsCalledThenThreadArbitrationPolicyIsProgrammed, IsAtMostGen11) { + using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; auto initialUsed = cmdContainer->getCommandStream()->getUsed(); - StreamProperties emptyProperties{}; + auto expectedSize = sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL); + StreamProperties streamProperties{}; + streamProperties.stateComputeMode.threadArbitrationPolicy.value = ThreadArbitrationPolicy::AgeBased; + streamProperties.stateComputeMode.threadArbitrationPolicy.isDirty = true; NEO::EncodeComputeMode::programComputeModeCommand(*cmdContainer->getCommandStream(), - emptyProperties.stateComputeMode, *defaultHwInfo); - EXPECT_EQ(initialUsed, cmdContainer->getCommandStream()->getUsed()); + streamProperties.stateComputeMode, *defaultHwInfo); + + if constexpr (TestTraits::programComputeModeCommandProgramsThreadArbitrationPolicy) { + GenCmdList commands; + CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), initialUsed), cmdContainer->getCommandStream()->getUsed()); + + auto cmdCount = findAll(commands.begin(), commands.end()).size(); + EXPECT_EQ(1u, cmdCount); + cmdCount = findAll(commands.begin(), commands.end()).size(); + EXPECT_EQ(1u, cmdCount); + EXPECT_EQ(initialUsed + expectedSize, cmdContainer->getCommandStream()->getUsed()); + } else { + EXPECT_EQ(initialUsed, cmdContainer->getCommandStream()->getUsed()); + } } + +HWTEST2_F(CommandEncodeStatesTest, whenProgramComputeModeCommandModeIsCalledThenNonCoherentIsProgrammed, IsAtMostGen11) { + using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + auto initialUsed = cmdContainer->getCommandStream()->getUsed(); + [[maybe_unused]] auto expectedSize = sizeof(MI_LOAD_REGISTER_IMM); + StreamProperties streamProperties{}; + streamProperties.stateComputeMode.threadArbitrationPolicy.value = ThreadArbitrationPolicy::AgeBased; + streamProperties.stateComputeMode.isCoherencyRequired.isDirty = true; + NEO::EncodeComputeMode::programComputeModeCommand(*cmdContainer->getCommandStream(), + streamProperties.stateComputeMode, *defaultHwInfo); + + if constexpr (TestTraits::programComputeModeCommandProgramsNonCoherent) { + GenCmdList commands; + CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), initialUsed), cmdContainer->getCommandStream()->getUsed()); + + auto cmdCount = findAll(commands.begin(), commands.end()).size(); + EXPECT_EQ(1u, cmdCount); + EXPECT_EQ(initialUsed + expectedSize, cmdContainer->getCommandStream()->getUsed()); + } else { + EXPECT_EQ(initialUsed, cmdContainer->getCommandStream()->getUsed()); + } +} + +HWTEST2_F(CommandEncodeStatesTest, whenGetCmdSizeForComputeModeThenCorrectValueIsReturned, IsAtMostGen11) { + using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + auto expectedScmSize = 0u; + + if constexpr (TestTraits::programComputeModeCommandProgramsThreadArbitrationPolicy) { + expectedScmSize += sizeof(MI_LOAD_REGISTER_IMM) + sizeof(PIPE_CONTROL); + } + if constexpr (TestTraits::programComputeModeCommandProgramsNonCoherent) { + expectedScmSize += sizeof(MI_LOAD_REGISTER_IMM); + } + EXPECT_EQ(expectedScmSize, EncodeComputeMode::getCmdSizeForComputeMode(*defaultHwInfo, false, false)); + + UltDeviceFactory deviceFactory{1, 0}; + auto &csr = deviceFactory.rootDevices[0]->getUltCommandStreamReceiver(); + csr.streamProperties.stateComputeMode.setProperties(false, 0, ThreadArbitrationPolicy::AgeBased, *defaultHwInfo); + EXPECT_EQ(expectedScmSize, csr.getCmdSizeForComputeMode()); +} \ No newline at end of file