diff --git a/shared/source/command_container/encode_alu_helper.h b/shared/source/command_container/encode_alu_helper.h index 8414ef122f..8fa0d20bbb 100644 --- a/shared/source/command_container/encode_alu_helper.h +++ b/shared/source/command_container/encode_alu_helper.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022 Intel Corporation + * Copyright (C) 2022-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -24,6 +24,12 @@ class EncodeAluHelper { aluOps.miMath.DW0.BitField.DwordLength = AluCount - 1; } + void setMocs([[maybe_unused]] uint32_t mocs) { + if constexpr (GfxFamily::isUsingMiMathMocs) { + aluOps.miMath.DW0.BitField.MemoryObjectControlState = mocs; + } + } + void setNextAlu(AluRegisters opcode) { setNextAlu(opcode, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE); } diff --git a/shared/source/direct_submission/direct_submission_hw.inl b/shared/source/direct_submission/direct_submission_hw.inl index 93c4bdde76..922207be13 100644 --- a/shared/source/direct_submission/direct_submission_hw.inl +++ b/shared/source/direct_submission/direct_submission_hw.inl @@ -14,6 +14,8 @@ #include "shared/source/direct_submission/direct_submission_hw_diagnostic_mode.h" #include "shared/source/direct_submission/relaxed_ordering_helper.h" #include "shared/source/execution_environment/root_device_environment.h" +#include "shared/source/gmm_helper/gmm_helper.h" +#include "shared/source/gmm_helper/gmm_lib.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/helpers/gfx_core_helper.h" @@ -102,6 +104,8 @@ void DirectSubmissionHw::dispatchStaticRelaxedOrderingSch uint64_t loopSectionStartAddress = schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::loopStartSectionStart; + const uint32_t miMathMocs = this->rootDeviceEnvironment.getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); + // 1. Init section { EncodeMiPredicate::encode(schedulerCmdStream, MiPredicateType::Disable); @@ -133,6 +137,7 @@ void DirectSubmissionHw::dispatchStaticRelaxedOrderingSch LriHelper::program(&schedulerCmdStream, CS_GPR_R8 + 4, static_cast(deferredTasksListGpuVa >> 32), true); EncodeAluHelper aluHelper; + aluHelper.setMocs(miMathMocs); aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_2); aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_6); aluHelper.setNextAlu(AluRegisters::OPCODE_SHL); @@ -168,6 +173,7 @@ void DirectSubmissionHw::dispatchStaticRelaxedOrderingSch LriHelper::program(&schedulerCmdStream, CS_GPR_R8 + 4, static_cast(deferredTasksListGpuVa >> 32), true); EncodeAluHelper aluHelper; + aluHelper.setMocs(miMathMocs); aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_1); aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_7); aluHelper.setNextAlu(AluRegisters::OPCODE_SHL); @@ -238,6 +244,7 @@ void DirectSubmissionHw::dispatchStaticRelaxedOrderingSch LriHelper::program(&schedulerCmdStream, CS_GPR_R10 + 4, 0, true); EncodeAluHelper aluHelper; + aluHelper.setMocs(miMathMocs); aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_9); aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_10); aluHelper.setNextAlu(AluRegisters::OPCODE_ADD); @@ -815,7 +822,10 @@ void DirectSubmissionHw::preinitializeRelaxedOrderingSect LriHelper::program(&stream, CS_GPR_R8, 8, true); LriHelper::program(&stream, CS_GPR_R8 + 4, 0, true); + const uint32_t miMathMocs = this->rootDeviceEnvironment.getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); + EncodeAluHelper aluHelper; + aluHelper.setMocs(miMathMocs); aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_1); aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_8); aluHelper.setNextAlu(AluRegisters::OPCODE_SHL); diff --git a/shared/source/gen11/hw_cmds_base.h b/shared/source/gen11/hw_cmds_base.h index 15e3ec87b9..8d04785a0a 100644 --- a/shared/source/gen11/hw_cmds_base.h +++ b/shared/source/gen11/hw_cmds_base.h @@ -27,6 +27,7 @@ struct Gen11 { static constexpr bool isUsingGenericMediaStateClear = true; static constexpr bool isUsingMiMemFence = false; static constexpr bool isUsingMiSetPredicate = false; + static constexpr bool isUsingMiMathMocs = false; struct FrontEndStateSupport { static constexpr bool scratchSize = true; diff --git a/shared/source/gen12lp/hw_cmds_base.h b/shared/source/gen12lp/hw_cmds_base.h index e7059082d8..a34b217a4f 100644 --- a/shared/source/gen12lp/hw_cmds_base.h +++ b/shared/source/gen12lp/hw_cmds_base.h @@ -26,6 +26,7 @@ struct Gen12Lp { static constexpr uint32_t stateComputeModeForceNonCoherentMask = (0b11u << 3); static constexpr bool isUsingMiMemFence = false; static constexpr bool isUsingMiSetPredicate = false; + static constexpr bool isUsingMiMathMocs = false; struct FrontEndStateSupport { static constexpr bool scratchSize = true; diff --git a/shared/source/gen8/hw_cmds_base.h b/shared/source/gen8/hw_cmds_base.h index 2386fff095..fb9ea71d2f 100644 --- a/shared/source/gen8/hw_cmds_base.h +++ b/shared/source/gen8/hw_cmds_base.h @@ -28,6 +28,7 @@ struct Gen8 { static constexpr bool isUsingGenericMediaStateClear = true; static constexpr bool isUsingMiMemFence = false; static constexpr bool isUsingMiSetPredicate = false; + static constexpr bool isUsingMiMathMocs = false; struct FrontEndStateSupport { static constexpr bool scratchSize = true; diff --git a/shared/source/gen9/hw_cmds_base.h b/shared/source/gen9/hw_cmds_base.h index 2522912f94..7927ba5aa6 100644 --- a/shared/source/gen9/hw_cmds_base.h +++ b/shared/source/gen9/hw_cmds_base.h @@ -27,6 +27,7 @@ struct Gen9 { static constexpr bool isUsingGenericMediaStateClear = true; static constexpr bool isUsingMiMemFence = false; static constexpr bool isUsingMiSetPredicate = false; + static constexpr bool isUsingMiMathMocs = false; struct FrontEndStateSupport { static constexpr bool scratchSize = true; diff --git a/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl b/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl index 14996a5fcd..5de2a4b8d2 100644 --- a/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl +++ b/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl @@ -652,7 +652,8 @@ typedef struct tagMI_MATH { union _DW0 { struct _BitField { uint32_t DwordLength : BITFIELD_RANGE(0, 7); - uint32_t Reserved : BITFIELD_RANGE(8, 22); + uint32_t MemoryObjectControlState : BITFIELD_RANGE(8, 14); + uint32_t Reserved : BITFIELD_RANGE(15, 22); uint32_t InstructionOpcode : BITFIELD_RANGE(23, 28); uint32_t InstructionType : BITFIELD_RANGE(29, 31); } BitField; diff --git a/shared/source/generated/xe_hpg_core/hw_cmds_generated_xe_hpg_core.inl b/shared/source/generated/xe_hpg_core/hw_cmds_generated_xe_hpg_core.inl index 51e60940fb..69a8e3c789 100644 --- a/shared/source/generated/xe_hpg_core/hw_cmds_generated_xe_hpg_core.inl +++ b/shared/source/generated/xe_hpg_core/hw_cmds_generated_xe_hpg_core.inl @@ -401,7 +401,8 @@ typedef struct tagMI_MATH { union _DW0 { struct _BitField { uint32_t DwordLength : BITFIELD_RANGE(0, 7); - uint32_t Reserved : BITFIELD_RANGE(8, 22); + uint32_t MemoryObjectControlState : BITFIELD_RANGE(8, 14); + uint32_t Reserved : BITFIELD_RANGE(15, 22); uint32_t InstructionOpcode : BITFIELD_RANGE(23, 28); uint32_t InstructionType : BITFIELD_RANGE(29, 31); } BitField; diff --git a/shared/source/xe_hp_core/hw_cmds_base.h b/shared/source/xe_hp_core/hw_cmds_base.h index c570cfd5af..5dbeac1de0 100644 --- a/shared/source/xe_hp_core/hw_cmds_base.h +++ b/shared/source/xe_hp_core/hw_cmds_base.h @@ -32,6 +32,7 @@ struct XeHpCore { static constexpr bool isUsingGenericMediaStateClear = true; static constexpr bool isUsingMiMemFence = false; static constexpr bool isUsingMiSetPredicate = true; + static constexpr bool isUsingMiMathMocs = false; struct FrontEndStateSupport { static constexpr bool scratchSize = true; diff --git a/shared/source/xe_hpc_core/hw_cmds_xe_hpc_core_base.h b/shared/source/xe_hpc_core/hw_cmds_xe_hpc_core_base.h index 46fcc9ccb4..a86c3a08c3 100644 --- a/shared/source/xe_hpc_core/hw_cmds_xe_hpc_core_base.h +++ b/shared/source/xe_hpc_core/hw_cmds_xe_hpc_core_base.h @@ -34,6 +34,7 @@ struct XeHpcCore { static constexpr bool isUsingGenericMediaStateClear = true; static constexpr bool isUsingMiMemFence = true; static constexpr bool isUsingMiSetPredicate = true; + static constexpr bool isUsingMiMathMocs = true; struct StateBaseAddressStateSupport { static constexpr bool globalAtomics = false; diff --git a/shared/source/xe_hpg_core/hw_cmds_xe_hpg_core_base.h b/shared/source/xe_hpg_core/hw_cmds_xe_hpg_core_base.h index 0f9c4b7db5..4e512fc86f 100644 --- a/shared/source/xe_hpg_core/hw_cmds_xe_hpg_core_base.h +++ b/shared/source/xe_hpg_core/hw_cmds_xe_hpg_core_base.h @@ -34,6 +34,7 @@ struct XeHpgCore { static constexpr bool isUsingGenericMediaStateClear = true; static constexpr bool isUsingMiMemFence = false; static constexpr bool isUsingMiSetPredicate = true; + static constexpr bool isUsingMiMathMocs = true; struct FrontEndStateSupport { static constexpr bool scratchSize = true; diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp index d93f9871bb..e89257802c 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp @@ -12,6 +12,8 @@ #include "shared/source/direct_submission/direct_submission_hw.h" #include "shared/source/direct_submission/dispatchers/render_dispatcher.h" #include "shared/source/direct_submission/relaxed_ordering_helper.h" +#include "shared/source/gmm_helper/gmm_helper.h" +#include "shared/source/gmm_helper/gmm_lib.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/register_offsets.h" @@ -1054,7 +1056,7 @@ struct DirectSubmissionRelaxedOrderingTests : public DirectSubmissionDispatchBuf bool verifyDynamicSchedulerProgramming(LinearStream &cs, uint64_t schedulerAllocationGpuVa, uint64_t semaphoreGpuVa, uint32_t semaphoreValue, size_t offset, size_t &endOffset); template - bool verifyStaticSchedulerProgramming(GraphicsAllocation &schedulerAllocation, uint64_t deferredTaskListVa, uint32_t expectedQueueSizeLimit); + bool verifyStaticSchedulerProgramming(GraphicsAllocation &schedulerAllocation, uint64_t deferredTaskListVa, uint32_t expectedQueueSizeLimit, uint32_t miMathMocs); template bool verifyMiPredicate(void *miPredicateCmd, MiPredicateType predicateType); @@ -1329,7 +1331,7 @@ bool DirectSubmissionRelaxedOrderingTests::verifyConditionalDataRegBbStart(void } template -bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(GraphicsAllocation &schedulerAllocation, uint64_t deferredTaskListVa, uint32_t expectedQueueSizeLimit) { +bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(GraphicsAllocation &schedulerAllocation, uint64_t deferredTaskListVa, uint32_t expectedQueueSizeLimit, uint32_t miMathMocs) { using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG; @@ -1412,6 +1414,12 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap return false; } + if constexpr (FamilyType::isUsingMiMathMocs) { + if (miMathCmd->DW0.BitField.MemoryObjectControlState != miMathMocs) { + return false; + } + } + auto miAluCmd = reinterpret_cast(++miMathCmd); if (!verifyAlu(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_2)) { return false; @@ -1504,6 +1512,12 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap return false; } + if constexpr (FamilyType::isUsingMiMathMocs) { + if (miMathCmd->DW0.BitField.MemoryObjectControlState != miMathMocs) { + return false; + } + } + miAluCmd = reinterpret_cast(++miMathCmd); if (!verifyAlu(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_1)) { return false; @@ -1646,6 +1660,12 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap return false; } + if constexpr (FamilyType::isUsingMiMathMocs) { + if (miMathCmd->DW0.BitField.MemoryObjectControlState != miMathMocs) { + return false; + } + } + miAluCmd = reinterpret_cast(++miMathCmd); if (!verifyAlu(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_9)) { return false; @@ -1793,7 +1813,8 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenDebugFlagSetWhenDispatching EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled); EXPECT_TRUE(verifyStaticSchedulerProgramming(*directSubmission.relaxedOrderingSchedulerAllocation, - directSubmission.deferredTasksListAllocation->getGpuAddress(), 123)); + directSubmission.deferredTasksListAllocation->getGpuAddress(), 123, + pDevice->getRootDeviceEnvironment().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER))); } HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenNewNumberOfClientsWhenDispatchingWorkThenIncraseQueueSize, IsAtLeastXeHpcCore) { @@ -1806,7 +1827,8 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenNewNumberOfClientsWhenDispa EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled); EXPECT_EQ(RelaxedOrderingHelper::queueSizeMultiplier, directSubmission.currentRelaxedOrderingQueueSize); EXPECT_TRUE(verifyStaticSchedulerProgramming(*directSubmission.relaxedOrderingSchedulerAllocation, - directSubmission.deferredTasksListAllocation->getGpuAddress(), RelaxedOrderingHelper::queueSizeMultiplier)); + directSubmission.deferredTasksListAllocation->getGpuAddress(), RelaxedOrderingHelper::queueSizeMultiplier, + pDevice->getRootDeviceEnvironment().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER))); const uint64_t expectedQueueSizeValueVa = directSubmission.relaxedOrderingSchedulerAllocation->getGpuAddress() + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::drainRequestSectionStart + @@ -1876,7 +1898,8 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenDispatchStat EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled); EXPECT_TRUE(verifyStaticSchedulerProgramming(*directSubmission.relaxedOrderingSchedulerAllocation, - directSubmission.deferredTasksListAllocation->getGpuAddress(), RelaxedOrderingHelper::queueSizeMultiplier)); + directSubmission.deferredTasksListAllocation->getGpuAddress(), RelaxedOrderingHelper::queueSizeMultiplier, + pDevice->getRootDeviceEnvironment().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER))); } { @@ -2031,6 +2054,10 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenDispatchingWorkThenDispatchTa auto miMathCmd = reinterpret_cast(++lriCmd); EXPECT_EQ(8u, miMathCmd->DW0.BitField.DwordLength); + if constexpr (FamilyType::isUsingMiMathMocs) { + EXPECT_EQ(miMathCmd->DW0.BitField.MemoryObjectControlState, pDevice->getRootDeviceEnvironment().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER)); + } + auto miAluCmd = reinterpret_cast(++miMathCmd); EXPECT_TRUE(verifyAlu(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_1));