diff --git a/opencl/test/unit_test/command_queue/local_work_size_tests.cpp b/opencl/test/unit_test/command_queue/local_work_size_tests.cpp index abc9eb841e..c09b335d5a 100644 --- a/opencl/test/unit_test/command_queue/local_work_size_tests.cpp +++ b/opencl/test/unit_test/command_queue/local_work_size_tests.cpp @@ -719,7 +719,27 @@ TEST(localWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenItHasCorre using LocalWorkSizeTest = ::testing::Test; -HWTEST_F(LocalWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenTestEuFusionFtr) { +HWTEST2_F(LocalWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenWorkgroupSizeIsCorrect, IsAtMostGen11) { + MockClDevice device{new MockDevice}; + MockKernelWithInternals kernel(device); + kernel.kernelInfo.kernelDescriptor.kernelAttributes.barrierCount = 1; + DispatchInfo dispatchInfo; + dispatchInfo.setClDevice(&device); + dispatchInfo.setKernel(kernel.mockKernel); + + const uint32_t maxBarriersPerHSlice = (defaultHwInfo->platform.eRenderCoreFamily >= IGFX_GEN9_CORE) ? 32 : 16; + const uint32_t nonFusedMinWorkGroupSize = static_cast(device.getSharedDeviceInfo().maxNumEUsPerSubSlice) * + device.getSharedDeviceInfo().numThreadsPerEU * + static_cast(kernel.mockKernel->getKernelInfo().getMaxSimdSize()) / + maxBarriersPerHSlice; + WorkSizeInfo workSizeInfo = createWorkSizeInfoFromDispatchInfo(dispatchInfo); + + EXPECT_EQ(nonFusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize); +} + +using IsCoreWithFusedEu = IsWithinGfxCore; + +HWTEST2_F(LocalWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenTestEuFusionFtr, IsCoreWithFusedEu) { MockClDevice device{new MockDevice}; MockKernelWithInternals kernel(device); kernel.kernelInfo.kernelDescriptor.kernelAttributes.barrierCount = 1; @@ -735,11 +755,8 @@ HWTEST_F(LocalWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenTestEu const uint32_t fusedMinWorkGroupSize = 2 * nonFusedMinWorkGroupSize; WorkSizeInfo workSizeInfo = createWorkSizeInfoFromDispatchInfo(dispatchInfo); - if (defaultHwInfo->platform.eRenderCoreFamily < IGFX_GEN12_CORE) { - EXPECT_EQ(nonFusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize); - } else { - EXPECT_EQ(fusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize); - } + EXPECT_NE(nonFusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize); + EXPECT_EQ(fusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize); } HWTEST2_F(LocalWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenTestEuFusionFtrForcedByDebugManager, IsAtLeastGen12lp) { diff --git a/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp b/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp index 44b7328fee..6133805616 100644 --- a/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp +++ b/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp @@ -208,7 +208,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, PreambleCfeStateXeHPAndLater, givenSetDebugFlagWhen auto cfeState = reinterpret_cast(*cfeStateIt); - EXPECT_EQ(expectedValue1, cfeState->getFusedEuDispatch()); EXPECT_EQ(expectedValue1, static_cast(cfeState->getOverDispatchControl())); EXPECT_EQ(expectedValue1, cfeState->getLargeGRFThreadAdjustDisable()); EXPECT_EQ(expectedValue2, cfeState->getNumberOfWalkers()); diff --git a/opencl/test/unit_test/xe_hp_core/xehp/test_preamble_xehp.cpp b/opencl/test/unit_test/xe_hp_core/xehp/test_preamble_xehp.cpp index 0dee858b6b..08e2f2a657 100644 --- a/opencl/test/unit_test/xe_hp_core/xehp/test_preamble_xehp.cpp +++ b/opencl/test/unit_test/xe_hp_core/xehp/test_preamble_xehp.cpp @@ -81,3 +81,39 @@ HWTEST2_F(PreambleCfeState, givenXehpAndFlagCFEComputeOverdispatchDisableSetTrue EXPECT_TRUE(cfeState->getComputeOverdispatchDisable()); } + +HWTEST2_F(PreambleCfeState, givenXehpAndDisabledFusedEuWhenCfeStateProgrammedThenFusedEuDispatchSetToTrue, IsXEHP) { + using CFE_STATE = typename FamilyType::CFE_STATE; + + auto hwInfo = *defaultHwInfo; + hwInfo.capabilityTable.fusedEuEnabled = false; + + auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::RenderCompute); + StreamProperties streamProperties{}; + streamProperties.frontEndState.setProperties(false, false, false, hwInfo); + PreambleHelper::programVfeState(pVfeCmd, hwInfo, 0u, 0, 0, streamProperties); + parseCommands(linearStream); + auto cfeStateIt = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), cfeStateIt); + auto cfeState = reinterpret_cast(*cfeStateIt); + + EXPECT_TRUE(cfeState->getFusedEuDispatch()); +} + +HWTEST2_F(PreambleCfeState, givenXehpAndEnabledFusedEuWhenCfeStateProgrammedThenFusedEuDispatchSetToFalse, IsXEHP) { + using CFE_STATE = typename FamilyType::CFE_STATE; + + auto hwInfo = *defaultHwInfo; + hwInfo.capabilityTable.fusedEuEnabled = true; + + auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::RenderCompute); + StreamProperties streamProperties{}; + streamProperties.frontEndState.setProperties(false, false, false, hwInfo); + PreambleHelper::programVfeState(pVfeCmd, hwInfo, 0u, 0, 0, streamProperties); + parseCommands(linearStream); + auto cfeStateIt = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), cfeStateIt); + auto cfeState = reinterpret_cast(*cfeStateIt); + + EXPECT_FALSE(cfeState->getFusedEuDispatch()); +} \ No newline at end of file diff --git a/opencl/test/unit_test/xe_hpg_core/test_cmds_programming_xe_hpg_core.cpp b/opencl/test/unit_test/xe_hpg_core/test_cmds_programming_xe_hpg_core.cpp index e0f4c5d40e..fbcb122042 100644 --- a/opencl/test/unit_test/xe_hpg_core/test_cmds_programming_xe_hpg_core.cpp +++ b/opencl/test/unit_test/xe_hpg_core/test_cmds_programming_xe_hpg_core.cpp @@ -9,6 +9,7 @@ #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/state_base_address.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" +#include "shared/test/unit_test/preamble/preamble_fixture.h" #include "opencl/source/helpers/cl_memory_properties_helpers.h" #include "opencl/source/mem_obj/buffer.h" @@ -356,3 +357,41 @@ XE_HPG_CORETEST_F(CmdsProgrammingTestsXeHpgCore, givenDecompressInL3ForImage2dFr clReleaseMemObject(imageDesc.mem_object); } + +using PreambleCfeState = PreambleFixture; + +HWTEST2_F(PreambleCfeState, givenXehpAndDisabledFusedEuWhenCfeStateProgrammedThenFusedEuDispatchSetToTrue, IsXeHpgCore) { + using CFE_STATE = typename FamilyType::CFE_STATE; + + auto hwInfo = *defaultHwInfo; + hwInfo.capabilityTable.fusedEuEnabled = false; + + auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::RenderCompute); + StreamProperties streamProperties{}; + streamProperties.frontEndState.setProperties(false, false, false, hwInfo); + PreambleHelper::programVfeState(pVfeCmd, hwInfo, 0u, 0, 0, streamProperties); + parseCommands(linearStream); + auto cfeStateIt = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), cfeStateIt); + auto cfeState = reinterpret_cast(*cfeStateIt); + + EXPECT_TRUE(cfeState->getFusedEuDispatch()); +} + +HWTEST2_F(PreambleCfeState, givenXehpAndEnabledFusedEuWhenCfeStateProgrammedThenFusedEuDispatchSetToFalse, IsXeHpgCore) { + using CFE_STATE = typename FamilyType::CFE_STATE; + + auto hwInfo = *defaultHwInfo; + hwInfo.capabilityTable.fusedEuEnabled = true; + + auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::RenderCompute); + StreamProperties streamProperties{}; + streamProperties.frontEndState.setProperties(false, false, false, hwInfo); + PreambleHelper::programVfeState(pVfeCmd, hwInfo, 0u, 0, 0, streamProperties); + parseCommands(linearStream); + auto cfeStateIt = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), cfeStateIt); + auto cfeState = reinterpret_cast(*cfeStateIt); + + EXPECT_FALSE(cfeState->getFusedEuDispatch()); +} \ No newline at end of file diff --git a/shared/source/helpers/preamble_xehp_and_later.inl b/shared/source/helpers/preamble_xehp_and_later.inl index 10eda4d0e8..e67f7707af 100644 --- a/shared/source/helpers/preamble_xehp_and_later.inl +++ b/shared/source/helpers/preamble_xehp_and_later.inl @@ -108,13 +108,13 @@ void *PreambleHelper::getSpaceForVfeState(LinearStream *pCommandStream, return pCommandStream->getSpace(sizeof(CFE_STATE)); } -template <> -void PreambleHelper::programVfeState(void *pVfeState, - const HardwareInfo &hwInfo, - uint32_t scratchSize, - uint64_t scratchAddress, - uint32_t maxFrontEndThreads, - const StreamProperties &streamProperties) { +template +void PreambleHelper::programVfeState(void *pVfeState, + const HardwareInfo &hwInfo, + uint32_t scratchSize, + uint64_t scratchAddress, + uint32_t maxFrontEndThreads, + const StreamProperties &streamProperties) { using CFE_STATE = typename Family::CFE_STATE; auto cfeState = reinterpret_cast(pVfeState); @@ -136,9 +136,6 @@ void PreambleHelper::programVfeState(void *pVfeState, if (DebugManager.flags.CFEOverDispatchControl.get() != -1) { cmd.setOverDispatchControl(static_cast(DebugManager.flags.CFEOverDispatchControl.get())); } - if (DebugManager.flags.CFEFusedEUDispatch.get() != -1) { - cmd.setFusedEuDispatch(DebugManager.flags.CFEFusedEUDispatch.get()); - } if (DebugManager.flags.CFELargeGRFThreadAdjustDisable.get() != -1) { cmd.setLargeGRFThreadAdjustDisable(DebugManager.flags.CFELargeGRFThreadAdjustDisable.get()); } diff --git a/shared/source/xe_hp_core/preamble_xe_hp_core.cpp b/shared/source/xe_hp_core/preamble_xe_hp_core.cpp index 3997beb5b1..265cc316ea 100644 --- a/shared/source/xe_hp_core/preamble_xe_hp_core.cpp +++ b/shared/source/xe_hp_core/preamble_xe_hp_core.cpp @@ -34,6 +34,11 @@ void PreambleHelper::appendProgramVFEState(const HardwareInfo &hwInfo, c if (DebugManager.flags.CFESingleSliceDispatchCCSMode.get() != -1) { command->setSingleSliceDispatchCcsMode(DebugManager.flags.CFESingleSliceDispatchCCSMode.get()); } + + auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); + if (!hwHelper.isFusedEuDispatchEnabled(hwInfo)) { + command->setFusedEuDispatch(true); + } } template <> diff --git a/shared/source/xe_hpg_core/preamble_xe_hpg_core.cpp b/shared/source/xe_hpg_core/preamble_xe_hpg_core.cpp index 3104da45dd..90948ec302 100644 --- a/shared/source/xe_hpg_core/preamble_xe_hpg_core.cpp +++ b/shared/source/xe_hpg_core/preamble_xe_hpg_core.cpp @@ -27,6 +27,11 @@ void PreambleHelper::appendProgramVFEState(const HardwareInfo &hwInfo, c if (DebugManager.flags.CFESingleSliceDispatchCCSMode.get() != -1) { command->setSingleSliceDispatchCcsMode(DebugManager.flags.CFESingleSliceDispatchCCSMode.get()); } + + auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); + if (!hwHelper.isFusedEuDispatchEnabled(hwInfo)) { + command->setFusedEuDispatch(true); + } } template <>