From 174c27eb313b93df7c6f0f62607abb018830724e Mon Sep 17 00:00:00 2001 From: Konstanty Misiak Date: Thu, 17 Mar 2022 15:53:21 +0000 Subject: [PATCH] Fix CFEFusedEUDispatch debug flag Signed-off-by: Konstanty Misiak --- level_zero/core/source/kernel/kernel_imp.cpp | 2 +- .../unit_tests/sources/kernel/test_kernel.cpp | 2 +- opencl/source/cl_device/cl_device_caps.cpp | 2 +- opencl/source/kernel/kernel.cpp | 2 +- .../command_queue/local_work_size_tests.cpp | 6 +++- .../unit_test/device/device_caps_tests.cpp | 2 +- .../gen12lp/hw_helper_tests_gen12lp.inl | 2 +- .../unit_test/helpers/hw_helper_tests.cpp | 2 +- opencl/test/unit_test/kernel/kernel_tests.cpp | 34 +++++++++++++++++-- .../test_cmds_programming_xe_hpg_core.cpp | 23 ++++++++++++- shared/source/gen12lp/hw_helper_gen12lp.cpp | 14 ++++++++ shared/source/gen12lp/preamble_gen12lp.cpp | 2 +- shared/source/helpers/hw_helper.h | 4 +-- .../source/helpers/hw_helper_bdw_to_icllp.inl | 4 +-- .../helpers/hw_helper_tgllp_and_later.inl | 4 +-- shared/source/program/kernel_info.cpp | 4 ++- .../source/xe_hp_core/preamble_xe_hp_core.cpp | 4 +-- .../xe_hpg_core/hw_helper_xe_hpg_core.cpp | 14 ++++++++ .../xe_hpg_core/preamble_xe_hpg_core.cpp | 2 +- .../common/gen12lp/test_preamble_gen12lp.cpp | 13 +++++++ 20 files changed, 120 insertions(+), 22 deletions(-) diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp index 5960d6a78b..bda6af49ef 100644 --- a/level_zero/core/source/kernel/kernel_imp.cpp +++ b/level_zero/core/source/kernel/kernel_imp.cpp @@ -738,7 +738,7 @@ ze_result_t KernelImp::getProperties(ze_kernel_properties_t *pKernelProperties) preferredGroupSizeProperties->preferredMultiple = this->kernelImmData->getKernelInfo()->getMaxSimdSize(); auto &hwHelper = NEO::HwHelper::get(this->module->getDevice()->getHwInfo().platform.eRenderCoreFamily); - if (hwHelper.isFusedEuDispatchEnabled(this->module->getDevice()->getHwInfo())) { + if 
(hwHelper.isFusedEuDispatchEnabled(this->module->getDevice()->getHwInfo(), kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion)) { preferredGroupSizeProperties->preferredMultiple *= 2; } } diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp index b1a111592b..e7453ca5a1 100644 --- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp @@ -1093,7 +1093,7 @@ TEST_F(KernelPropertiesTests, whenPassingPreferredGroupSizeStructToGetProperties EXPECT_EQ(ZE_RESULT_SUCCESS, res); auto &hwHelper = NEO::HwHelper::get(module->getDevice()->getHwInfo().platform.eRenderCoreFamily); - if (hwHelper.isFusedEuDispatchEnabled(module->getDevice()->getHwInfo())) { + if (hwHelper.isFusedEuDispatchEnabled(module->getDevice()->getHwInfo(), false)) { EXPECT_EQ(preferredGroupProperties.preferredMultiple, static_cast(kernel->getImmutableData()->getKernelInfo()->getMaxSimdSize()) * 2); } else { EXPECT_EQ(preferredGroupProperties.preferredMultiple, static_cast(kernel->getImmutableData()->getKernelInfo()->getMaxSimdSize())); diff --git a/opencl/source/cl_device/cl_device_caps.cpp b/opencl/source/cl_device/cl_device_caps.cpp index 1d91e0fddc..2c071f32fa 100644 --- a/opencl/source/cl_device/cl_device_caps.cpp +++ b/opencl/source/cl_device/cl_device_caps.cpp @@ -396,7 +396,7 @@ void ClDevice::initializeCaps() { deviceInfo.preferredLocalAtomicAlignment = MemoryConstants::cacheLineSize; deviceInfo.preferredPlatformAtomicAlignment = MemoryConstants::cacheLineSize; - deviceInfo.preferredWorkGroupSizeMultiple = hwHelper.isFusedEuDispatchEnabled(hwInfo) + deviceInfo.preferredWorkGroupSizeMultiple = hwHelper.isFusedEuDispatchEnabled(hwInfo, false) ? 
CommonConstants::maximalSimdSize * 2 : CommonConstants::maximalSimdSize; diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index e301672ed8..1f55072720 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -565,7 +565,7 @@ cl_int Kernel::getWorkGroupInfo(cl_kernel_work_group_info paramName, case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: preferredWorkGroupSizeMultiple = kernelInfo.getMaxSimdSize(); - if (hwHelper.isFusedEuDispatchEnabled(hwInfo) && !kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion) { + if (hwHelper.isFusedEuDispatchEnabled(hwInfo, kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion)) { preferredWorkGroupSizeMultiple *= 2; } srcSize = sizeof(preferredWorkGroupSizeMultiple); diff --git a/opencl/test/unit_test/command_queue/local_work_size_tests.cpp b/opencl/test/unit_test/command_queue/local_work_size_tests.cpp index 09bcf5a322..3b12977fd1 100644 --- a/opencl/test/unit_test/command_queue/local_work_size_tests.cpp +++ b/opencl/test/unit_test/command_queue/local_work_size_tests.cpp @@ -32,8 +32,12 @@ TEST(localWorkSizeTest, givenDisableEUFusionWhenCreatingWorkSizeInfoThenCorrectM true // disableEUFusion ); + auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); + bool fusedDispatchEnabled = hwHelper.isFusedEuDispatchEnabled(*defaultHwInfo, true); + auto WGSMultiple = fusedDispatchEnabled ? 2 : 1; + uint32_t maxBarriersPerHSlice = (defaultHwInfo.get()->platform.eRenderCoreFamily >= IGFX_GEN9_CORE) ? 
32 : 16; - uint32_t expectedMinWGS = simdSize * numThreadsPerSubS / maxBarriersPerHSlice; + uint32_t expectedMinWGS = WGSMultiple * simdSize * numThreadsPerSubS / maxBarriersPerHSlice; EXPECT_EQ(expectedMinWGS, wsInfo.minWorkGroupSize); } diff --git a/opencl/test/unit_test/device/device_caps_tests.cpp b/opencl/test/unit_test/device/device_caps_tests.cpp index 164110d45c..dfd193a61d 100644 --- a/opencl/test/unit_test/device/device_caps_tests.cpp +++ b/opencl/test/unit_test/device/device_caps_tests.cpp @@ -242,7 +242,7 @@ TEST_F(DeviceGetCapsTest, WhenCreatingDeviceThenCapsArePopulatedCorrectly) { EXPECT_EQ(64u, caps.preferredPlatformAtomicAlignment); EXPECT_TRUE(caps.nonUniformWorkGroupSupport); - auto expectedPreferredWorkGroupSizeMultiple = hwHelper.isFusedEuDispatchEnabled(*defaultHwInfo) + auto expectedPreferredWorkGroupSizeMultiple = hwHelper.isFusedEuDispatchEnabled(*defaultHwInfo, false) ? CommonConstants::maximalSimdSize * 2 : CommonConstants::maximalSimdSize; EXPECT_EQ(expectedPreferredWorkGroupSizeMultiple, caps.preferredWorkGroupSizeMultiple); diff --git a/opencl/test/unit_test/gen12lp/hw_helper_tests_gen12lp.inl b/opencl/test/unit_test/gen12lp/hw_helper_tests_gen12lp.inl index 5a35b4c40e..4e1c154fef 100644 --- a/opencl/test/unit_test/gen12lp/hw_helper_tests_gen12lp.inl +++ b/opencl/test/unit_test/gen12lp/hw_helper_tests_gen12lp.inl @@ -195,7 +195,7 @@ GEN12LPTEST_F(HwHelperTestGen12Lp, givenTgllpWhenIsFusedEuDispatchEnabledIsCalle for (auto &[expectedResult, wa, debugKey] : testParams) { waTable.flags.waDisableFusedThreadScheduling = wa; DebugManager.flags.CFEFusedEUDispatch.set(debugKey); - EXPECT_EQ(expectedResult, helper.isFusedEuDispatchEnabled(hardwareInfo)); + EXPECT_EQ(expectedResult, helper.isFusedEuDispatchEnabled(hardwareInfo, false)); } } diff --git a/opencl/test/unit_test/helpers/hw_helper_tests.cpp b/opencl/test/unit_test/helpers/hw_helper_tests.cpp index 6025e217cd..1ebe9b0715 100644 --- a/opencl/test/unit_test/helpers/hw_helper_tests.cpp 
+++ b/opencl/test/unit_test/helpers/hw_helper_tests.cpp @@ -1061,7 +1061,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, WhenIsFusedEuDispatchEnabledIsCalledTh GTEST_SKIP(); } auto &helper = HwHelper::get(renderCoreFamily); - EXPECT_FALSE(helper.isFusedEuDispatchEnabled(hardwareInfo)); + EXPECT_FALSE(helper.isFusedEuDispatchEnabled(hardwareInfo, false)); } HWTEST_F(PipeControlHelperTests, WhenGettingPipeControSizeForCacheFlushThenReturnCorrectValue) { diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index a3eea2a9ba..cd874fff68 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -315,6 +315,10 @@ TEST_F(KernelTests, GivenRequiredDisabledEUFusionFlagWhenGettingPrefferedWorkGro kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion = true; MockKernel kernel(pProgram, kernelInfo, *pClDevice); + auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); + bool fusedDispatchEnabled = hwHelper.isFusedEuDispatchEnabled(*defaultHwInfo, true); + auto expectedValue = kernelInfo.getMaxSimdSize() * (fusedDispatchEnabled ? 
2 : 1); + cl_kernel_info paramName = CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE; size_t paramValue; size_t paramValueSize = sizeof(paramValue); @@ -328,9 +332,35 @@ TEST_F(KernelTests, GivenRequiredDisabledEUFusionFlagWhenGettingPrefferedWorkGro EXPECT_EQ(CL_SUCCESS, retVal); EXPECT_EQ(paramValueSize, paramValueSizeRet); - EXPECT_EQ(kernelInfo.getMaxSimdSize(), paramValue); + EXPECT_EQ(expectedValue, paramValue); +} - kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion = false; +TEST_F(KernelTests, GivenCFEFusedEUDispatchEnabledAndRequiredDisabledUEFusionWhenGettingPrefferedWorkGroupSizeMultipleThenCorectValueIsReturned) { + DebugManagerStateRestore dbgRestorer; + DebugManager.flags.CFEFusedEUDispatch.set(0); + + KernelInfo kernelInfo = {}; + kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion = true; + MockKernel kernel(pProgram, kernelInfo, *pClDevice); + + auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); + bool fusedDispatchEnabled = hwHelper.isFusedEuDispatchEnabled(*defaultHwInfo, true); + auto expectedValue = kernelInfo.getMaxSimdSize() * (fusedDispatchEnabled ? 
2 : 1); + + cl_kernel_info paramName = CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE; + size_t paramValue; + size_t paramValueSize = sizeof(paramValue); + size_t paramValueSizeRet = 0; + + retVal = kernel.getWorkGroupInfo( + paramName, + paramValueSize, + &paramValue, + &paramValueSizeRet); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(paramValueSize, paramValueSizeRet); + + EXPECT_EQ(expectedValue, paramValue); } TEST_F(KernelTests, GivenInvalidParamNameWhenGettingWorkGroupInfoThenInvalidValueErrorIsReturned) { diff --git a/opencl/test/unit_test/xe_hpg_core/test_cmds_programming_xe_hpg_core.cpp b/opencl/test/unit_test/xe_hpg_core/test_cmds_programming_xe_hpg_core.cpp index 572a8aacd9..334893d9a0 100644 --- a/opencl/test/unit_test/xe_hpg_core/test_cmds_programming_xe_hpg_core.cpp +++ b/opencl/test/unit_test/xe_hpg_core/test_cmds_programming_xe_hpg_core.cpp @@ -374,6 +374,27 @@ HWTEST2_F(PreambleCfeState, givenXehpAndDisabledFusedEuWhenCfeStateProgrammedThe EXPECT_TRUE(cfeState->getFusedEuDispatch()); } +HWTEST2_F(PreambleCfeState, givenXehpEnabledFusedEuAndDisableFusedDispatchFromKernelWhenCfeStateProgrammedThenFusedEuDispatchSetToFalse, IsXeHpgCore) { + using CFE_STATE = typename FamilyType::CFE_STATE; + + DebugManagerStateRestore dbgRestorer; + DebugManager.flags.CFEFusedEUDispatch.set(0); + + auto hwInfo = *defaultHwInfo; + hwInfo.capabilityTable.fusedEuEnabled = true; + + auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::RenderCompute); + StreamProperties streamProperties{}; + streamProperties.frontEndState.setProperties(false, true, false, false, hwInfo); + PreambleHelper::programVfeState(pVfeCmd, hwInfo, 0u, 0, 0, streamProperties); + parseCommands(linearStream); + auto cfeStateIt = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), cfeStateIt); + auto cfeState = reinterpret_cast(*cfeStateIt); + + EXPECT_FALSE(cfeState->getFusedEuDispatch()); +} + HWTEST2_F(PreambleCfeState,
givenXehpAndEnabledFusedEuWhenCfeStateProgrammedThenFusedEuDispatchSetToFalse, IsXeHpgCore) { using CFE_STATE = typename FamilyType::CFE_STATE; @@ -390,4 +411,4 @@ HWTEST2_F(PreambleCfeState, givenXehpAndEnabledFusedEuWhenCfeStateProgrammedThen auto cfeState = reinterpret_cast(*cfeStateIt); EXPECT_FALSE(cfeState->getFusedEuDispatch()); -} \ No newline at end of file +} diff --git a/shared/source/gen12lp/hw_helper_gen12lp.cpp b/shared/source/gen12lp/hw_helper_gen12lp.cpp index 1fdeb6ca0d..367a98bb6c 100644 --- a/shared/source/gen12lp/hw_helper_gen12lp.cpp +++ b/shared/source/gen12lp/hw_helper_gen12lp.cpp @@ -20,6 +20,20 @@ using Family = NEO::TGLLPFamily; namespace NEO { +template <> +inline bool HwHelperHw::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const { + auto fusedEuDispatchEnabled = !hwInfo.workaroundTable.flags.waDisableFusedThreadScheduling; + fusedEuDispatchEnabled &= hwInfo.capabilityTable.fusedEuEnabled; + + if (disableEUFusionForKernel) + fusedEuDispatchEnabled = false; + + if (DebugManager.flags.CFEFusedEUDispatch.get() != -1) { + fusedEuDispatchEnabled = (DebugManager.flags.CFEFusedEUDispatch.get() == 0); + } + return fusedEuDispatchEnabled; +} + template <> size_t HwHelperHw::getMax3dImageWidthOrHeight() const { return 2048; diff --git a/shared/source/gen12lp/preamble_gen12lp.cpp b/shared/source/gen12lp/preamble_gen12lp.cpp index 1530c36ca6..2d0e3fa5df 100644 --- a/shared/source/gen12lp/preamble_gen12lp.cpp +++ b/shared/source/gen12lp/preamble_gen12lp.cpp @@ -83,7 +83,7 @@ uint32_t PreambleHelper::getUrbEntryAllocationSize() { template <> void PreambleHelper::programAdditionalFieldsInVfeState(VFE_STATE_TYPE *mediaVfeState, const HardwareInfo &hwInfo, bool disableEUFusion) { auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); - if (!hwHelper.isFusedEuDispatchEnabled(hwInfo) || disableEUFusion) { + if (!hwHelper.isFusedEuDispatchEnabled(hwInfo, disableEUFusion)) { 
mediaVfeState->setDisableSlice0Subslice2(true); } if (DebugManager.flags.MediaVfeStateMaxSubSlices.get() != -1) { diff --git a/shared/source/helpers/hw_helper.h b/shared/source/helpers/hw_helper.h index 4a20347830..f2a210f81c 100644 --- a/shared/source/helpers/hw_helper.h +++ b/shared/source/helpers/hw_helper.h @@ -98,7 +98,7 @@ class HwHelper { virtual uint32_t getMinimalSIMDSize() = 0; virtual bool isWorkaroundRequired(uint32_t lowestSteppingWithBug, uint32_t steppingWithFix, const HardwareInfo &hwInfo) const = 0; virtual bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const = 0; - virtual bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const = 0; + virtual bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const = 0; virtual uint64_t getGpuTimeStampInNS(uint64_t timeStamp, double frequency) const = 0; virtual uint32_t getBindlessSurfaceExtendedMessageDescriptorValue(uint32_t surfStateOffset) const = 0; virtual void setExtraAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const HardwareInfo &hwInfo) const = 0; @@ -301,7 +301,7 @@ class HwHelperHw : public HwHelper { bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const override; - bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const override; + bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const override; static bool isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo); diff --git a/shared/source/helpers/hw_helper_bdw_to_icllp.inl b/shared/source/helpers/hw_helper_bdw_to_icllp.inl index 89a88e0766..bb8d4f3a47 100644 --- a/shared/source/helpers/hw_helper_bdw_to_icllp.inl +++ b/shared/source/helpers/hw_helper_bdw_to_icllp.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -11,7 +11,7 @@ namespace NEO { template -inline bool 
HwHelperHw::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const { +inline bool HwHelperHw::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const { return false; } diff --git a/shared/source/helpers/hw_helper_tgllp_and_later.inl b/shared/source/helpers/hw_helper_tgllp_and_later.inl index 864afc6207..a58796215c 100644 --- a/shared/source/helpers/hw_helper_tgllp_and_later.inl +++ b/shared/source/helpers/hw_helper_tgllp_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -11,7 +11,7 @@ namespace NEO { template -inline bool HwHelperHw::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const { +inline bool HwHelperHw::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const { auto fusedEuDispatchEnabled = !hwInfo.workaroundTable.flags.waDisableFusedThreadScheduling; fusedEuDispatchEnabled &= hwInfo.capabilityTable.fusedEuEnabled; diff --git a/shared/source/program/kernel_info.cpp b/shared/source/program/kernel_info.cpp index cb242f64cb..acb145878e 100644 --- a/shared/source/program/kernel_info.cpp +++ b/shared/source/program/kernel_info.cpp @@ -53,6 +53,7 @@ void WorkSizeInfo::setIfUseImg(const KernelInfo &kernelInfo) { } } } + void WorkSizeInfo::setMinWorkGroupSize(const HardwareInfo *hwInfo, bool disableEUFusion) { minWorkGroupSize = 0; if (hasBarriers) { @@ -65,10 +66,11 @@ void WorkSizeInfo::setMinWorkGroupSize(const HardwareInfo *hwInfo, bool disableE } const auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); - if (hwHelper.isFusedEuDispatchEnabled(*hwInfo) && !disableEUFusion) { + if (hwHelper.isFusedEuDispatchEnabled(*hwInfo, disableEUFusion)) { minWorkGroupSize *= 2; } } + void WorkSizeInfo::checkRatio(const size_t workItems[3]) { if (slmTotalSize > 0) { useRatio = true; diff --git a/shared/source/xe_hp_core/preamble_xe_hp_core.cpp 
b/shared/source/xe_hp_core/preamble_xe_hp_core.cpp index 7e816fc08b..a7f7c76031 100644 --- a/shared/source/xe_hp_core/preamble_xe_hp_core.cpp +++ b/shared/source/xe_hp_core/preamble_xe_hp_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -36,7 +36,7 @@ void PreambleHelper::appendProgramVFEState(const HardwareInfo &hwInfo, c } auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); - if (!hwHelper.isFusedEuDispatchEnabled(hwInfo)) { + if (!hwHelper.isFusedEuDispatchEnabled(hwInfo, false)) { command->setFusedEuDispatch(true); } diff --git a/shared/source/xe_hpg_core/hw_helper_xe_hpg_core.cpp b/shared/source/xe_hpg_core/hw_helper_xe_hpg_core.cpp index ab236e63f2..d096f98768 100644 --- a/shared/source/xe_hpg_core/hw_helper_xe_hpg_core.cpp +++ b/shared/source/xe_hpg_core/hw_helper_xe_hpg_core.cpp @@ -22,6 +22,20 @@ namespace NEO { template <> const AuxTranslationMode HwHelperHw::defaultAuxTranslationMode = AuxTranslationMode::Blit; +template <> +inline bool HwHelperHw::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const { + auto fusedEuDispatchEnabled = !hwInfo.workaroundTable.flags.waDisableFusedThreadScheduling; + fusedEuDispatchEnabled &= hwInfo.capabilityTable.fusedEuEnabled; + + if (disableEUFusionForKernel) + fusedEuDispatchEnabled = false; + + if (DebugManager.flags.CFEFusedEUDispatch.get() != -1) { + fusedEuDispatchEnabled = (DebugManager.flags.CFEFusedEUDispatch.get() == 0); + } + return fusedEuDispatchEnabled; +} + template <> uint32_t HwHelperHw::getMetricsLibraryGenId() const { return static_cast(MetricsLibraryApi::ClientGen::XeHPG); diff --git a/shared/source/xe_hpg_core/preamble_xe_hpg_core.cpp b/shared/source/xe_hpg_core/preamble_xe_hpg_core.cpp index 9b96ea0466..36e4f80cc7 100644 --- a/shared/source/xe_hpg_core/preamble_xe_hpg_core.cpp +++ b/shared/source/xe_hpg_core/preamble_xe_hpg_core.cpp @@ -29,7 
+29,7 @@ void PreambleHelper::appendProgramVFEState(const HardwareInfo &hwInfo, c } auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); - if (!hwHelper.isFusedEuDispatchEnabled(hwInfo) || streamProperties.frontEndState.disableEUFusion.value == 1) { + if (!hwHelper.isFusedEuDispatchEnabled(hwInfo, streamProperties.frontEndState.disableEUFusion.value == 1)) { command->setFusedEuDispatch(true); } diff --git a/shared/test/common/gen12lp/test_preamble_gen12lp.cpp b/shared/test/common/gen12lp/test_preamble_gen12lp.cpp index 4aa27f6c74..2c02d8f8cc 100644 --- a/shared/test/common/gen12lp/test_preamble_gen12lp.cpp +++ b/shared/test/common/gen12lp/test_preamble_gen12lp.cpp @@ -178,6 +178,19 @@ HWTEST2_F(Gen12LpPreambleVfeState, givenDisableEUFusionWhenProgramAdditionalFiel EXPECT_TRUE(pMediaVfeState->getDisableSlice0Subslice2()); } +HWTEST2_F(Gen12LpPreambleVfeState, givenDisableEUFusionAndCFEFusedEUDispatchWhenProgramAdditionalFieldsInVfeStateThenCorrectFieldIsSet, IsTGLLP) { + using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE; + + DebugManagerStateRestore dbgRestorer; + DebugManager.flags.CFEFusedEUDispatch.set(0); + + auto pHwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); + auto pMediaVfeState = reinterpret_cast(linearStream.getSpace(sizeof(MEDIA_VFE_STATE))); + *pMediaVfeState = FamilyType::cmdInitMediaVfeState; + PreambleHelper::programAdditionalFieldsInVfeState(pMediaVfeState, *pHwInfo, true); + EXPECT_FALSE(pMediaVfeState->getDisableSlice0Subslice2()); +} + typedef PreambleFixture ThreadArbitrationGen12Lp; GEN12LPTEST_F(ThreadArbitrationGen12Lp, whenGetDefaultThreadArbitrationPolicyIsCalledThenCorrectPolicyIsReturned) { EXPECT_EQ(ThreadArbitrationPolicy::AgeBased, HwHelperHw::get().getDefaultThreadArbitrationPolicy());