From bbe599aa95ede8cd52407d812870f1a91998576f Mon Sep 17 00:00:00 2001 From: Dominik Dabek Date: Tue, 6 Jul 2021 10:14:23 +0000 Subject: [PATCH] Selectively enable getMaxThreadsForWorkgroup WA Related-To: NEO-6022 Signed-off-by: Dominik Dabek --- opencl/source/kernel/kernel.cpp | 4 ++- opencl/test/unit_test/kernel/kernel_tests.cpp | 5 ++- .../os_interface/hw_info_config_tests.cpp | 9 ++++- .../linux/hw_info_config_linux_tests.cpp | 20 +++++++++++ .../windows/hw_info_config_win_tests.cpp | 20 +++++++++++ .../xe_hp_core/hw_helper_tests_xe_hp_core.cpp | 11 ++++++ .../xehp/test_hw_info_config_xehp.inl | 1 + shared/source/device/device_caps.cpp | 2 +- shared/source/helpers/hw_helper.cpp | 5 --- shared/source/helpers/hw_helper.h | 4 --- shared/source/helpers/hw_helper_bdw_plus.inl | 5 --- shared/source/helpers/hw_helper_xehp_plus.inl | 8 ----- shared/source/os_interface/hw_info_config.h | 6 ++++ shared/source/os_interface/hw_info_config.inl | 10 ++++++ .../os_interface/hw_info_config_bdw_plus.inl | 5 +++ .../os_interface/hw_info_config_xehp_plus.inl | 8 +++++ .../xe_hp_core/linux/hw_info_config_xehp.inl | 1 + .../os_agnostic_hw_info_config_xe_hp_core.inl | 15 ++++++++ .../windows/hw_info_config_xe_hp_core.cpp | 1 + shared/test/common/xe_hp_core/CMakeLists.txt | 1 + .../test_hw_info_config_xe_hp_core.cpp | 34 +++++++++++++++++++ 21 files changed, 149 insertions(+), 26 deletions(-) create mode 100644 shared/source/xe_hp_core/os_agnostic_hw_info_config_xe_hp_core.inl create mode 100644 shared/test/common/xe_hp_core/test_hw_info_config_xe_hp_core.cpp diff --git a/opencl/source/kernel/kernel.cpp b/opencl/source/kernel/kernel.cpp index 9a68065b41..f4342a4ec8 100644 --- a/opencl/source/kernel/kernel.cpp +++ b/opencl/source/kernel/kernel.cpp @@ -24,6 +24,7 @@ #include "shared/source/kernel/kernel_arg_descriptor_extended_vme.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/unified_memory_manager.h" +#include 
"shared/source/os_interface/hw_info_config.h" #include "opencl/source/accelerators/intel_accelerator.h" #include "opencl/source/accelerators/intel_motion_estimation.h" @@ -78,7 +79,8 @@ Kernel::Kernel(Program *programArg, const KernelInfo &kernelInfoArg, ClDevice &c imageTransformer.reset(new ImageTransformer); if (kernelInfoArg.kernelDescriptor.kernelAttributes.simdSize == 1u) { auto deviceInfo = getDevice().getDevice().getDeviceInfo(); - maxKernelWorkGroupSize = HwHelper::get(getHardwareInfo().platform.eRenderCoreFamily).getMaxThreadsForWorkgroupInDSSOrSS(getHardwareInfo(), static_cast(deviceInfo.maxNumEUsPerSubSlice), static_cast(deviceInfo.maxNumEUsPerDualSubSlice)); + auto &hwInfoConfig = *HwInfoConfig::get(getHardwareInfo().platform.eProductFamily); + maxKernelWorkGroupSize = hwInfoConfig.getMaxThreadsForWorkgroupInDSSOrSS(getHardwareInfo(), static_cast(deviceInfo.maxNumEUsPerSubSlice), static_cast(deviceInfo.maxNumEUsPerDualSubSlice)); } else { maxKernelWorkGroupSize = static_cast(clDevice.getSharedDeviceInfo().maxWorkGroupSize); } diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 9e33d231b6..e5c496e23f 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -13,6 +13,7 @@ #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/source/memory_manager/unified_memory_manager.h" +#include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_graphics_allocation.h" @@ -3168,7 +3169,9 @@ TEST_F(KernelTests, givenKernelWithSimdEqual1WhenKernelCreatedThenMaxWorgGroupSi auto deviceMaxWorkGroupSize = pDevice->getDeviceInfo().maxWorkGroupSize; auto deviceInfo = pClDevice->getDevice().getDeviceInfo(); - auto 
maxThreadsPerWG = HwHelper::get(pKernel->getHardwareInfo().platform.eRenderCoreFamily).getMaxThreadsForWorkgroupInDSSOrSS(pKernel->getHardwareInfo(), static_cast(deviceInfo.maxNumEUsPerSubSlice), static_cast(deviceInfo.maxNumEUsPerDualSubSlice)); + + auto &hwInfoConfig = *HwInfoConfig::get(pKernel->getHardwareInfo().platform.eProductFamily); + auto maxThreadsPerWG = hwInfoConfig.getMaxThreadsForWorkgroupInDSSOrSS(pKernel->getHardwareInfo(), static_cast(deviceInfo.maxNumEUsPerSubSlice), static_cast(deviceInfo.maxNumEUsPerDualSubSlice)); EXPECT_LT(pKernel->getMaxKernelWorkGroupSize(), deviceMaxWorkGroupSize); EXPECT_EQ(pKernel->getMaxKernelWorkGroupSize(), maxThreadsPerWG); diff --git a/opencl/test/unit_test/os_interface/hw_info_config_tests.cpp b/opencl/test/unit_test/os_interface/hw_info_config_tests.cpp index dcbd47e344..7669b6471a 100644 --- a/opencl/test/unit_test/os_interface/hw_info_config_tests.cpp +++ b/opencl/test/unit_test/os_interface/hw_info_config_tests.cpp @@ -140,9 +140,16 @@ HWTEST_F(HwInfoConfigTest, givenSamplerStateWhenAdjustSamplerStateThenNothingIsC EXPECT_EQ(0, memcmp(&initialState, &state, sizeof(SAMPLER_STATE))); } -HWTEST_F(HwInfoConfigTest, whenCallingIsAdditionalStateBaseAddressWARequiredThenFalseIsReturned) { +HWTEST_F(HwInfoConfigTest, givenHardwareInfoWhenCallingIsAdditionalStateBaseAddressWARequiredThenFalseIsReturned) { auto hwInfoConfig = HwInfoConfig::get(pInHwInfo.platform.eProductFamily); bool ret = hwInfoConfig->isAdditionalStateBaseAddressWARequired(pInHwInfo); EXPECT_FALSE(ret); } + +HWTEST_F(HwInfoConfigTest, givenHardwareInfoWhenCallingIsMaxThreadsForWorkgroupWARequiredThenFalseIsReturned) { + auto hwInfoConfig = HwInfoConfig::get(pInHwInfo.platform.eProductFamily); + bool ret = hwInfoConfig->isMaxThreadsForWorkgroupWARequired(pInHwInfo); + + EXPECT_FALSE(ret); +} diff --git a/opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.cpp b/opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.cpp index 
744c949c1e..008869da03 100644 --- a/opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.cpp +++ b/opencl/test/unit_test/os_interface/linux/hw_info_config_linux_tests.cpp @@ -94,6 +94,21 @@ bool HwInfoConfigHw::isAdditionalStateBaseAddressWARequired(const return false; } +template <> +bool HwInfoConfigHw::isMaxThreadsForWorkgroupWARequired(const HardwareInfo &hwInfo) const { + return false; +} + +template <> +uint32_t HwInfoConfigHw::getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const { + return 0; +} + +template <> +uint32_t HwInfoConfigHw::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const { + return 0; +} + } // namespace NEO struct DummyHwConfig : HwInfoConfigHw { @@ -550,6 +565,11 @@ HWTEST_F(HwInfoConfigTestLinuxDummy, givenHardwareInfoWhenCallingIsAdditionalSta EXPECT_FALSE(ret); } +HWTEST_F(HwInfoConfigTestLinuxDummy, givenHardwareInfoWhenCallingIsMaxThreadsForWorkgroupWARequiredThenFalseIsReturned) { + bool ret = hwConfig.isMaxThreadsForWorkgroupWARequired(outHwInfo); + EXPECT_FALSE(ret); +} + using HwConfigLinux = ::testing::Test; HWTEST2_F(HwConfigLinux, GivenDifferentValuesFromTopologyQueryWhenConfiguringHwInfoThenMaxSlicesSupportedSetToAvailableCountInGtSystemInfo, MatchAny) { diff --git a/opencl/test/unit_test/os_interface/windows/hw_info_config_win_tests.cpp b/opencl/test/unit_test/os_interface/windows/hw_info_config_win_tests.cpp index 34947d9fbe..6a852303c1 100644 --- a/opencl/test/unit_test/os_interface/windows/hw_info_config_win_tests.cpp +++ b/opencl/test/unit_test/os_interface/windows/hw_info_config_win_tests.cpp @@ -72,6 +72,21 @@ bool HwInfoConfigHw::isAdditionalStateBaseAddressWARequired(const return false; } +template <> +bool HwInfoConfigHw::isMaxThreadsForWorkgroupWARequired(const HardwareInfo &hwInfo) const { + return false; +} + +template <> +uint32_t 
HwInfoConfigHw::getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const { + return 0; +} + +template <> +uint32_t HwInfoConfigHw::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const { + return 0; +} + HwInfoConfigTestWindows::HwInfoConfigTestWindows() { this->executionEnvironment = std::make_unique(); this->rootDeviceEnvironment = std::make_unique(*executionEnvironment); @@ -127,6 +142,11 @@ HWTEST_F(HwInfoConfigTestWindows, givenHardwareInfoWhenCallingIsAdditionalStateB EXPECT_FALSE(ret); } +HWTEST_F(HwInfoConfigTestWindows, givenHardwareInfoWhenCallingIsMaxThreadsForWorkgroupWARequiredThenFalseIsReturned) { + bool ret = hwConfig.isMaxThreadsForWorkgroupWARequired(outHwInfo); + EXPECT_FALSE(ret); +} + HWTEST_F(HwInfoConfigTestWindows, givenFtrIaCoherencyFlagWhenConfiguringHwInfoThenSetCoherencySupportCorrectly) { HardwareInfo initialHwInfo = *defaultHwInfo; auto &hwHelper = HwHelper::get(initialHwInfo.platform.eRenderCoreFamily); diff --git a/opencl/test/unit_test/xe_hp_core/hw_helper_tests_xe_hp_core.cpp b/opencl/test/unit_test/xe_hp_core/hw_helper_tests_xe_hp_core.cpp index 0c53001069..7a22b6998d 100644 --- a/opencl/test/unit_test/xe_hp_core/hw_helper_tests_xe_hp_core.cpp +++ b/opencl/test/unit_test/xe_hp_core/hw_helper_tests_xe_hp_core.cpp @@ -106,6 +106,17 @@ XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenRevisionEnumAndPlatformFamilyType } } +XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenRevisionEnumThenProperMaxThreadsForWorkgroupIsReturned) { + HwHelper &hwHelper = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily); + auto &hwInfoConfig = *HwInfoConfig::get(hardwareInfo.platform.eProductFamily); + hardwareInfo.platform.usRevId = hwHelper.getHwRevIdFromStepping(REVISION_A0, hardwareInfo); + EXPECT_EQ(64u, hwInfoConfig.getMaxThreadsForWorkgroupInDSSOrSS(hardwareInfo, 64u, 64u)); + + hardwareInfo.platform.usRevId = 
hwHelper.getHwRevIdFromStepping(REVISION_B, hardwareInfo); + uint32_t numThreadsPerEU = hardwareInfo.gtSystemInfo.ThreadCount / hardwareInfo.gtSystemInfo.EUCount; + EXPECT_EQ(64u * numThreadsPerEU, hwInfoConfig.getMaxThreadsForWorkgroupInDSSOrSS(hardwareInfo, 64u, 64u)); +} + XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenDisablePipeControlFlagIsDefaultWhenLocalMemoryIsEnabledThenReturnFalseAndDoNotProgramPipeControl) { hardwareInfo.featureTable.ftrLocalMemory = true; diff --git a/opencl/test/unit_test/xe_hp_core/xehp/test_hw_info_config_xehp.inl b/opencl/test/unit_test/xe_hp_core/xehp/test_hw_info_config_xehp.inl index 52df4a2e19..5ace685398 100644 --- a/opencl/test/unit_test/xe_hp_core/xehp/test_hw_info_config_xehp.inl +++ b/opencl/test/unit_test/xe_hp_core/xehp/test_hw_info_config_xehp.inl @@ -15,6 +15,7 @@ #include "test.h" HWTEST_EXCLUDE_PRODUCT(HwHelperTest, WhenAllowRenderCompressionIsCalledThenTrueIsReturned, IGFX_XE_HP_SDV); +HWTEST_EXCLUDE_PRODUCT(HwInfoConfigTest, givenHardwareInfoWhenCallingIsMaxThreadsForWorkgroupWARequiredThenFalseIsReturned, IGFX_XE_HP_SDV); using namespace NEO; diff --git a/shared/source/device/device_caps.cpp b/shared/source/device/device_caps.cpp index 0349ee45da..75fc175713 100644 --- a/shared/source/device/device_caps.cpp +++ b/shared/source/device/device_caps.cpp @@ -117,7 +117,7 @@ void Device::initializeCaps() { } deviceInfo.numThreadsPerEU = systemInfo.ThreadCount / systemInfo.EUCount; deviceInfo.threadsPerEUConfigs = hwHelper.getThreadsPerEUConfigs(); - auto maxWS = hwHelper.getMaxThreadsForWorkgroupInDSSOrSS(hwInfo, static_cast(deviceInfo.maxNumEUsPerSubSlice), static_cast(deviceInfo.maxNumEUsPerDualSubSlice)) * simdSizeUsed; + auto maxWS = hwInfoConfig->getMaxThreadsForWorkgroupInDSSOrSS(hwInfo, static_cast(deviceInfo.maxNumEUsPerSubSlice), static_cast(deviceInfo.maxNumEUsPerDualSubSlice)) * simdSizeUsed; maxWS = Math::prevPowerOfTwo(maxWS); deviceInfo.maxWorkGroupSize = std::min(maxWS, 1024u); diff --git 
a/shared/source/helpers/hw_helper.cpp b/shared/source/helpers/hw_helper.cpp index 81130d51df..9780437370 100644 --- a/shared/source/helpers/hw_helper.cpp +++ b/shared/source/helpers/hw_helper.cpp @@ -55,11 +55,6 @@ uint32_t HwHelper::getMaxThreadsForVfe(const HardwareInfo &hwInfo) { return maxHwThreadsReturned; } -uint32_t HwHelper::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const { - uint32_t numThreadsPerEU = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount; - return maxNumEUsPerSubSlice * numThreadsPerEU; -} - uint32_t HwHelper::getSubDevicesCount(const HardwareInfo *pHwInfo) { if (DebugManager.flags.CreateMultipleSubDevices.get() > 0) { return DebugManager.flags.CreateMultipleSubDevices.get(); diff --git a/shared/source/helpers/hw_helper.h b/shared/source/helpers/hw_helper.h index 211f55ca40..65975bbe0d 100644 --- a/shared/source/helpers/hw_helper.h +++ b/shared/source/helpers/hw_helper.h @@ -96,8 +96,6 @@ class HwHelper { virtual bool getEnableLocalMemory(const HardwareInfo &hwInfo) const = 0; virtual std::string getExtensions() const = 0; static uint32_t getMaxThreadsForVfe(const HardwareInfo &hwInfo); - virtual uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const; - virtual uint32_t getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const = 0; virtual uint32_t getMetricsLibraryGenId() const = 0; virtual uint32_t getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const = 0; virtual bool tilingAllowed(bool isSharedContext, bool isImage1d, bool forceLinearStorage) = 0; @@ -212,8 +210,6 @@ class HwHelperHw : public HwHelper { size_t getPaddingForISAAllocation() const override; - uint32_t getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const override; - uint32_t 
getComputeUnitsUsedForScratch(const HardwareInfo *pHwInfo) const override; uint32_t getPitchAlignmentForImage(const HardwareInfo *hwInfo) const override; diff --git a/shared/source/helpers/hw_helper_bdw_plus.inl b/shared/source/helpers/hw_helper_bdw_plus.inl index a9bbf7f02d..98c9c88633 100644 --- a/shared/source/helpers/hw_helper_bdw_plus.inl +++ b/shared/source/helpers/hw_helper_bdw_plus.inl @@ -110,11 +110,6 @@ uint32_t HwHelperHw::getPlanarYuvMaxHeight() const { return planarYuvMaxHeight; } -template -uint32_t HwHelperHw::getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const { - return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice); -} - template aub_stream::MMIOList HwHelperHw::getExtraMmioList(const HardwareInfo &hwInfo, const GmmHelper &gmmHelper) const { return {}; diff --git a/shared/source/helpers/hw_helper_xehp_plus.inl b/shared/source/helpers/hw_helper_xehp_plus.inl index 0e685d7706..90c7fbb2ff 100644 --- a/shared/source/helpers/hw_helper_xehp_plus.inl +++ b/shared/source/helpers/hw_helper_xehp_plus.inl @@ -193,12 +193,4 @@ inline bool HwHelperHw::preferSmallWorkgroupSizeForKernel(const size_ return true; } -template -inline uint32_t HwHelperHw::getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const { - if (isWorkaroundRequired(REVISION_A0, REVISION_B, hwInfo)) { - return std::min(HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerDualSubSlice), 64u); - } - return HwHelper::getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerDualSubSlice); -} - } // namespace NEO diff --git a/shared/source/os_interface/hw_info_config.h b/shared/source/os_interface/hw_info_config.h index 28d492ec78..d0ccbd1465 100644 --- a/shared/source/os_interface/hw_info_config.h +++ b/shared/source/os_interface/hw_info_config.h @@ -39,6 +39,9 @@ class HwInfoConfig { virtual void 
convertTimestampsFromOaToCsDomain(uint64_t &timestampData) = 0; virtual uint32_t getDeviceMemoryMaxClkRate(const HardwareInfo *hwInfo) = 0; virtual bool isAdditionalStateBaseAddressWARequired(const HardwareInfo &hwInfo) const = 0; + virtual bool isMaxThreadsForWorkgroupWARequired(const HardwareInfo &hwInfo) const = 0; + virtual uint32_t getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const = 0; + virtual uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const = 0; uint32_t threadsPerEu; }; @@ -61,6 +64,9 @@ class HwInfoConfigHw : public HwInfoConfig { void convertTimestampsFromOaToCsDomain(uint64_t &timestampData) override; uint32_t getDeviceMemoryMaxClkRate(const HardwareInfo *hwInfo) override; bool isAdditionalStateBaseAddressWARequired(const HardwareInfo &hwInfo) const override; + bool isMaxThreadsForWorkgroupWARequired(const HardwareInfo &hwInfo) const override; + uint32_t getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const override; + uint32_t getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const override; protected: HwInfoConfigHw() = default; diff --git a/shared/source/os_interface/hw_info_config.inl b/shared/source/os_interface/hw_info_config.inl index 0699fd225b..b69e82fb67 100644 --- a/shared/source/os_interface/hw_info_config.inl +++ b/shared/source/os_interface/hw_info_config.inl @@ -85,4 +85,14 @@ bool HwInfoConfigHw::isAdditionalStateBaseAddressWARequired(const Ha return false; } +template +bool HwInfoConfigHw::isMaxThreadsForWorkgroupWARequired(const HardwareInfo &hwInfo) const { + return false; +} + +template +uint32_t HwInfoConfigHw::getMaxThreadsForWorkgroup(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice) const { + uint32_t numThreadsPerEU = hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount; + 
return maxNumEUsPerSubSlice * numThreadsPerEU; +} } // namespace NEO diff --git a/shared/source/os_interface/hw_info_config_bdw_plus.inl b/shared/source/os_interface/hw_info_config_bdw_plus.inl index 91611a560e..f721178a8b 100644 --- a/shared/source/os_interface/hw_info_config_bdw_plus.inl +++ b/shared/source/os_interface/hw_info_config_bdw_plus.inl @@ -25,4 +25,9 @@ void HwInfoConfigHw::enableRenderCompression(HardwareInfo *hwInfo) { hwInfo->capabilityTable.ftrRenderCompressedBuffers = hwInfo->featureTable.ftrE2ECompression; } +template +uint32_t HwInfoConfigHw::getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const { + return getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerSubSlice); +} + } // namespace NEO diff --git a/shared/source/os_interface/hw_info_config_xehp_plus.inl b/shared/source/os_interface/hw_info_config_xehp_plus.inl index 76bb6b6193..1e4565192a 100644 --- a/shared/source/os_interface/hw_info_config_xehp_plus.inl +++ b/shared/source/os_interface/hw_info_config_xehp_plus.inl @@ -24,4 +24,12 @@ void HwInfoConfigHw::enableRenderCompression(HardwareInfo *hwInfo) { hwInfo->capabilityTable.ftrRenderCompressedBuffers = hwInfo->featureTable.ftrE2ECompression; } +template +uint32_t HwInfoConfigHw::getMaxThreadsForWorkgroupInDSSOrSS(const HardwareInfo &hwInfo, uint32_t maxNumEUsPerSubSlice, uint32_t maxNumEUsPerDualSubSlice) const { + if (isMaxThreadsForWorkgroupWARequired(hwInfo)) { + return std::min(getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerDualSubSlice), 64u); + } + return getMaxThreadsForWorkgroup(hwInfo, maxNumEUsPerDualSubSlice); +} + } // namespace NEO diff --git a/shared/source/xe_hp_core/linux/hw_info_config_xehp.inl b/shared/source/xe_hp_core/linux/hw_info_config_xehp.inl index bc35d095b5..af3379ec91 100644 --- a/shared/source/xe_hp_core/linux/hw_info_config_xehp.inl +++ b/shared/source/xe_hp_core/linux/hw_info_config_xehp.inl @@ -9,6 +9,7 @@ #include 
"shared/source/helpers/hw_info.h" #include "shared/source/kernel/kernel_properties.h" #include "shared/source/os_interface/hw_info_config.h" +#include "shared/source/xe_hp_core/os_agnostic_hw_info_config_xe_hp_core.inl" namespace NEO { template <> diff --git a/shared/source/xe_hp_core/os_agnostic_hw_info_config_xe_hp_core.inl b/shared/source/xe_hp_core/os_agnostic_hw_info_config_xe_hp_core.inl new file mode 100644 index 0000000000..e747d1fd15 --- /dev/null +++ b/shared/source/xe_hp_core/os_agnostic_hw_info_config_xe_hp_core.inl @@ -0,0 +1,15 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +using namespace NEO; + +template <> +bool HwInfoConfigHw::isMaxThreadsForWorkgroupWARequired(const HardwareInfo &hwInfo) const { + auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); + uint32_t stepping = hwHelper.getSteppingFromHwRevId(hwInfo); + return REVISION_A0 == stepping; +} \ No newline at end of file diff --git a/shared/source/xe_hp_core/windows/hw_info_config_xe_hp_core.cpp b/shared/source/xe_hp_core/windows/hw_info_config_xe_hp_core.cpp index 467d05c8ca..5eb54d919c 100644 --- a/shared/source/xe_hp_core/windows/hw_info_config_xe_hp_core.cpp +++ b/shared/source/xe_hp_core/windows/hw_info_config_xe_hp_core.cpp @@ -16,6 +16,7 @@ namespace NEO { #ifdef SUPPORT_XEHP +#include "shared/source/xe_hp_core/os_agnostic_hw_info_config_xe_hp_core.inl" template <> int HwInfoConfigHw::configureHardwareCustom(HardwareInfo *hwInfo, OSInterface *osIface) { auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); diff --git a/shared/test/common/xe_hp_core/CMakeLists.txt b/shared/test/common/xe_hp_core/CMakeLists.txt index a998c262eb..b4376edaa3 100644 --- a/shared/test/common/xe_hp_core/CMakeLists.txt +++ b/shared/test/common/xe_hp_core/CMakeLists.txt @@ -16,6 +16,7 @@ if(TESTS_XE_HP_CORE) target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt 
${CMAKE_CURRENT_SOURCE_DIR}/unit_test_helper_xe_hp_core.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_hw_info_config_xe_hp_core.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_preemption_xe_hp_core.cpp ${COMPUTE_RUNTIME_ULT_XE_HP_CORE} ${NEO_CORE_TESTS_XE_HP_CORE} diff --git a/shared/test/common/xe_hp_core/test_hw_info_config_xe_hp_core.cpp b/shared/test/common/xe_hp_core/test_hw_info_config_xe_hp_core.cpp new file mode 100644 index 0000000000..d96adfe274 --- /dev/null +++ b/shared/test/common/xe_hp_core/test_hw_info_config_xe_hp_core.cpp @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2021 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/helpers/hw_helper.h" +#include "shared/source/os_interface/hw_info_config.h" +#include "shared/test/common/fixtures/device_fixture.h" + +#include "test.h" + +using namespace NEO; + +using XeHPHwInfoConfig = Test; + +XEHPTEST_F(XeHPHwInfoConfig, givenXEHPWithA0SteppingThenMaxThreadsForWorkgroupWAIsRequired) { + auto hwInfoConfig = HwInfoConfig::get(productFamily); + auto hwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); + auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); + hwInfo->platform.usRevId = hwHelper.getHwRevIdFromStepping(REVISION_A0, *hwInfo); + auto isWARequired = hwInfoConfig->isMaxThreadsForWorkgroupWARequired(pDevice->getHardwareInfo()); + EXPECT_TRUE(isWARequired); +} + +XEHPTEST_F(XeHPHwInfoConfig, givenXEHPWithBSteppingThenMaxThreadsForWorkgroupWAIsNotRequired) { + auto hwInfoConfig = HwInfoConfig::get(productFamily); + auto hwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); + auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily); + hwInfo->platform.usRevId = hwHelper.getHwRevIdFromStepping(REVISION_B, *hwInfo); + auto isWARequired = hwInfoConfig->isMaxThreadsForWorkgroupWARequired(pDevice->getHardwareInfo()); + EXPECT_FALSE(isWARequired); +} \ No newline at end of file