diff --git a/shared/source/gen12lp/gfx_core_helper_gen12lp.cpp b/shared/source/gen12lp/gfx_core_helper_gen12lp.cpp index ccb929e4f3..1e9756a18c 100644 --- a/shared/source/gen12lp/gfx_core_helper_gen12lp.cpp +++ b/shared/source/gen12lp/gfx_core_helper_gen12lp.cpp @@ -155,7 +155,7 @@ uint32_t GfxCoreHelperHw::getComputeUnitsUsedForScratch(const RootDevice ThreadCount/EUCount=7 is no longer valid, so we have to force 8 in below formula. This is required to allocate enough scratch space. */ auto hwInfo = rootDeviceEnvironment.getHardwareInfo(); - return hwInfo->gtSystemInfo.MaxSubSlicesSupported * hwInfo->gtSystemInfo.MaxEuPerSubSlice * 8; + return NEO::GfxCoreHelper::getHighestEnabledDualSubSlice(*hwInfo) * hwInfo->gtSystemInfo.MaxEuPerSubSlice * 8; } template <> diff --git a/shared/source/os_interface/linux/drm_neo.cpp b/shared/source/os_interface/linux/drm_neo.cpp index 4800796be0..e82f5c5767 100644 --- a/shared/source/os_interface/linux/drm_neo.cpp +++ b/shared/source/os_interface/linux/drm_neo.cpp @@ -534,10 +534,6 @@ int Drm::setupHardwareInfo(const DeviceDescriptor *device, bool setupFeatureTabl printDebugString(debugManager.flags.PrintDebugMessages.get(), stderr, "%s", "WARNING: Failed to query engine info\n"); } - if (!hwInfo->gtSystemInfo.L3BankCount) { - hwInfo->gtSystemInfo.L3BankCount = hwInfo->gtSystemInfo.MaxDualSubSlicesSupported; - } - DrmQueryTopologyData topologyData = {}; if (!queryTopology(*hwInfo, topologyData)) { @@ -627,11 +623,14 @@ int Drm::setupHardwareInfo(const DeviceDescriptor *device, bool setupFeatureTabl hwInfo->gtSystemInfo.MaxSlicesSupported = hwInfo->gtSystemInfo.SliceCount; - auto calculatedMaxSubSliceCount = topologyData.maxSlices * topologyData.maxSubSlicesPerSlice; - auto maxSubSliceCount = std::max(static_cast(calculatedMaxSubSliceCount), hwInfo->gtSystemInfo.MaxSubSlicesSupported); + if (topologyData.maxSubSlicesPerSlice != 0) { + hwInfo->gtSystemInfo.MaxSubSlicesSupported = hwInfo->gtSystemInfo.MaxSlicesSupported * 
topologyData.maxSubSlicesPerSlice; + hwInfo->gtSystemInfo.MaxDualSubSlicesSupported = hwInfo->gtSystemInfo.MaxSlicesSupported * topologyData.maxSubSlicesPerSlice; + } - hwInfo->gtSystemInfo.MaxSubSlicesSupported = maxSubSliceCount; - hwInfo->gtSystemInfo.MaxDualSubSlicesSupported = maxSubSliceCount; + if (!hwInfo->gtSystemInfo.L3BankCount) { + hwInfo->gtSystemInfo.L3BankCount = hwInfo->gtSystemInfo.MaxDualSubSlicesSupported; + } if (topologyData.numL3Banks > 0) { hwInfo->gtSystemInfo.L3BankCount = topologyData.numL3Banks; diff --git a/shared/source/os_interface/product_helper.inl b/shared/source/os_interface/product_helper.inl index 3967fe4165..44f40024be 100644 --- a/shared/source/os_interface/product_helper.inl +++ b/shared/source/os_interface/product_helper.inl @@ -16,6 +16,7 @@ #include "shared/source/helpers/constants.h" #include "shared/source/helpers/definitions/indirect_detection_versions.h" #include "shared/source/helpers/device_caps_reader.h" +#include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/hw_mapper.h" #include "shared/source/helpers/kernel_helpers.h" @@ -458,7 +459,14 @@ bool ProductHelperHw::isDcFlushAllowed() const { template uint32_t ProductHelperHw::computeMaxNeededSubSliceSpace(const HardwareInfo &hwInfo) const { - return hwInfo.gtSystemInfo.MaxSubSlicesSupported; + const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo); + + UNRECOVERABLE_IF(highestEnabledSlice == 0); + UNRECOVERABLE_IF(hwInfo.gtSystemInfo.MaxSlicesSupported == 0); + auto subSlicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; + auto maxSubSlice = std::max(highestEnabledSlice * subSlicesPerSlice, hwInfo.gtSystemInfo.MaxSubSlicesSupported); + + return maxSubSlice; } template diff --git a/shared/source/xe_hpc_core/gfx_core_helper_xe_hpc_core.cpp b/shared/source/xe_hpc_core/gfx_core_helper_xe_hpc_core.cpp index 
2791a7e6e1..8aef38016c 100644 --- a/shared/source/xe_hpc_core/gfx_core_helper_xe_hpc_core.cpp +++ b/shared/source/xe_hpc_core/gfx_core_helper_xe_hpc_core.cpp @@ -276,8 +276,7 @@ uint32_t GfxCoreHelperHw::getComputeUnitsUsedForScratch(const RootDevice auto &helper = rootDeviceEnvironment.getHelper(); auto hwInfo = rootDeviceEnvironment.getHardwareInfo(); uint32_t threadEuRatio = helper.getThreadEuRatioForScratch(*hwInfo); - - return hwInfo->gtSystemInfo.MaxSubSlicesSupported * hwInfo->gtSystemInfo.MaxEuPerSubSlice * threadEuRatio; + return NEO::GfxCoreHelper::getHighestEnabledDualSubSlice(*hwInfo) * hwInfo->gtSystemInfo.MaxEuPerSubSlice * threadEuRatio; } template <> diff --git a/shared/source/xe_hpg_core/dg2/os_agnostic_product_helper_dg2.inl b/shared/source/xe_hpg_core/dg2/os_agnostic_product_helper_dg2.inl index c3e45b0181..c05731e82b 100644 --- a/shared/source/xe_hpg_core/dg2/os_agnostic_product_helper_dg2.inl +++ b/shared/source/xe_hpg_core/dg2/os_agnostic_product_helper_dg2.inl @@ -151,18 +151,6 @@ bool ProductHelperHw::isTile64With3DSurfaceOnBCSSupported(const Hard return true; } -template <> -uint32_t ProductHelperHw::computeMaxNeededSubSliceSpace(const HardwareInfo &hwInfo) const { - const uint32_t highestEnabledSlice = NEO::GfxCoreHelper::getHighestEnabledSlice(hwInfo); - - UNRECOVERABLE_IF(highestEnabledSlice == 0); - UNRECOVERABLE_IF(hwInfo.gtSystemInfo.MaxSlicesSupported == 0); - auto subSlicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported; - auto maxSubSlice = std::max(highestEnabledSlice * subSlicesPerSlice, hwInfo.gtSystemInfo.MaxSubSlicesSupported); - - return maxSubSlice; -} - template <> bool ProductHelperHw::isCpuCopyNecessary(const void *ptr, MemoryManager *memoryManager) const { if (memoryManager) { diff --git a/shared/test/unit_test/gen12lp/test_device_caps_gen12lp.inl b/shared/test/unit_test/gen12lp/test_device_caps_gen12lp.inl index 78ef09007e..8130446d8a 100644 --- 
a/shared/test/unit_test/gen12lp/test_device_caps_gen12lp.inl +++ b/shared/test/unit_test/gen12lp/test_device_caps_gen12lp.inl @@ -38,7 +38,7 @@ GEN12LPTEST_F(Gen12LpDeviceCaps, givenHwInfoWhenRequestedComputeUnitsUsedForScra const auto &hwInfo = pDevice->getHardwareInfo(); auto &gfxCoreHelperl = getHelper(); - uint32_t expectedValue = hwInfo.gtSystemInfo.MaxSubSlicesSupported * hwInfo.gtSystemInfo.MaxEuPerSubSlice * 8; + uint32_t expectedValue = NEO::GfxCoreHelper::getHighestEnabledDualSubSlice(hwInfo) * hwInfo.gtSystemInfo.MaxEuPerSubSlice * 8; EXPECT_EQ(expectedValue, gfxCoreHelperl.getComputeUnitsUsedForScratch(pDevice->getRootDeviceEnvironment())); EXPECT_EQ(expectedValue, pDevice->getDeviceInfo().computeUnitsUsedForScratch); diff --git a/shared/test/unit_test/os_interface/linux/drm_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_tests.cpp index bf5c892a5c..7a8f215482 100644 --- a/shared/test/unit_test/os_interface/linux/drm_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_tests.cpp @@ -1003,32 +1003,6 @@ TEST(DrmTest, whenImmediateVmBindExtIsEnabledThenUseVmBindImmediate) { } } -TEST(DrmQueryTest, GivenDrmWhenSetupHardwareInfoCalledThenCorrectMaxValuesInGtSystemInfoArePreservedAndIoctlHelperSet) { - DebugManagerStateRestore restore; - debugManager.flags.IgnoreProductSpecificIoctlHelper.set(true); - auto executionEnvironment = std::make_unique(); - - *executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo() = *NEO::defaultHwInfo.get(); - auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo(); - DrmMock drm{*executionEnvironment->rootDeviceEnvironments[0]}; - - drm.failRetTopology = true; - - drm.storedEUVal = 48; - drm.storedSSVal = 6; - hwInfo->gtSystemInfo.SliceCount = 2; - - auto setupHardwareInfo = [](HardwareInfo *, bool, const ReleaseHelper *) {}; - DeviceDescriptor device = {0, hwInfo, setupHardwareInfo}; - - drm.ioctlHelper.reset(); - drm.setupHardwareInfo(&device, false); - 
EXPECT_NE(nullptr, drm.getIoctlHelper()); - EXPECT_EQ(2u, hwInfo->gtSystemInfo.MaxSlicesSupported); - EXPECT_EQ(NEO::defaultHwInfo->gtSystemInfo.MaxSubSlicesSupported, hwInfo->gtSystemInfo.MaxSubSlicesSupported); - EXPECT_EQ(NEO::defaultHwInfo->gtSystemInfo.MaxEuPerSubSlice, hwInfo->gtSystemInfo.MaxEuPerSubSlice); -} - TEST(DrmQueryTest, GivenLessAvailableSubSlicesThanMaxSubSlicesWhenQueryingTopologyInfoThenCorrectMaxSubSliceCountIsSet) { auto executionEnvironment = std::make_unique(); @@ -2088,8 +2062,8 @@ TEST(DrmHwInfoTest, givenTopologyDataWithoutSystemInfoWhenSettingHwInfoThenCorre EXPECT_EQ(hwInfo->gtSystemInfo.L3BankCount, 3u); EXPECT_EQ(hwInfo->gtSystemInfo.MaxEuPerSubSlice, 9u); EXPECT_EQ(hwInfo->gtSystemInfo.MaxSlicesSupported, 2u); - EXPECT_EQ(hwInfo->gtSystemInfo.MaxSubSlicesSupported, 16u); - EXPECT_EQ(hwInfo->gtSystemInfo.MaxDualSubSlicesSupported, 16u); + EXPECT_EQ(hwInfo->gtSystemInfo.MaxSubSlicesSupported, 8u); + EXPECT_EQ(hwInfo->gtSystemInfo.MaxDualSubSlicesSupported, 8u); EXPECT_TRUE(hwInfo->gtSystemInfo.IsDynamicallyPopulated); @@ -2355,7 +2329,36 @@ TEST(DrmHwInfoTest, givenTopologyDataWithSingleSliceAndMoreSubslicesThanMaxSubsl EXPECT_GE(GfxCoreHelper::getHighestEnabledDualSubSlice(*hwInfo), 2u * ioctlHelper->topologyDataToSet.maxSubSlicesPerSlice); } -TEST(DrmHwInfoTest, givenTopologyDataWithoutL3BankCountWhenSettingHwInfoThenL3BankCountIsSetBasedOnMaxDualSubslicesBeforeQueryTopology) { +TEST(DrmHwInfoTest, givenFusedSlicesThenMaxSubSlicesSupportedisSetToOnlyActiveSubsliceCount) { + auto executionEnvironment = std::make_unique(); + DrmMock drm{*executionEnvironment->rootDeviceEnvironments[0]}; + + drm.ioctlHelper = std::make_unique(drm); + + auto ioctlHelper = static_cast(drm.ioctlHelper.get()); + + ioctlHelper->topologyDataToSet.sliceCount = 2; + ioctlHelper->topologyDataToSet.subSliceCount = 16; + ioctlHelper->topologyDataToSet.maxEusPerSubSlice = 8; + ioctlHelper->topologyDataToSet.euCount = 128; + 
ioctlHelper->topologyDataToSet.maxSubSlicesPerSlice = 8; + + auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo(); + + hwInfo->gtSystemInfo = {}; + + auto setupHardwareInfo = [](HardwareInfo *hwInfo, bool, const ReleaseHelper *) { + hwInfo->gtSystemInfo.MaxSubSlicesSupported = 32; + hwInfo->gtSystemInfo.MaxDualSubSlicesSupported = 32; + }; + DeviceDescriptor device = {0, hwInfo, setupHardwareInfo}; + + EXPECT_EQ(0, drm.setupHardwareInfo(&device, false)); + EXPECT_EQ(hwInfo->gtSystemInfo.MaxSubSlicesSupported, 16u); + EXPECT_EQ(hwInfo->gtSystemInfo.MaxDualSubSlicesSupported, 16u); +} + +TEST(DrmHwInfoTest, givenTopologyDataWithoutL3BankCountWhenSettingHwInfoThenL3BankCountIsSetBasedOnMaxDualSubslices) { auto executionEnvironment = std::make_unique(); DrmMock drm{*executionEnvironment->rootDeviceEnvironments[0]}; @@ -2368,7 +2371,7 @@ TEST(DrmHwInfoTest, givenTopologyDataWithoutL3BankCountWhenSettingHwInfoThenL3Ba ioctlHelper->topologyDataToSet.maxEusPerSubSlice = 1; ioctlHelper->topologyDataToSet.euCount = 1; ioctlHelper->topologyDataToSet.maxSlices = 1; - ioctlHelper->topologyDataToSet.maxSubSlicesPerSlice = 16; + ioctlHelper->topologyDataToSet.maxSubSlicesPerSlice = 8; auto hwInfo = executionEnvironment->rootDeviceEnvironments[0]->getMutableHardwareInfo(); @@ -2387,8 +2390,8 @@ TEST(DrmHwInfoTest, givenTopologyDataWithoutL3BankCountWhenSettingHwInfoThenL3Ba EXPECT_EQ(hwInfo->gtSystemInfo.L3BankCount, 8u); - EXPECT_EQ(hwInfo->gtSystemInfo.MaxSubSlicesSupported, 16u); - EXPECT_EQ(hwInfo->gtSystemInfo.MaxDualSubSlicesSupported, 16u); + EXPECT_EQ(hwInfo->gtSystemInfo.MaxSubSlicesSupported, 8u); + EXPECT_EQ(hwInfo->gtSystemInfo.MaxDualSubSlicesSupported, 8u); } TEST(DrmWrapperTest, givenEAgainOrEIntrOrEBusyWhenCheckingIfReinvokeRequiredThenTrueIsReturned) { diff --git a/shared/test/unit_test/os_interface/product_helper_tests.cpp b/shared/test/unit_test/os_interface/product_helper_tests.cpp index 40ab8cdb5b..7f863f2e89 100644 --- 
a/shared/test/unit_test/os_interface/product_helper_tests.cpp +++ b/shared/test/unit_test/os_interface/product_helper_tests.cpp @@ -1092,22 +1092,25 @@ HWTEST2_F(ProductHelperTest, givenProductHelperWhenItsXe2PlusThenCacheLineSizeIs EXPECT_EQ(productHelper->getCacheLineSize(), 256u); } -TEST_F(ProductHelperTest, whenGettingMaxSubSliceSpaceThenValueIsNotSmallerThanMaxSubSliceCount) { - constexpr auto maxSupportedSubSlices = 128u; +TEST_F(ProductHelperTest, whenGettingMaxSubSliceSpaceThenValueIsEqualToMaxEnabled) { + constexpr auto maxSupportedSubSlices = 64u; auto hwInfo = *defaultHwInfo; auto &gtSystemInfo = hwInfo.gtSystemInfo; - gtSystemInfo.SliceCount = 1; + gtSystemInfo.SliceCount = 8; gtSystemInfo.SubSliceCount = 2; gtSystemInfo.DualSubSliceCount = 2; - gtSystemInfo.MaxSlicesSupported = 2; - gtSystemInfo.MaxSlicesSupported = 2; + gtSystemInfo.MaxSlicesSupported = 8; gtSystemInfo.MaxSubSlicesSupported = maxSupportedSubSlices; gtSystemInfo.MaxDualSubSlicesSupported = maxSupportedSubSlices; gtSystemInfo.IsDynamicallyPopulated = true; for (uint32_t slice = 0; slice < GT_MAX_SLICE; slice++) { gtSystemInfo.SliceInfo[slice].Enabled = slice < gtSystemInfo.SliceCount; + for (int32_t dssID = 0; dssID < GT_MAX_DUALSUBSLICE_PER_SLICE; dssID++) { + auto enabled = (slice * GT_MAX_DUALSUBSLICE_PER_SLICE) + dssID < maxSupportedSubSlices; + gtSystemInfo.SliceInfo[slice].DSSInfo[dssID].Enabled = enabled; + } } EXPECT_EQ(maxSupportedSubSlices, productHelper->computeMaxNeededSubSliceSpace(hwInfo)); }