fix: correct thread/EU ratio for scratch on Xe2

Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski
2025-01-08 13:52:31 +00:00
committed by Compute-Runtime-Automation
parent ae6c6a3625
commit a3b6c1fa6d
15 changed files with 74 additions and 47 deletions

View File

@@ -1823,10 +1823,19 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
auto commandListHandle1 = commandList1->toHandle();
commandList1->close();
auto &productHelper = device->getProductHelper();
commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false, nullptr);
EXPECT_EQ(0u, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
auto expectedScratchSize = 0u;
productHelper.adjustPerThreadScratchSize(expectedScratchSize);
EXPECT_EQ(expectedScratchSize, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false, nullptr);
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
expectedScratchSize = 512u;
productHelper.adjustPerThreadScratchSize(expectedScratchSize);
EXPECT_EQ(expectedScratchSize, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
auto usedSpaceAfter = commandQueue->commandStream.getUsed();
@@ -1854,9 +1863,16 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
false,
returnValue));
commandQueue1->executeCommandLists(1, &commandListHandle0, nullptr, false, nullptr);
EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
expectedScratchSize = 1024u;
productHelper.adjustPerThreadScratchSize(expectedScratchSize);
EXPECT_EQ(expectedScratchSize, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
commandQueue1->executeCommandLists(1, &commandListHandle1, nullptr, false, nullptr);
EXPECT_EQ(2048u, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
expectedScratchSize = 2048u;
productHelper.adjustPerThreadScratchSize(expectedScratchSize);
EXPECT_EQ(expectedScratchSize, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
usedSpaceAfter = commandQueue1->commandStream.getUsed();

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2024 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -184,7 +184,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat
kernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = 0x1000;
auto &gfxCoreHelper = pDevice->getRootDeviceEnvironment().getHelper<GfxCoreHelper>();
uint32_t computeUnits = gfxCoreHelper.getComputeUnitsUsedForScratch(pDevice->getRootDeviceEnvironment());
size_t scratchSpaceSize = kernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] * computeUnits;
auto perThreadScratchSize = kernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0];
auto &productHelper = pDevice->getProductHelper();
productHelper.adjustPerThreadScratchSize(perThreadScratchSize);
auto scratchSpaceSize = perThreadScratchSize * computeUnits;
commandQueue.enqueueKernel(kernel, 1, nullptr, &gws, nullptr, 0, nullptr, nullptr);
commandQueue.flush();
@@ -208,8 +213,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat
EXPECT_EQ(length.surfaceState.depth + 1u, scratchState->getDepth());
EXPECT_EQ(length.surfaceState.width + 1u, scratchState->getWidth());
EXPECT_EQ(length.surfaceState.height + 1u, scratchState->getHeight());
auto &productHelper = pDevice->getProductHelper();
EXPECT_EQ(kernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0], EncodeSurfaceState<FamilyType>::getPitchForScratchInBytes(scratchState, productHelper));
EXPECT_EQ(perThreadScratchSize, EncodeSurfaceState<FamilyType>::getPitchForScratchInBytes(scratchState, productHelper));
}
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceSurfaceStateEnabledWhenNewSshProvidedAndNoScratchAllocationExistThenNoDirtyBitSet) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2024 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -21,6 +21,7 @@
#include "shared/source/memory_manager/internal_allocation_storage.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/os_interface/os_context.h"
#include "shared/source/os_interface/product_helper.h"
namespace NEO {
ScratchSpaceControllerXeHPAndLater::ScratchSpaceControllerXeHPAndLater(uint32_t rootDeviceIndex,
@@ -158,6 +159,8 @@ void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requi
if (!Math::isPow2(requiredPerThreadScratchSizeSlot0AlignedUp)) {
requiredPerThreadScratchSizeSlot0AlignedUp = Math::nextPowerOfTwo(requiredPerThreadScratchSizeSlot0);
}
auto &productHelper = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHelper<ProductHelper>();
productHelper.adjustPerThreadScratchSize(requiredPerThreadScratchSizeSlot0AlignedUp);
size_t requiredScratchSizeInBytes = static_cast<size_t>(requiredPerThreadScratchSizeSlot0AlignedUp) * computeUnitsUsedForScratch;
scratchSurfaceDirty = false;
auto multiTileCapable = osContext.getNumSupportedDevices() > 1;
@@ -176,6 +179,7 @@ void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requi
if (!Math::isPow2(requiredPerThreadScratchSizeSlot1AlignedUp)) {
requiredPerThreadScratchSizeSlot1AlignedUp = Math::nextPowerOfTwo(requiredPerThreadScratchSizeSlot1);
}
productHelper.adjustPerThreadScratchSize(requiredPerThreadScratchSizeSlot1AlignedUp);
size_t requiredScratchSlot1SizeInBytes = static_cast<size_t>(requiredPerThreadScratchSizeSlot1AlignedUp) * computeUnitsUsedForScratch;
if (scratchSlot1SizeInBytes < requiredScratchSlot1SizeInBytes) {
if (scratchSlot1Allocation) {

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2024 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -145,6 +145,7 @@ class ProductHelper {
virtual bool isGlobalFenceInDirectSubmissionRequired(const HardwareInfo &hwInfo) const = 0;
virtual bool isCopyEngineSelectorEnabled(const HardwareInfo &hwInfo) const = 0;
virtual uint32_t getThreadEuRatioForScratch(const HardwareInfo &hwInfo) const = 0;
// Product-specific hook for the per-thread scratch size. The value is passed by
// non-const reference, so an implementation may modify it in place; nothing is returned.
virtual void adjustPerThreadScratchSize(uint32_t &requiredPerThreadScratchSize) const = 0;
virtual size_t getSvmCpuAlignment() const = 0;
virtual bool isComputeDispatchAllWalkerEnableInCfeStateRequired(const HardwareInfo &hwInfo) const = 0;
virtual bool isVmBindPatIndexProgrammingSupported() const = 0;

View File

@@ -533,6 +533,10 @@ uint32_t ProductHelperHw<gfxProduct>::getThreadEuRatioForScratch(const HardwareI
return 8u;
}
// Generic implementation: the body is intentionally empty, so the per-thread scratch
// size passed in by reference is left exactly as the caller provided it.
template <PRODUCT_FAMILY gfxProduct>
void ProductHelperHw<gfxProduct>::adjustPerThreadScratchSize(uint32_t &requiredPerThreadScratchSize) const {
}
template <PRODUCT_FAMILY gfxProduct>
size_t ProductHelperHw<gfxProduct>::getSvmCpuAlignment() const {
return MemoryConstants::pageSize2M;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2023-2024 Intel Corporation
* Copyright (C) 2023-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -88,6 +88,7 @@ class ProductHelperHw : public ProductHelper {
bool isGlobalFenceInCommandStreamRequired(const HardwareInfo &hwInfo) const override;
bool isGlobalFenceInDirectSubmissionRequired(const HardwareInfo &hwInfo) const override;
uint32_t getThreadEuRatioForScratch(const HardwareInfo &hwInfo) const override;
void adjustPerThreadScratchSize(uint32_t &requiredPerThreadScratchSize) const override;
size_t getSvmCpuAlignment() const override;
bool isComputeDispatchAllWalkerEnableInCfeStateRequired(const HardwareInfo &hwInfo) const override;
bool isVmBindPatIndexProgrammingSupported() const override;

View File

@@ -40,14 +40,14 @@ void ProductHelperHw<gfxProduct>::adjustNumberOfCcs(HardwareInfo &hwInfo) const
hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 1;
}
// Product specialization: reports a fixed thread-to-EU ratio of 16 for scratch
// calculations; the hardware info argument is not consulted.
template <>
uint32_t ProductHelperHw<gfxProduct>::getThreadEuRatioForScratch(const HardwareInfo &hwInfo) const {
    constexpr uint32_t threadEuRatioForScratch = 16u;
    return threadEuRatioForScratch;
}
// Product specialization: direct submission is always reported as supported;
// the release helper argument is not consulted.
template <>
bool ProductHelperHw<gfxProduct>::isDirectSubmissionSupported(ReleaseHelper *releaseHelper) const {
    constexpr bool directSubmissionSupported = true;
    return directSubmissionSupported;
}
// Product specialization: doubles the per-thread scratch size in place.
template <>
void ProductHelperHw<gfxProduct>::adjustPerThreadScratchSize(uint32_t &requiredPerThreadScratchSize) const {
    // Same unsigned arithmetic as multiplying by two: the value is added to itself.
    requiredPerThreadScratchSize += requiredPerThreadScratchSize;
}
} // namespace NEO

View File

@@ -78,18 +78,6 @@ uint32_t GfxCoreHelperHw<Family>::getMinimalSIMDSize() const {
return 16u;
}
// Returns the number of compute units used when sizing scratch space.
// A value other than -1 in the OverrideNumComputeUnitsForScratch debug flag takes
// precedence; otherwise the result is
//   max needed sub-slice space * MaxEuPerSubSlice * product thread/EU ratio for scratch.
template <>
uint32_t GfxCoreHelperHw<Family>::getComputeUnitsUsedForScratch(const RootDeviceEnvironment &rootDeviceEnvironment) const {
    const bool overrideRequested = debugManager.flags.OverrideNumComputeUnitsForScratch.get() != -1;
    if (overrideRequested) {
        return static_cast<uint32_t>(debugManager.flags.OverrideNumComputeUnitsForScratch.get());
    }

    auto hardwareInfo = rootDeviceEnvironment.getHardwareInfo();
    auto &helper = rootDeviceEnvironment.getHelper<ProductHelper>();
    auto subSliceCount = helper.computeMaxNeededSubSliceSpace(*hardwareInfo);

    return subSliceCount * hardwareInfo->gtSystemInfo.MaxEuPerSubSlice * helper.getThreadEuRatioForScratch(*hardwareInfo);
}
template <>
uint32_t GfxCoreHelperHw<Family>::getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const {
if (l3enabled) {

View File

@@ -98,9 +98,4 @@ bool ProductHelperHw<gfxProduct>::isDeviceUsmAllocationReuseSupported() const {
return true;
}
// Product specialization: reports a fixed thread-to-EU ratio of 16 for scratch
// calculations; the hardware info argument is not consulted.
template <>
uint32_t ProductHelperHw<gfxProduct>::getThreadEuRatioForScratch(const HardwareInfo &hwInfo) const {
    constexpr uint32_t threadEuRatioForScratch = 16u;
    return threadEuRatioForScratch;
}
} // namespace NEO

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2024 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -4883,6 +4883,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTest, givenScratchSpaceSurfa
uint32_t perThreadScratchSize = 65;
uint32_t expectedValue = Math::nextPowerOfTwo(perThreadScratchSize);
auto &productHelper = getHelper<ProductHelper>();
productHelper.adjustPerThreadScratchSize(expectedValue);
bool stateBaseAddressDirty = false;
bool cfeStateDirty = false;
uint8_t surfaceHeap[1000];
@@ -4905,6 +4909,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTest, givenScratchSpaceSurfa
bool stateBaseAddressDirty = false;
scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, misalignedSizeForPrivateScratch,
*pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
auto &productHelper = getHelper<ProductHelper>();
productHelper.adjustPerThreadScratchSize(misalignedSizeForPrivateScratch);
productHelper.adjustPerThreadScratchSize(alignedSizeForPrivateScratch);
EXPECT_NE(scratchController->scratchSlot1SizeInBytes, misalignedSizeForPrivateScratch * scratchController->computeUnitsUsedForScratch);
EXPECT_EQ(scratchController->scratchSlot1SizeInBytes, alignedSizeForPrivateScratch * scratchController->computeUnitsUsedForScratch);
EXPECT_EQ(scratchController->scratchSlot1SizeInBytes, scratchController->getScratchSpaceSlot1Allocation()->getUnderlyingBufferSize());

View File

@@ -1098,3 +1098,10 @@ HWTEST2_F(ProductHelperTest, givenProductHelperWhenGetRequiredDetectIndirectVers
EXPECT_EQ(9u, productHelper->getRequiredDetectIndirectVersion());
EXPECT_EQ(6u, productHelper->getRequiredDetectIndirectVersionVC());
}
// Verifies that the product helper under test leaves the per-thread scratch size
// untouched when asked to adjust it.
HWTEST_F(ProductHelperTest, whenAdjustPerThreadScratchSizeThenSizeIsNotChanged) {
    constexpr uint32_t expectedSize = 0xDEADBEEF;
    uint32_t adjustedSize = expectedSize;

    productHelper->adjustPerThreadScratchSize(adjustedSize);

    EXPECT_EQ(expectedSize, adjustedSize);
}

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2024 Intel Corporation
* Copyright (C) 2024-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -8,3 +8,4 @@
#include "shared/test/common/test_macros/hw_test_base.h"
HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, whenGettingPreferredAllocationMethodThenNoPreferenceIsReturned, IGFX_BMG);
HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, whenAdjustPerThreadScratchSizeThenSizeIsNotChanged, IGFX_BMG);

View File

@@ -119,11 +119,13 @@ BMGTEST_F(BmgProductHelper, givenProductHelperWhenAdjustNumberOfCcsThenOverrideT
EXPECT_EQ(hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled, 1u);
}
// Verifies that the product helper reports a thread/EU ratio of 16 for scratch
// when queried with a copy of the default hardware info.
BMGTEST_F(BmgProductHelper, givenProductHelperWhenGettingThreadEuRatioForScratchThen16IsReturned) {
    auto hardwareInfo = *defaultHwInfo;
    const auto threadEuRatio = productHelper->getThreadEuRatioForScratch(hardwareInfo);
    EXPECT_EQ(16u, threadEuRatio);
}
// Verifies that the product helper reports direct submission as supported.
BMGTEST_F(BmgProductHelper, givenProductHelperWhenCheckDirectSubmissionSupportedThenTrueIsReturned) {
    const bool directSubmissionSupported = productHelper->isDirectSubmissionSupported(releaseHelper);
    EXPECT_TRUE(directSubmissionSupported);
}
// Verifies that adjusting the per-thread scratch size through the product helper
// yields exactly twice the starting value.
BMGTEST_F(BmgProductHelper, whenAdjustPerThreadScratchSizeThenSizeIsDoubled) {
    constexpr uint32_t startingSize = 0x1234u;
    uint32_t adjustedSize = startingSize;

    productHelper->adjustPerThreadScratchSize(adjustedSize);

    EXPECT_EQ(startingSize * 2, adjustedSize);
}

View File

@@ -35,5 +35,4 @@ HWTEST_EXCLUDE_PRODUCT(GmmCompressionTests, givenEnabledAndPreferredE2ECWhenAppl
HWTEST_EXCLUDE_PRODUCT(CommandEncodeSemaphore, givenIndirectModeSetWhenProgrammingSemaphoreThenSetIndirectBit_IsAtLeastXeHpCore, IGFX_XE2_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenBooleanUncachedWhenCallOverridePatIndexThenProperPatIndexIsReturned, IGFX_XE2_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(GfxCoreHelperTest, whenEncodeAdditionalTimestampOffsetsThenNothingEncoded, IGFX_XE2_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenProductHelperWhenGetThreadEuRatioForScratchThen8IsReturned, IGFX_XE2_HPG_CORE);
HWTEST_EXCLUDE_PRODUCT(GfxCoreHelperTest, givenGetDeviceTimestampWidthCalledThenReturnCorrectValue, IGFX_XE2_HPG_CORE);

View File

@@ -157,8 +157,3 @@ LNLTEST_F(LnlProductHelper, givenProductHelperWhenCheckingIsDeviceUsmAllocationR
// Verifies that the product helper reports the buffer pool allocator as supported.
LNLTEST_F(LnlProductHelper, givenProductHelperWhenCheckingIsBufferPoolAllocatorSupportedThenCorrectValueIsReturned) {
    const bool bufferPoolAllocatorSupported = productHelper->isBufferPoolAllocatorSupported();
    EXPECT_TRUE(bufferPoolAllocatorSupported);
}
// Verifies that the product helper reports a thread/EU ratio of 16 for scratch
// when queried with a copy of the default hardware info.
LNLTEST_F(LnlProductHelper, givenProductHelperWhenGettingThreadEuRatioForScratchThen16IsReturned) {
    auto hardwareInfo = *defaultHwInfo;
    const auto threadEuRatio = productHelper->getThreadEuRatioForScratch(hardwareInfo);
    EXPECT_EQ(16u, threadEuRatio);
}