mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 14:02:58 +08:00
fix: correct thread/eu ratio for scratch to Xe2
Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
ae6c6a3625
commit
a3b6c1fa6d
@@ -1823,10 +1823,19 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
|
||||
auto commandListHandle1 = commandList1->toHandle();
|
||||
commandList1->close();
|
||||
|
||||
auto &productHelper = device->getProductHelper();
|
||||
|
||||
commandQueue->executeCommandLists(1, &commandListHandle0, nullptr, false, nullptr);
|
||||
EXPECT_EQ(0u, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
|
||||
|
||||
auto expectedScratchSize = 0u;
|
||||
productHelper.adjustPerThreadScratchSize(expectedScratchSize);
|
||||
|
||||
EXPECT_EQ(expectedScratchSize, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
|
||||
commandQueue->executeCommandLists(1, &commandListHandle1, nullptr, false, nullptr);
|
||||
EXPECT_EQ(512u, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
|
||||
|
||||
expectedScratchSize = 512u;
|
||||
productHelper.adjustPerThreadScratchSize(expectedScratchSize);
|
||||
EXPECT_EQ(expectedScratchSize, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
|
||||
|
||||
auto usedSpaceAfter = commandQueue->commandStream.getUsed();
|
||||
|
||||
@@ -1854,9 +1863,16 @@ HWTEST2_F(ExecuteCommandListTests, givenTwoCommandQueuesHavingTwoB2BCommandLists
|
||||
false,
|
||||
returnValue));
|
||||
commandQueue1->executeCommandLists(1, &commandListHandle0, nullptr, false, nullptr);
|
||||
EXPECT_EQ(1024u, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
|
||||
|
||||
expectedScratchSize = 1024u;
|
||||
productHelper.adjustPerThreadScratchSize(expectedScratchSize);
|
||||
EXPECT_EQ(expectedScratchSize, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
|
||||
|
||||
commandQueue1->executeCommandLists(1, &commandListHandle1, nullptr, false, nullptr);
|
||||
EXPECT_EQ(2048u, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
|
||||
|
||||
expectedScratchSize = 2048u;
|
||||
productHelper.adjustPerThreadScratchSize(expectedScratchSize);
|
||||
EXPECT_EQ(expectedScratchSize, csr->getScratchSpaceController()->getPerThreadScratchSizeSlot1());
|
||||
|
||||
usedSpaceAfter = commandQueue1->commandStream.getUsed();
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
* Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -184,7 +184,12 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat
|
||||
kernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] = 0x1000;
|
||||
auto &gfxCoreHelper = pDevice->getRootDeviceEnvironment().getHelper<GfxCoreHelper>();
|
||||
uint32_t computeUnits = gfxCoreHelper.getComputeUnitsUsedForScratch(pDevice->getRootDeviceEnvironment());
|
||||
size_t scratchSpaceSize = kernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0] * computeUnits;
|
||||
auto perThreadScratchSize = kernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0];
|
||||
|
||||
auto &productHelper = pDevice->getProductHelper();
|
||||
productHelper.adjustPerThreadScratchSize(perThreadScratchSize);
|
||||
|
||||
auto scratchSpaceSize = perThreadScratchSize * computeUnits;
|
||||
|
||||
commandQueue.enqueueKernel(kernel, 1, nullptr, &gws, nullptr, 0, nullptr, nullptr);
|
||||
commandQueue.flush();
|
||||
@@ -208,8 +213,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScrat
|
||||
EXPECT_EQ(length.surfaceState.depth + 1u, scratchState->getDepth());
|
||||
EXPECT_EQ(length.surfaceState.width + 1u, scratchState->getWidth());
|
||||
EXPECT_EQ(length.surfaceState.height + 1u, scratchState->getHeight());
|
||||
auto &productHelper = pDevice->getProductHelper();
|
||||
EXPECT_EQ(kernel.kernelInfo.kernelDescriptor.kernelAttributes.perThreadScratchSize[0], EncodeSurfaceState<FamilyType>::getPitchForScratchInBytes(scratchState, productHelper));
|
||||
|
||||
EXPECT_EQ(perThreadScratchSize, EncodeSurfaceState<FamilyType>::getPitchForScratchInBytes(scratchState, productHelper));
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTestXeHPAndLater, givenScratchSpaceSurfaceStateEnabledWhenNewSshProvidedAndNoScratchAllocationExistThenNoDirtyBitSet) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
* Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -21,6 +21,7 @@
|
||||
#include "shared/source/memory_manager/internal_allocation_storage.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/source/os_interface/os_context.h"
|
||||
#include "shared/source/os_interface/product_helper.h"
|
||||
|
||||
namespace NEO {
|
||||
ScratchSpaceControllerXeHPAndLater::ScratchSpaceControllerXeHPAndLater(uint32_t rootDeviceIndex,
|
||||
@@ -158,6 +159,8 @@ void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requi
|
||||
if (!Math::isPow2(requiredPerThreadScratchSizeSlot0AlignedUp)) {
|
||||
requiredPerThreadScratchSizeSlot0AlignedUp = Math::nextPowerOfTwo(requiredPerThreadScratchSizeSlot0);
|
||||
}
|
||||
auto &productHelper = executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHelper<ProductHelper>();
|
||||
productHelper.adjustPerThreadScratchSize(requiredPerThreadScratchSizeSlot0AlignedUp);
|
||||
size_t requiredScratchSizeInBytes = static_cast<size_t>(requiredPerThreadScratchSizeSlot0AlignedUp) * computeUnitsUsedForScratch;
|
||||
scratchSurfaceDirty = false;
|
||||
auto multiTileCapable = osContext.getNumSupportedDevices() > 1;
|
||||
@@ -176,6 +179,7 @@ void ScratchSpaceControllerXeHPAndLater::prepareScratchAllocation(uint32_t requi
|
||||
if (!Math::isPow2(requiredPerThreadScratchSizeSlot1AlignedUp)) {
|
||||
requiredPerThreadScratchSizeSlot1AlignedUp = Math::nextPowerOfTwo(requiredPerThreadScratchSizeSlot1);
|
||||
}
|
||||
productHelper.adjustPerThreadScratchSize(requiredPerThreadScratchSizeSlot1AlignedUp);
|
||||
size_t requiredScratchSlot1SizeInBytes = static_cast<size_t>(requiredPerThreadScratchSizeSlot1AlignedUp) * computeUnitsUsedForScratch;
|
||||
if (scratchSlot1SizeInBytes < requiredScratchSlot1SizeInBytes) {
|
||||
if (scratchSlot1Allocation) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
* Copyright (C) 2018-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -145,6 +145,7 @@ class ProductHelper {
|
||||
virtual bool isGlobalFenceInDirectSubmissionRequired(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isCopyEngineSelectorEnabled(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual uint32_t getThreadEuRatioForScratch(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual void adjustPerThreadScratchSize(uint32_t &requiredPerThreadScratchSize) const = 0;
|
||||
virtual size_t getSvmCpuAlignment() const = 0;
|
||||
virtual bool isComputeDispatchAllWalkerEnableInCfeStateRequired(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isVmBindPatIndexProgrammingSupported() const = 0;
|
||||
|
||||
@@ -533,6 +533,10 @@ uint32_t ProductHelperHw<gfxProduct>::getThreadEuRatioForScratch(const HardwareI
|
||||
return 8u;
|
||||
}
|
||||
|
||||
// Generic implementation of the per-thread scratch size adjustment hook.
// The body is empty, so requiredPerThreadScratchSize is left exactly as the caller passed it.
template <PRODUCT_FAMILY gfxProduct>
|
||||
void ProductHelperHw<gfxProduct>::adjustPerThreadScratchSize(uint32_t &requiredPerThreadScratchSize) const {
|
||||
}
|
||||
|
||||
template <PRODUCT_FAMILY gfxProduct>
|
||||
size_t ProductHelperHw<gfxProduct>::getSvmCpuAlignment() const {
|
||||
return MemoryConstants::pageSize2M;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2023-2024 Intel Corporation
|
||||
* Copyright (C) 2023-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -88,6 +88,7 @@ class ProductHelperHw : public ProductHelper {
|
||||
bool isGlobalFenceInCommandStreamRequired(const HardwareInfo &hwInfo) const override;
|
||||
bool isGlobalFenceInDirectSubmissionRequired(const HardwareInfo &hwInfo) const override;
|
||||
uint32_t getThreadEuRatioForScratch(const HardwareInfo &hwInfo) const override;
|
||||
void adjustPerThreadScratchSize(uint32_t &requiredPerThreadScratchSize) const override;
|
||||
size_t getSvmCpuAlignment() const override;
|
||||
bool isComputeDispatchAllWalkerEnableInCfeStateRequired(const HardwareInfo &hwInfo) const override;
|
||||
bool isVmBindPatIndexProgrammingSupported() const override;
|
||||
|
||||
@@ -40,14 +40,14 @@ void ProductHelperHw<gfxProduct>::adjustNumberOfCcs(HardwareInfo &hwInfo) const
|
||||
hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled = 1;
|
||||
}
|
||||
|
||||
// Explicit specialization for the product selected by gfxProduct (defined outside this hunk).
// Returns a fixed thread/EU ratio of 16 for scratch sizing; hwInfo is not read.
template <>
|
||||
uint32_t ProductHelperHw<gfxProduct>::getThreadEuRatioForScratch(const HardwareInfo &hwInfo) const {
|
||||
return 16u;
|
||||
}
|
||||
|
||||
// Explicit specialization for the product selected by gfxProduct (defined outside this hunk).
// Unconditionally reports direct submission as supported; releaseHelper is not consulted.
template <>
|
||||
bool ProductHelperHw<gfxProduct>::isDirectSubmissionSupported(ReleaseHelper *releaseHelper) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Explicit specialization for the product selected by gfxProduct (defined outside this hunk).
// Doubles requiredPerThreadScratchSize in place; the value is passed by reference and
// the result is written back to the caller's variable.
// NOTE(review): uint32_t multiplication wraps on overflow - confirm callers never pass values >= 2^31.
template <>
|
||||
void ProductHelperHw<gfxProduct>::adjustPerThreadScratchSize(uint32_t &requiredPerThreadScratchSize) const {
|
||||
requiredPerThreadScratchSize *= 2;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -78,18 +78,6 @@ uint32_t GfxCoreHelperHw<Family>::getMinimalSIMDSize() const {
|
||||
return 16u;
|
||||
}
|
||||
|
||||
// Explicit specialization for the core family selected by Family (defined outside this hunk).
// Returns the number of compute units used when sizing scratch space:
//  - the OverrideNumComputeUnitsForScratch debug flag, when it is set to anything other than -1;
//  - otherwise maxSubSlice * MaxEuPerSubSlice * thread/EU ratio, with the sub-slice count and
//    the ratio both obtained from the ProductHelper of rootDeviceEnvironment.
template <>
|
||||
uint32_t GfxCoreHelperHw<Family>::getComputeUnitsUsedForScratch(const RootDeviceEnvironment &rootDeviceEnvironment) const {
|
||||
// Debug override takes precedence over the hardware-derived value.
if (debugManager.flags.OverrideNumComputeUnitsForScratch.get() != -1) {
|
||||
return static_cast<uint32_t>(debugManager.flags.OverrideNumComputeUnitsForScratch.get());
|
||||
}
|
||||
|
||||
auto &productHelper = rootDeviceEnvironment.getHelper<ProductHelper>();
|
||||
auto hwInfo = rootDeviceEnvironment.getHardwareInfo();
|
||||
auto maxSubSlice = productHelper.computeMaxNeededSubSliceSpace(*hwInfo);
|
||||
// sub-slices * EUs per sub-slice * threads per EU (product-specific ratio)
return maxSubSlice * hwInfo->gtSystemInfo.MaxEuPerSubSlice * productHelper.getThreadEuRatioForScratch(*hwInfo);
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t GfxCoreHelperHw<Family>::getMocsIndex(const GmmHelper &gmmHelper, bool l3enabled, bool l1enabled) const {
|
||||
if (l3enabled) {
|
||||
|
||||
@@ -98,9 +98,4 @@ bool ProductHelperHw<gfxProduct>::isDeviceUsmAllocationReuseSupported() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Explicit specialization for the product selected by gfxProduct (defined outside this hunk).
// Returns a fixed thread/EU ratio of 16 for scratch sizing; hwInfo is not read.
template <>
|
||||
uint32_t ProductHelperHw<gfxProduct>::getThreadEuRatioForScratch(const HardwareInfo &hwInfo) const {
|
||||
return 16u;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
* Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -4883,6 +4883,10 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTest, givenScratchSpaceSurfa
|
||||
|
||||
uint32_t perThreadScratchSize = 65;
|
||||
uint32_t expectedValue = Math::nextPowerOfTwo(perThreadScratchSize);
|
||||
|
||||
auto &productHelper = getHelper<ProductHelper>();
|
||||
productHelper.adjustPerThreadScratchSize(expectedValue);
|
||||
|
||||
bool stateBaseAddressDirty = false;
|
||||
bool cfeStateDirty = false;
|
||||
uint8_t surfaceHeap[1000];
|
||||
@@ -4905,6 +4909,11 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverHwTest, givenScratchSpaceSurfa
|
||||
bool stateBaseAddressDirty = false;
|
||||
scratchController->setRequiredScratchSpace(surfaceState, 0u, 0u, misalignedSizeForPrivateScratch,
|
||||
*pDevice->getDefaultEngine().osContext, stateBaseAddressDirty, cfeStateDirty);
|
||||
|
||||
auto &productHelper = getHelper<ProductHelper>();
|
||||
productHelper.adjustPerThreadScratchSize(misalignedSizeForPrivateScratch);
|
||||
productHelper.adjustPerThreadScratchSize(alignedSizeForPrivateScratch);
|
||||
|
||||
EXPECT_NE(scratchController->scratchSlot1SizeInBytes, misalignedSizeForPrivateScratch * scratchController->computeUnitsUsedForScratch);
|
||||
EXPECT_EQ(scratchController->scratchSlot1SizeInBytes, alignedSizeForPrivateScratch * scratchController->computeUnitsUsedForScratch);
|
||||
EXPECT_EQ(scratchController->scratchSlot1SizeInBytes, scratchController->getScratchSpaceSlot1Allocation()->getUnderlyingBufferSize());
|
||||
|
||||
@@ -1098,3 +1098,10 @@ HWTEST2_F(ProductHelperTest, givenProductHelperWhenGetRequiredDetectIndirectVers
|
||||
EXPECT_EQ(9u, productHelper->getRequiredDetectIndirectVersion());
|
||||
EXPECT_EQ(6u, productHelper->getRequiredDetectIndirectVersionVC());
|
||||
}
|
||||
|
||||
// Verifies that adjustPerThreadScratchSize leaves the value untouched:
// the size is compared against its initial value after the call.
HWTEST_F(ProductHelperTest, whenAdjustPerThreadScratchSizeThenSizeIsNotChanged) {
|
||||
// Arbitrary recognizable pattern used as the starting size.
constexpr uint32_t initialPerThreadScratchSize = 0xDEADBEEF;
|
||||
uint32_t perThreadScratchSize = initialPerThreadScratchSize;
|
||||
productHelper->adjustPerThreadScratchSize(perThreadScratchSize);
|
||||
EXPECT_EQ(initialPerThreadScratchSize, perThreadScratchSize);
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2024 Intel Corporation
|
||||
* Copyright (C) 2024-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -8,3 +8,4 @@
|
||||
#include "shared/test/common/test_macros/hw_test_base.h"
|
||||
|
||||
HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, whenGettingPreferredAllocationMethodThenNoPreferenceIsReturned, IGFX_BMG);
|
||||
HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, whenAdjustPerThreadScratchSizeThenSizeIsNotChanged, IGFX_BMG);
|
||||
|
||||
@@ -119,11 +119,13 @@ BMGTEST_F(BmgProductHelper, givenProductHelperWhenAdjustNumberOfCcsThenOverrideT
|
||||
EXPECT_EQ(hwInfo.gtSystemInfo.CCSInfo.NumberOfCCSEnabled, 1u);
|
||||
}
|
||||
|
||||
// Checks that the product helper reports a thread/EU ratio of 16 for scratch,
// using a copy of the default hardware info as input.
BMGTEST_F(BmgProductHelper, givenProductHelperWhenGettingThreadEuRatioForScratchThen16IsReturned) {
|
||||
auto hwInfo = *defaultHwInfo;
|
||||
EXPECT_EQ(16u, productHelper->getThreadEuRatioForScratch(hwInfo));
|
||||
}
|
||||
|
||||
// Checks that isDirectSubmissionSupported returns true for the fixture's release helper.
BMGTEST_F(BmgProductHelper, givenProductHelperWhenCheckDirectSubmissionSupportedThenTrueIsReturned) {
|
||||
EXPECT_TRUE(productHelper->isDirectSubmissionSupported(releaseHelper));
|
||||
}
|
||||
|
||||
// Verifies that adjustPerThreadScratchSize doubles the value passed in:
// after the call the size must equal twice the initial value.
BMGTEST_F(BmgProductHelper, whenAdjustPerThreadScratchSizeThenSizeIsDoubled) {
|
||||
constexpr uint32_t initialPerThreadScratchSize = 0x1234u;
|
||||
uint32_t perThreadScratchSize = initialPerThreadScratchSize;
|
||||
productHelper->adjustPerThreadScratchSize(perThreadScratchSize);
|
||||
EXPECT_EQ(initialPerThreadScratchSize * 2, perThreadScratchSize);
|
||||
}
|
||||
|
||||
@@ -35,5 +35,4 @@ HWTEST_EXCLUDE_PRODUCT(GmmCompressionTests, givenEnabledAndPreferredE2ECWhenAppl
|
||||
HWTEST_EXCLUDE_PRODUCT(CommandEncodeSemaphore, givenIndirectModeSetWhenProgrammingSemaphoreThenSetIndirectBit_IsAtLeastXeHpCore, IGFX_XE2_HPG_CORE);
|
||||
HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenBooleanUncachedWhenCallOverridePatIndexThenProperPatIndexIsReturned, IGFX_XE2_HPG_CORE);
|
||||
HWTEST_EXCLUDE_PRODUCT(GfxCoreHelperTest, whenEncodeAdditionalTimestampOffsetsThenNothingEncoded, IGFX_XE2_HPG_CORE);
|
||||
HWTEST_EXCLUDE_PRODUCT(ProductHelperTest, givenProductHelperWhenGetThreadEuRatioForScratchThen8IsReturned, IGFX_XE2_HPG_CORE);
|
||||
HWTEST_EXCLUDE_PRODUCT(GfxCoreHelperTest, givenGetDeviceTimestampWidthCalledThenReturnCorrectValue, IGFX_XE2_HPG_CORE);
|
||||
|
||||
@@ -157,8 +157,3 @@ LNLTEST_F(LnlProductHelper, givenProductHelperWhenCheckingIsDeviceUsmAllocationR
|
||||
// Checks that the product helper reports the buffer pool allocator as supported.
LNLTEST_F(LnlProductHelper, givenProductHelperWhenCheckingIsBufferPoolAllocatorSupportedThenCorrectValueIsReturned) {
|
||||
EXPECT_TRUE(productHelper->isBufferPoolAllocatorSupported());
|
||||
}
|
||||
|
||||
// Checks that the product helper reports a thread/EU ratio of 16 for scratch,
// using a copy of the default hardware info as input.
LNLTEST_F(LnlProductHelper, givenProductHelperWhenGettingThreadEuRatioForScratchThen16IsReturned) {
|
||||
auto hwInfo = *defaultHwInfo;
|
||||
EXPECT_EQ(16u, productHelper->getThreadEuRatioForScratch(hwInfo));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user