performance: optimize memory used for scratch programming

- reserve the exact number of slots needed for scratch surface states in
surface state heaps
- do not use per-context offsets that depend on the engine type executing
the command lists

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe 2025-01-14 13:51:12 +00:00 committed by Compute-Runtime-Automation
parent 5e13e2a1b3
commit 60495a5b20
3 changed files with 11 additions and 10 deletions

View File

@ -36,7 +36,13 @@ namespace L0 {
template <GFXCORE_FAMILY gfxCoreFamily> template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandListCoreFamily<gfxCoreFamily>::getReserveSshSize() { size_t CommandListCoreFamily<gfxCoreFamily>::getReserveSshSize() {
return 4 * MemoryConstants::pageSize; constexpr size_t maxPtssSteps = 16;
constexpr size_t numSlotsPerStep = 2;
constexpr size_t numSteps = 2;
constexpr size_t startSlotIndex = 1;
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
return (maxPtssSteps * numSlotsPerStep + startSlotIndex) * numSteps * sizeof(RENDER_SURFACE_STATE);
} }
template <GFXCORE_FAMILY gfxCoreFamily> template <GFXCORE_FAMILY gfxCoreFamily>

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2021-2024 Intel Corporation * Copyright (C) 2021-2025 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@ -142,13 +142,8 @@ void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &sshHe
csr->getOsContext(), gsbaState, frontEndState); csr->getOsContext(), gsbaState, frontEndState);
} }
NEO::Device *neoDevice = device->getNEODevice();
auto &gfxCoreHelper = neoDevice->getGfxCoreHelper();
auto &productHelper = neoDevice->getProductHelper();
if (sshHeaps.size() > 0) { if (sshHeaps.size() > 0) {
uint32_t offsetIndex = gfxCoreHelper.getMaxPtssIndex(productHelper) * csr->getOsContext().getEngineType() + 1u; scratchController->programHeaps(sshHeaps, 1u, perThreadScratchSpaceSlot0Size, perThreadScratchSpaceSlot1Size,
scratchController->programHeaps(sshHeaps, offsetIndex, perThreadScratchSpaceSlot0Size, perThreadScratchSpaceSlot1Size,
csr->getOsContext(), gsbaState, frontEndState); csr->getOsContext(), gsbaState, frontEndState);
} }

View File

@ -204,11 +204,11 @@ HWTEST2_F(CommandListTests, whenCommandListIsCreatedAndProgramExtendedPipeContro
} }
using CommandListTestsReserveSize = Test<DeviceFixture>; using CommandListTestsReserveSize = Test<DeviceFixture>;
HWTEST2_F(CommandListTestsReserveSize, givenCommandListWhenGetReserveSshSizeThen4PagesReturned, IsAtLeastXeHpCore) { HWTEST2_F(CommandListTestsReserveSize, givenCommandListWhenGetReserveSshSizeThen16slotSpaceReturned, IsAtLeastXeHpCore) {
L0::CommandListCoreFamily<gfxCoreFamily> commandList(1u); L0::CommandListCoreFamily<gfxCoreFamily> commandList(1u);
commandList.initialize(device, NEO::EngineGroupType::compute, 0u); commandList.initialize(device, NEO::EngineGroupType::compute, 0u);
EXPECT_EQ(commandList.getReserveSshSize(), 4 * MemoryConstants::pageSize); EXPECT_EQ(commandList.getReserveSshSize(), (16 * 2 + 1) * 2 * sizeof(typename FamilyType::RENDER_SURFACE_STATE));
} }
using CommandListAppendLaunchKernel = Test<ModuleFixture>; using CommandListAppendLaunchKernel = Test<ModuleFixture>;