mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-25 13:33:02 +08:00
performance: optimize memory used for scratch programming
- Reserve the exact number of slots needed for scratch surface states in surface state heaps.
- Do not use per-context offsets that depend on the engine type executing the command lists.

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
b2d25174fb
commit
9b2696a942
@@ -36,7 +36,10 @@ namespace L0 {
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
size_t CommandListCoreFamily<gfxCoreFamily>::getReserveSshSize() {
|
||||
return 4 * MemoryConstants::pageSize;
|
||||
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
|
||||
return 16 /* max ptss steps */
|
||||
* 2 * 2 /* two slots and two steps */
|
||||
* sizeof(RENDER_SURFACE_STATE);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
|
||||
@@ -142,13 +142,8 @@ void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &sshHe
|
||||
csr->getOsContext(), gsbaState, frontEndState);
|
||||
}
|
||||
|
||||
NEO::Device *neoDevice = device->getNEODevice();
|
||||
auto &gfxCoreHelper = neoDevice->getGfxCoreHelper();
|
||||
auto &productHelper = neoDevice->getProductHelper();
|
||||
|
||||
if (sshHeaps.size() > 0) {
|
||||
uint32_t offsetIndex = gfxCoreHelper.getMaxPtssIndex(productHelper) * csr->getOsContext().getEngineType() + 1u;
|
||||
scratchController->programHeaps(sshHeaps, offsetIndex, perThreadScratchSpaceSlot0Size, perThreadScratchSpaceSlot1Size,
|
||||
scratchController->programHeaps(sshHeaps, 0u, perThreadScratchSpaceSlot0Size, perThreadScratchSpaceSlot1Size,
|
||||
csr->getOsContext(), gsbaState, frontEndState);
|
||||
}
|
||||
|
||||
|
||||
@@ -210,11 +210,11 @@ HWTEST2_F(CommandListTests, whenCommandListIsCreatedAndProgramExtendedPipeContro
|
||||
}
|
||||
|
||||
using CommandListTestsReserveSize = Test<DeviceFixture>;
|
||||
HWTEST2_F(CommandListTestsReserveSize, givenCommandListWhenGetReserveSshSizeThen4PagesReturned, IsAtLeastXeHpCore) {
|
||||
HWTEST2_F(CommandListTestsReserveSize, givenCommandListWhenGetReserveSshSizeThen16slotSpaceReturned, IsAtLeastXeHpCore) {
|
||||
L0::CommandListCoreFamily<gfxCoreFamily> commandList(1u);
|
||||
commandList.initialize(device, NEO::EngineGroupType::compute, 0u);
|
||||
|
||||
EXPECT_EQ(commandList.getReserveSshSize(), 4 * MemoryConstants::pageSize);
|
||||
EXPECT_EQ(commandList.getReserveSshSize(), 16 * 2 * 2 * sizeof(typename FamilyType::RENDER_SURFACE_STATE));
|
||||
}
|
||||
|
||||
using CommandListAppendLaunchKernel = Test<ModuleFixture>;
|
||||
|
||||
Reference in New Issue
Block a user