refactor: Improve scratch programming in heapless mode
Related-To: NEO-7621
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
parent
a104d9199d
commit
6d3a53fe7f
|
@ -107,41 +107,12 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
|||
numWorkGroups, walkerArgs.localWorkSizes, simd, dim,
|
||||
localIdsGenerationByRuntime, inlineDataProgrammingRequired, requiredWalkOrder);
|
||||
|
||||
auto requiredScratchSlot0Size = queueCsr.getRequiredScratchSlot0Size();
|
||||
auto requiredScratchSlot1Size = queueCsr.getRequiredScratchSlot1Size();
|
||||
uint64_t scratchAddress = 0u;
|
||||
EncodeDispatchKernel<GfxFamily>::template setScratchAddress<heaplessModeEnabled>(scratchAddress, requiredScratchSlot0Size, requiredScratchSlot1Size, &ssh, queueCsr);
|
||||
|
||||
auto interfaceDescriptor = &walkerCmd.getInterfaceDescriptor();
|
||||
uint64_t scratchAddress = 0;
|
||||
|
||||
if constexpr (heaplessModeEnabled) {
|
||||
auto scratchAllocation = queueCsr.getScratchAllocation();
|
||||
auto scratchSpaceController = queueCsr.getScratchSpaceController();
|
||||
if (scratchAllocation) {
|
||||
scratchAddress = ssh.getGpuBase() + scratchSpaceController->getScratchPatchAddress();
|
||||
} else {
|
||||
auto requiredScratchSlot0Size = queueCsr.getRequiredScratchSlot0Size();
|
||||
auto requiredScratchSlot1Size = queueCsr.getRequiredScratchSlot1Size();
|
||||
bool stateBaseAddressDirty = false;
|
||||
bool checkVfeStateDirty = false;
|
||||
|
||||
if (requiredScratchSlot0Size || requiredScratchSlot1Size) {
|
||||
|
||||
scratchSpaceController->setRequiredScratchSpace(ssh.getCpuBase(),
|
||||
0u,
|
||||
requiredScratchSlot0Size,
|
||||
requiredScratchSlot1Size,
|
||||
queueCsr.peekTaskCount(), queueCsr.getOsContext(),
|
||||
stateBaseAddressDirty,
|
||||
checkVfeStateDirty);
|
||||
|
||||
if (scratchSpaceController->getScratchSpaceSlot0Allocation()) {
|
||||
queueCsr.makeResident(*scratchSpaceController->getScratchSpaceSlot0Allocation());
|
||||
}
|
||||
if (scratchSpaceController->getScratchSpaceSlot1Allocation()) {
|
||||
queueCsr.makeResident(*scratchSpaceController->getScratchSpaceSlot1Allocation());
|
||||
}
|
||||
|
||||
scratchAddress = ssh.getGpuBase() + scratchSpaceController->getScratchPatchAddress();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
HardwareCommandsHelper<GfxFamily>::template sendIndirectState<WalkerType, InterfaceDescriptorType>(
|
||||
commandStream,
|
||||
|
|
|
@ -184,6 +184,9 @@ struct EncodeDispatchKernel {
|
|||
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
|
||||
return BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE;
|
||||
}
|
||||
|
||||
// Declares the scratch-address helper, templated on whether heapless mode is enabled.
// scratchAddress is passed by non-const reference, so it acts as an in/out parameter.
// The two size arguments are the required scratch sizes for slot 0 and slot 1.
// NOTE(review): presumably ssh is the surface state heap and csr owns the scratch
// space controller, as in the call sites of this commit - confirm.
template <bool isHeapless>
|
||||
static void setScratchAddress(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr);
|
||||
};
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -768,6 +768,11 @@ size_t EncodeDispatchKernel<Family>::getDefaultDshAlignment() {
|
|||
return EncodeStates<Family>::alignIndirectStatePointer;
|
||||
}
|
||||
|
||||
// Definition of setScratchAddress with an empty body: for both values of isHeapless
// it leaves scratchAddress and every other argument untouched.
// NOTE(review): presumably families that support heapless mode supply their own
// implementation elsewhere - confirm.
template <typename Family>
|
||||
template <bool isHeapless>
|
||||
void EncodeDispatchKernel<Family>::setScratchAddress(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr) {
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeIndirectParams<Family>::setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, const uint32_t *lws) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
|
|
|
@ -22,6 +22,8 @@ template void NEO::EncodeDispatchKernel<Family>::encode<Family::DefaultWalkerTyp
|
|||
template void NEO::EncodeDispatchKernel<Family>::encodeThreadData<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
template void NEO::EncodeDispatchKernel<Family>::adjustWalkOrder<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
template void NEO::EncodeDispatchKernel<Family>::programBarrierEnable<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
|
||||
// Explicit instantiations of setScratchAddress for both template arguments
// (isHeapless = false and isHeapless = true) for this Family.
template void NEO::EncodeDispatchKernel<Family>::setScratchAddress<false>(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr);
|
||||
template void NEO::EncodeDispatchKernel<Family>::setScratchAddress<true>(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr);
|
||||
|
||||
template struct NEO::EncodeStates<Family>;
|
||||
template struct NEO::EncodeMath<Family>;
|
||||
|
|
|
@ -321,27 +321,15 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
|||
}
|
||||
|
||||
if constexpr (heaplessModeEnabled) {
|
||||
|
||||
auto requiredScratchSlot0Size = kernelDescriptor.kernelAttributes.perThreadScratchSize[0];
|
||||
auto requiredScratchSlot1Size = kernelDescriptor.kernelAttributes.perThreadScratchSize[1];
|
||||
uint64_t scratchAddress = 0;
|
||||
if (requiredScratchSlot0Size > 0 || requiredScratchSlot1Size > 0) {
|
||||
auto csr = args.device->getDefaultEngine().commandStreamReceiver;
|
||||
auto scratchController = csr->getScratchSpaceController();
|
||||
bool gsbaState = false;
|
||||
bool frontEndState = false;
|
||||
auto ssh = container.getIndirectHeap(HeapType::surfaceState);
|
||||
scratchController->setRequiredScratchSpace(ssh->getCpuBase(), 0, requiredScratchSlot0Size, requiredScratchSlot1Size,
|
||||
csr->peekTaskCount(), csr->getOsContext(), gsbaState, frontEndState);
|
||||
auto csr = args.device->getDefaultEngine().commandStreamReceiver;
|
||||
auto ssh = container.getIndirectHeap(HeapType::surfaceState);
|
||||
|
||||
if (scratchController->getScratchSpaceSlot0Allocation()) {
|
||||
csr->makeResident(*scratchController->getScratchSpaceSlot0Allocation());
|
||||
}
|
||||
if (scratchController->getScratchSpaceSlot1Allocation()) {
|
||||
csr->makeResident(*scratchController->getScratchSpaceSlot1Allocation());
|
||||
}
|
||||
uint64_t scratchAddress = 0u;
|
||||
|
||||
scratchAddress = ssh->getGpuBase() + scratchController->getScratchPatchAddress();
|
||||
}
|
||||
EncodeDispatchKernel<Family>::template setScratchAddress<heaplessModeEnabled>(scratchAddress, requiredScratchSlot0Size, requiredScratchSlot1Size, ssh, *csr);
|
||||
|
||||
auto inlineDataPointer = reinterpret_cast<char *>(walkerCmd.getInlineDataPointer());
|
||||
auto indirectDataPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.indirectDataPointerAddress;
|
||||
|
|
|
@ -1539,3 +1539,19 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesImplicitScalingSecondaryBufferTe
|
|||
givenDispatchImplicitScalingWithBbStartOverControlSectionWhenDispatchingAsSecondaryBufferContainerThenExpectSecondaryBatchBuffer) {
|
||||
testBodyFindPrimaryBatchBuffer<FamilyType>();
|
||||
}
|
||||
|
||||
using EncodeKernelScratchProgrammingTest = Test<ScratchProgrammingFixture>;
|
||||
|
||||
// Checks that setScratchAddress<false> leaves scratchAddress at its initial value of 0
// even though a non-zero slot 0 scratch size (64) is requested.
HWTEST2_F(EncodeKernelScratchProgrammingTest, givenHeaplessModeDisabledWhenSetScratchAddressIsCalledThenDoNothing, IsAtLeastXeHpCore) {
|
||||
|
||||
static constexpr bool heaplessModeEnabled = false;
|
||||
auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
uint64_t scratchAddress = 0;
|
||||
// Slot 0 asks for scratch space and slot 1 does not.
uint32_t requiredScratchSlot0Size = 64;
|
||||
uint32_t requiredScratchSlot1Size = 0;
|
||||
|
||||
EncodeDispatchKernel<FamilyType>::template setScratchAddress<heaplessModeEnabled>(scratchAddress, requiredScratchSlot0Size, requiredScratchSlot1Size, ssh, ultCsr);
|
||||
|
||||
// The address must be unchanged after the call.
uint64_t expectedScratchAddress = 0;
|
||||
EXPECT_EQ(expectedScratchAddress, scratchAddress);
|
||||
}
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2022-2023 Intel Corporation
|
||||
* Copyright (C) 2022-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -8,8 +8,10 @@
|
|||
#include "shared/test/unit_test/fixtures/command_container_fixture.h"
|
||||
|
||||
#include "shared/source/indirect_heap/heap_size.h"
|
||||
#include "shared/source/indirect_heap/indirect_heap.h"
|
||||
#include "shared/source/os_interface/product_helper.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/mocks/mock_graphics_allocation.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
|
@ -72,6 +74,25 @@ EncodeDispatchKernelArgs CommandEncodeStatesFixture::createDefaultDispatchKernel
|
|||
return args;
|
||||
}
|
||||
|
||||
// Sets up the base DeviceFixture, then creates the IndirectHeap `ssh` and attaches to it
// a host buffer obtained from alignedMalloc(sizeStream = 512, alignmentStream = 0x1000)
// and a MockGraphicsAllocation wrapping that same buffer.
// The heap, the buffer and the mock allocation are raw-owned and released in tearDown().
void ScratchProgrammingFixture::setUp() {
|
||||
NEO::DeviceFixture::setUp();
|
||||
size_t sizeStream = 512;
|
||||
size_t alignmentStream = 0x1000;
|
||||
ssh = new IndirectHeap{nullptr};
|
||||
sshBuffer = alignedMalloc(sizeStream, alignmentStream);
|
||||
// Fail the test here if the backing buffer could not be allocated.
ASSERT_NE(nullptr, sshBuffer);
|
||||
ssh->replaceBuffer(sshBuffer, sizeStream);
|
||||
auto graphicsAllocation = new MockGraphicsAllocation(sshBuffer, sizeStream);
|
||||
ssh->replaceGraphicsAllocation(graphicsAllocation);
|
||||
}
|
||||
|
||||
// Frees what setUp() created and then tears down the base DeviceFixture.
// The graphics allocation is deleted first because it is reached through ssh,
// which is deleted on the next line.
void ScratchProgrammingFixture::tearDown() {
|
||||
delete ssh->getGraphicsAllocation();
|
||||
delete ssh;
|
||||
alignedFree(sshBuffer);
|
||||
NEO::DeviceFixture::tearDown();
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
void WalkerThreadFixture::setUp() {
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -65,6 +65,14 @@ class CommandEncodeStatesFixture : public DeviceFixture {
|
|||
NEO::L1CachePolicy l1CachePolicyData;
|
||||
};
|
||||
|
||||
// Test fixture that extends NEO::DeviceFixture with an IndirectHeap and the raw
// buffer backing it. Both pointers start as nullptr and are managed by setUp()/tearDown().
struct ScratchProgrammingFixture : public NEO::DeviceFixture {
|
||||
void setUp();
|
||||
void tearDown();
|
||||
|
||||
// Heap handed to the code under test.
IndirectHeap *ssh = nullptr;
|
||||
// Host memory backing ssh.
void *sshBuffer = nullptr;
|
||||
};
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
struct WalkerThreadFixture {
|
||||
|
|
Loading…
Reference in New Issue