mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
refactor: Improve scratch programming in heapless mode
Related-To: NEO-7621 Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
a104d9199d
commit
6d3a53fe7f
@@ -107,41 +107,12 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
|||||||
numWorkGroups, walkerArgs.localWorkSizes, simd, dim,
|
numWorkGroups, walkerArgs.localWorkSizes, simd, dim,
|
||||||
localIdsGenerationByRuntime, inlineDataProgrammingRequired, requiredWalkOrder);
|
localIdsGenerationByRuntime, inlineDataProgrammingRequired, requiredWalkOrder);
|
||||||
|
|
||||||
|
auto requiredScratchSlot0Size = queueCsr.getRequiredScratchSlot0Size();
|
||||||
|
auto requiredScratchSlot1Size = queueCsr.getRequiredScratchSlot1Size();
|
||||||
|
uint64_t scratchAddress = 0u;
|
||||||
|
EncodeDispatchKernel<GfxFamily>::template setScratchAddress<heaplessModeEnabled>(scratchAddress, requiredScratchSlot0Size, requiredScratchSlot1Size, &ssh, queueCsr);
|
||||||
|
|
||||||
auto interfaceDescriptor = &walkerCmd.getInterfaceDescriptor();
|
auto interfaceDescriptor = &walkerCmd.getInterfaceDescriptor();
|
||||||
uint64_t scratchAddress = 0;
|
|
||||||
|
|
||||||
if constexpr (heaplessModeEnabled) {
|
|
||||||
auto scratchAllocation = queueCsr.getScratchAllocation();
|
|
||||||
auto scratchSpaceController = queueCsr.getScratchSpaceController();
|
|
||||||
if (scratchAllocation) {
|
|
||||||
scratchAddress = ssh.getGpuBase() + scratchSpaceController->getScratchPatchAddress();
|
|
||||||
} else {
|
|
||||||
auto requiredScratchSlot0Size = queueCsr.getRequiredScratchSlot0Size();
|
|
||||||
auto requiredScratchSlot1Size = queueCsr.getRequiredScratchSlot1Size();
|
|
||||||
bool stateBaseAddressDirty = false;
|
|
||||||
bool checkVfeStateDirty = false;
|
|
||||||
|
|
||||||
if (requiredScratchSlot0Size || requiredScratchSlot1Size) {
|
|
||||||
|
|
||||||
scratchSpaceController->setRequiredScratchSpace(ssh.getCpuBase(),
|
|
||||||
0u,
|
|
||||||
requiredScratchSlot0Size,
|
|
||||||
requiredScratchSlot1Size,
|
|
||||||
queueCsr.peekTaskCount(), queueCsr.getOsContext(),
|
|
||||||
stateBaseAddressDirty,
|
|
||||||
checkVfeStateDirty);
|
|
||||||
|
|
||||||
if (scratchSpaceController->getScratchSpaceSlot0Allocation()) {
|
|
||||||
queueCsr.makeResident(*scratchSpaceController->getScratchSpaceSlot0Allocation());
|
|
||||||
}
|
|
||||||
if (scratchSpaceController->getScratchSpaceSlot1Allocation()) {
|
|
||||||
queueCsr.makeResident(*scratchSpaceController->getScratchSpaceSlot1Allocation());
|
|
||||||
}
|
|
||||||
|
|
||||||
scratchAddress = ssh.getGpuBase() + scratchSpaceController->getScratchPatchAddress();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
HardwareCommandsHelper<GfxFamily>::template sendIndirectState<WalkerType, InterfaceDescriptorType>(
|
HardwareCommandsHelper<GfxFamily>::template sendIndirectState<WalkerType, InterfaceDescriptorType>(
|
||||||
commandStream,
|
commandStream,
|
||||||
|
|||||||
@@ -184,6 +184,9 @@ struct EncodeDispatchKernel {
|
|||||||
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
|
using BINDING_TABLE_STATE = typename GfxFamily::BINDING_TABLE_STATE;
|
||||||
return BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE;
|
return BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Static helper parameterized on the compile-time flag isHeapless.
// scratchAddress is taken by non-const reference, the two scratch slot sizes by
// value, the heap by pointer and the command stream receiver by reference.
// NOTE(review): scratchAddress is presumably an output parameter - confirm
// against the definition.
template <bool isHeapless>
|
||||||
|
static void setScratchAddress(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2020-2023 Intel Corporation
|
* Copyright (C) 2020-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
@@ -768,6 +768,11 @@ size_t EncodeDispatchKernel<Family>::getDefaultDshAlignment() {
|
|||||||
return EncodeStates<Family>::alignIndirectStatePointer;
|
return EncodeStates<Family>::alignIndirectStatePointer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Definition of setScratchAddress with an empty body: none of the arguments are
// read or written, so scratchAddress keeps whatever value the caller passed in,
// regardless of isHeapless.
// NOTE(review): presumably a non-empty implementation exists elsewhere for
// platforms that support heapless mode - confirm.
template <typename Family>
|
||||||
|
template <bool isHeapless>
|
||||||
|
void EncodeDispatchKernel<Family>::setScratchAddress(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr) {
|
||||||
|
}
|
||||||
|
|
||||||
template <typename Family>
|
template <typename Family>
|
||||||
void EncodeIndirectParams<Family>::setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, const uint32_t *lws) {
|
void EncodeIndirectParams<Family>::setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], uint64_t crossThreadAddress, const uint32_t *lws) {
|
||||||
for (int i = 0; i < 3; ++i) {
|
for (int i = 0; i < 3; ++i) {
|
||||||
|
|||||||
@@ -22,6 +22,8 @@ template void NEO::EncodeDispatchKernel<Family>::encode<Family::DefaultWalkerTyp
|
|||||||
template void NEO::EncodeDispatchKernel<Family>::encodeThreadData<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
|
template void NEO::EncodeDispatchKernel<Family>::encodeThreadData<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, const uint32_t *startWorkGroup, const uint32_t *numWorkGroups, const uint32_t *workGroupSizes, uint32_t simd, uint32_t localIdDimensions, uint32_t threadsPerThreadGroup, uint32_t threadExecutionMask, bool localIdsGenerationByRuntime, bool inlineDataProgrammingRequired, bool isIndirect, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||||
template void NEO::EncodeDispatchKernel<Family>::adjustWalkOrder<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
|
template void NEO::EncodeDispatchKernel<Family>::adjustWalkOrder<Family::DefaultWalkerType>(Family::DefaultWalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||||
template void NEO::EncodeDispatchKernel<Family>::programBarrierEnable<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
|
template void NEO::EncodeDispatchKernel<Family>::programBarrierEnable<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
|
||||||
|
// Explicit instantiations of setScratchAddress for both values of its bool
// template parameter (false and true).
template void NEO::EncodeDispatchKernel<Family>::setScratchAddress<false>(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr);
|
||||||
|
template void NEO::EncodeDispatchKernel<Family>::setScratchAddress<true>(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr);
|
||||||
|
|
||||||
template struct NEO::EncodeStates<Family>;
|
template struct NEO::EncodeStates<Family>;
|
||||||
template struct NEO::EncodeMath<Family>;
|
template struct NEO::EncodeMath<Family>;
|
||||||
|
|||||||
@@ -321,27 +321,15 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
|||||||
}
|
}
|
||||||
|
|
||||||
if constexpr (heaplessModeEnabled) {
|
if constexpr (heaplessModeEnabled) {
|
||||||
|
|
||||||
auto requiredScratchSlot0Size = kernelDescriptor.kernelAttributes.perThreadScratchSize[0];
|
auto requiredScratchSlot0Size = kernelDescriptor.kernelAttributes.perThreadScratchSize[0];
|
||||||
auto requiredScratchSlot1Size = kernelDescriptor.kernelAttributes.perThreadScratchSize[1];
|
auto requiredScratchSlot1Size = kernelDescriptor.kernelAttributes.perThreadScratchSize[1];
|
||||||
uint64_t scratchAddress = 0;
|
auto csr = args.device->getDefaultEngine().commandStreamReceiver;
|
||||||
if (requiredScratchSlot0Size > 0 || requiredScratchSlot1Size > 0) {
|
auto ssh = container.getIndirectHeap(HeapType::surfaceState);
|
||||||
auto csr = args.device->getDefaultEngine().commandStreamReceiver;
|
|
||||||
auto scratchController = csr->getScratchSpaceController();
|
|
||||||
bool gsbaState = false;
|
|
||||||
bool frontEndState = false;
|
|
||||||
auto ssh = container.getIndirectHeap(HeapType::surfaceState);
|
|
||||||
scratchController->setRequiredScratchSpace(ssh->getCpuBase(), 0, requiredScratchSlot0Size, requiredScratchSlot1Size,
|
|
||||||
csr->peekTaskCount(), csr->getOsContext(), gsbaState, frontEndState);
|
|
||||||
|
|
||||||
if (scratchController->getScratchSpaceSlot0Allocation()) {
|
uint64_t scratchAddress = 0u;
|
||||||
csr->makeResident(*scratchController->getScratchSpaceSlot0Allocation());
|
|
||||||
}
|
|
||||||
if (scratchController->getScratchSpaceSlot1Allocation()) {
|
|
||||||
csr->makeResident(*scratchController->getScratchSpaceSlot1Allocation());
|
|
||||||
}
|
|
||||||
|
|
||||||
scratchAddress = ssh->getGpuBase() + scratchController->getScratchPatchAddress();
|
EncodeDispatchKernel<Family>::template setScratchAddress<heaplessModeEnabled>(scratchAddress, requiredScratchSlot0Size, requiredScratchSlot1Size, ssh, *csr);
|
||||||
}
|
|
||||||
|
|
||||||
auto inlineDataPointer = reinterpret_cast<char *>(walkerCmd.getInlineDataPointer());
|
auto inlineDataPointer = reinterpret_cast<char *>(walkerCmd.getInlineDataPointer());
|
||||||
auto indirectDataPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.indirectDataPointerAddress;
|
auto indirectDataPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.indirectDataPointerAddress;
|
||||||
|
|||||||
@@ -1539,3 +1539,19 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesImplicitScalingSecondaryBufferTe
|
|||||||
givenDispatchImplicitScalingWithBbStartOverControlSectionWhenDispatchingAsSecondaryBufferContainerThenExpectSecondaryBatchBuffer) {
|
givenDispatchImplicitScalingWithBbStartOverControlSectionWhenDispatchingAsSecondaryBufferContainerThenExpectSecondaryBatchBuffer) {
|
||||||
testBodyFindPrimaryBatchBuffer<FamilyType>();
|
testBodyFindPrimaryBatchBuffer<FamilyType>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
using EncodeKernelScratchProgrammingTest = Test<ScratchProgrammingFixture>;
|
||||||
|
|
||||||
|
// Checks that calling setScratchAddress with the heapless template flag set to
// false leaves scratchAddress at its initial value of 0, even though a non-zero
// slot 0 scratch size (64) is requested.
HWTEST2_F(EncodeKernelScratchProgrammingTest, givenHeaplessModeDisabledWhenSetScratchAddressIsCalledThenDoNothing, IsAtLeastXeHpCore) {
|
||||||
|
|
||||||
|
static constexpr bool heaplessModeEnabled = false;
|
||||||
|
auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
|
uint64_t scratchAddress = 0;
|
||||||
|
uint32_t requiredScratchSlot0Size = 64;
|
||||||
|
uint32_t requiredScratchSlot1Size = 0;
|
||||||
|
|
||||||
|
// ssh is the IndirectHeap pointer supplied by the fixture.
EncodeDispatchKernel<FamilyType>::template setScratchAddress<heaplessModeEnabled>(scratchAddress, requiredScratchSlot0Size, requiredScratchSlot1Size, ssh, ultCsr);
|
||||||
|
|
||||||
|
uint64_t expectedScratchAddress = 0;
|
||||||
|
EXPECT_EQ(expectedScratchAddress, scratchAddress);
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2022-2023 Intel Corporation
|
* Copyright (C) 2022-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
@@ -8,8 +8,10 @@
|
|||||||
#include "shared/test/unit_test/fixtures/command_container_fixture.h"
|
#include "shared/test/unit_test/fixtures/command_container_fixture.h"
|
||||||
|
|
||||||
#include "shared/source/indirect_heap/heap_size.h"
|
#include "shared/source/indirect_heap/heap_size.h"
|
||||||
|
#include "shared/source/indirect_heap/indirect_heap.h"
|
||||||
#include "shared/source/os_interface/product_helper.h"
|
#include "shared/source/os_interface/product_helper.h"
|
||||||
#include "shared/test/common/mocks/mock_device.h"
|
#include "shared/test/common/mocks/mock_device.h"
|
||||||
|
#include "shared/test/common/mocks/mock_graphics_allocation.h"
|
||||||
|
|
||||||
namespace NEO {
|
namespace NEO {
|
||||||
|
|
||||||
@@ -72,6 +74,25 @@ EncodeDispatchKernelArgs CommandEncodeStatesFixture::createDefaultDispatchKernel
|
|||||||
return args;
|
return args;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Prepares the fixture: runs the base DeviceFixture setup, creates an
// IndirectHeap, gives it a buffer obtained from alignedMalloc (size 512,
// alignment 0x1000) and attaches a MockGraphicsAllocation wrapping that buffer.
void ScratchProgrammingFixture::setUp() {
|
||||||
|
NEO::DeviceFixture::setUp();
|
||||||
|
size_t sizeStream = 512;
|
||||||
|
size_t alignmentStream = 0x1000;
|
||||||
|
// The heap is constructed with a null argument; its buffer and graphics
// allocation are attached by the replace* calls below.
ssh = new IndirectHeap{nullptr};
|
||||||
|
sshBuffer = alignedMalloc(sizeStream, alignmentStream);
|
||||||
|
// Fatal assertion: the remaining setup is skipped if the allocation failed.
ASSERT_NE(nullptr, sshBuffer);
|
||||||
|
ssh->replaceBuffer(sshBuffer, sizeStream);
|
||||||
|
// Raw-owned allocation; tearDown() deletes it through ssh->getGraphicsAllocation().
auto graphicsAllocation = new MockGraphicsAllocation(sshBuffer, sizeStream);
|
||||||
|
ssh->replaceGraphicsAllocation(graphicsAllocation);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Releases the fixture resources: the graphics allocation attached to the heap,
// the heap object and the raw buffer, then runs the base DeviceFixture teardown.
void ScratchProgrammingFixture::tearDown() {
|
||||||
|
// The allocation is reached through the heap, so it is deleted before the heap.
delete ssh->getGraphicsAllocation();
|
||||||
|
delete ssh;
|
||||||
|
alignedFree(sshBuffer);
|
||||||
|
NEO::DeviceFixture::tearDown();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|
||||||
void WalkerThreadFixture::setUp() {
|
void WalkerThreadFixture::setUp() {
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2020-2023 Intel Corporation
|
* Copyright (C) 2020-2024 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
@@ -65,6 +65,14 @@ class CommandEncodeStatesFixture : public DeviceFixture {
|
|||||||
NEO::L1CachePolicy l1CachePolicyData;
|
NEO::L1CachePolicy l1CachePolicyData;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Test fixture that extends DeviceFixture with an IndirectHeap pointer and the
// raw buffer pointer used alongside it.
struct ScratchProgrammingFixture : public NEO::DeviceFixture {
|
||||||
|
void setUp();
|
||||||
|
void tearDown();
|
||||||
|
|
||||||
|
// Heap pointer; null by default.
IndirectHeap *ssh = nullptr;
|
||||||
|
// Raw buffer pointer; null by default.
void *sshBuffer = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|
||||||
struct WalkerThreadFixture {
|
struct WalkerThreadFixture {
|
||||||
|
|||||||
Reference in New Issue
Block a user