refactor: move scratch and kab programming to a function
Related-To: NEO-7824
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
parent
6a55bbe6cd
commit
6ffa756457
|
@ -178,6 +178,9 @@ struct EncodeDispatchKernel {
|
|||
template <typename WalkerType>
|
||||
static void adjustWalkOrder(WalkerType &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
|
||||
template <bool heaplessModeEnabled>
|
||||
static void programInlineDataHeapless(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData);
|
||||
|
||||
static size_t getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor, uint32_t iddCount);
|
||||
static size_t getSizeRequiredSsh(const KernelInfo &kernelInfo);
|
||||
inline static size_t additionalSizeRequiredDsh(uint32_t iddCount);
|
||||
|
|
|
@ -24,6 +24,7 @@ template void NEO::EncodeDispatchKernel<Family>::adjustWalkOrder<Family::Default
|
|||
template void NEO::EncodeDispatchKernel<Family>::programBarrierEnable<Family::INTERFACE_DESCRIPTOR_DATA>(Family::INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t value, const HardwareInfo &hwInfo);
|
||||
template void NEO::EncodeDispatchKernel<Family>::setScratchAddress<false>(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr);
|
||||
template void NEO::EncodeDispatchKernel<Family>::setScratchAddress<true>(uint64_t &scratchAddress, uint32_t requiredScratchSlot0Size, uint32_t requiredScratchSlot1Size, IndirectHeap *ssh, CommandStreamReceiver &csr);
|
||||
template void NEO::EncodeDispatchKernel<Family>::programInlineDataHeapless<false>(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData);
|
||||
|
||||
template struct NEO::EncodeStates<Family>;
|
||||
template struct NEO::EncodeMath<Family>;
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
/*
|
||||
* Copyright (C) 2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
|
||||
namespace NEO {
|
||||
// Definition of the EncodeDispatchKernel<Family>::programInlineDataHeapless
// member template, parameterized on heaplessModeEnabled.
//
// The body is empty: this definition writes nothing through inlineDataPtr and
// does not read args, container, or offsetThreadData.
//
// NOTE(review): presumably a no-op placeholder for families that do not
// program heapless inline data - confirm against the family-specific sources.
template <typename Family>
|
||||
template <bool heaplessModeEnabled>
|
||||
void EncodeDispatchKernel<Family>::programInlineDataHeapless(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData) {
|
||||
}
|
||||
|
||||
} // namespace NEO
|
|
@ -87,24 +87,22 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
|||
|
||||
bool localIdsGenerationByRuntime = args.dispatchInterface->requiresGenerationOfLocalIdsByRuntime();
|
||||
auto requiredWorkgroupOrder = args.dispatchInterface->getRequiredWorkgroupOrder();
|
||||
bool inlineDataProgramming = EncodeDispatchKernel<Family>::inlineDataProgrammingRequired(kernelDescriptor);
|
||||
{
|
||||
auto alloc = args.dispatchInterface->getIsaAllocation();
|
||||
UNRECOVERABLE_IF(nullptr == alloc);
|
||||
|
||||
{
|
||||
auto isaAllocation = args.dispatchInterface->getIsaAllocation();
|
||||
UNRECOVERABLE_IF(nullptr == isaAllocation);
|
||||
|
||||
uint64_t kernelStartPointer = args.dispatchInterface->getIsaOffsetInParentAllocation();
|
||||
if constexpr (heaplessModeEnabled) {
|
||||
auto address = alloc->getGpuAddress() + args.dispatchInterface->getIsaOffsetInParentAllocation();
|
||||
if (!localIdsGenerationByRuntime) {
|
||||
address += kernelDescriptor.entryPoints.skipPerThreadDataLoad;
|
||||
}
|
||||
idd.setKernelStartPointer(address);
|
||||
kernelStartPointer += isaAllocation->getGpuAddress();
|
||||
} else {
|
||||
auto offset = alloc->getGpuAddressToPatch() + args.dispatchInterface->getIsaOffsetInParentAllocation();
|
||||
kernelStartPointer += isaAllocation->getGpuAddressToPatch();
|
||||
}
|
||||
|
||||
if (!localIdsGenerationByRuntime) {
|
||||
offset += kernelDescriptor.entryPoints.skipPerThreadDataLoad;
|
||||
}
|
||||
idd.setKernelStartPointer(offset);
|
||||
kernelStartPointer += kernelDescriptor.entryPoints.skipPerThreadDataLoad;
|
||||
}
|
||||
idd.setKernelStartPointer(kernelStartPointer);
|
||||
}
|
||||
if (args.dispatchInterface->getKernelDescriptor().kernelAttributes.flags.usesAssert && args.device->getL0Debugger() != nullptr) {
|
||||
idd.setSoftwareExceptionEnable(1);
|
||||
|
@ -231,11 +229,11 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
|||
auto crossThreadData = args.dispatchInterface->getCrossThreadData();
|
||||
|
||||
uint32_t inlineDataProgrammingOffset = 0u;
|
||||
|
||||
bool inlineDataProgramming = EncodeDispatchKernel<Family>::inlineDataProgrammingRequired(kernelDescriptor);
|
||||
if (inlineDataProgramming) {
|
||||
inlineDataProgrammingOffset = std::min(inlineDataSize, sizeCrossThreadData);
|
||||
auto dest = reinterpret_cast<char *>(walkerCmd.getInlineDataPointer());
|
||||
memcpy_s(dest, inlineDataProgrammingOffset, crossThreadData, inlineDataProgrammingOffset);
|
||||
memcpy_s(dest, inlineDataSize, crossThreadData, inlineDataProgrammingOffset);
|
||||
sizeCrossThreadData -= inlineDataProgrammingOffset;
|
||||
crossThreadData = ptrOffset(crossThreadData, inlineDataProgrammingOffset);
|
||||
inlineDataProgramming = inlineDataProgrammingOffset != 0;
|
||||
|
@ -325,30 +323,10 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
|||
EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands);
|
||||
}
|
||||
|
||||
if constexpr (heaplessModeEnabled) {
|
||||
auto inlineDataPointer = reinterpret_cast<char *>(walkerCmd.getInlineDataPointer());
|
||||
auto indirectDataPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.indirectDataPointerAddress;
|
||||
auto heap = container.getIndirectHeap(HeapType::indirectObject);
|
||||
auto address = heap->getHeapGpuBase() + offsetThreadData;
|
||||
std::memcpy(inlineDataPointer + indirectDataPointerAddress.offset, &address, indirectDataPointerAddress.pointerSize);
|
||||
uint8_t *inlineData = reinterpret_cast<uint8_t *>(walkerCmd.getInlineDataPointer());
|
||||
EncodeDispatchKernel<Family>::programInlineDataHeapless<heaplessModeEnabled>(inlineData, args, container, offsetThreadData);
|
||||
|
||||
if (args.immediateScratchAddressPatching) {
|
||||
auto requiredScratchSlot0Size = kernelDescriptor.kernelAttributes.perThreadScratchSize[0];
|
||||
auto requiredScratchSlot1Size = kernelDescriptor.kernelAttributes.perThreadScratchSize[1];
|
||||
auto csr = args.device->getDefaultEngine().commandStreamReceiver;
|
||||
NEO::IndirectHeap *ssh = nullptr;
|
||||
if (csr->getGlobalStatelessHeapAllocation() != nullptr) {
|
||||
ssh = csr->getGlobalStatelessHeap();
|
||||
} else {
|
||||
ssh = args.surfaceStateHeap ? args.surfaceStateHeap : container.getIndirectHeap(HeapType::surfaceState);
|
||||
}
|
||||
|
||||
uint64_t scratchAddress = 0u;
|
||||
EncodeDispatchKernel<Family>::template setScratchAddress<heaplessModeEnabled>(scratchAddress, requiredScratchSlot0Size, requiredScratchSlot1Size, ssh, *csr);
|
||||
auto scratchPointerAddress = kernelDescriptor.payloadMappings.implicitArgs.scratchPointerAddress;
|
||||
std::memcpy(inlineDataPointer + scratchPointerAddress.offset, &scratchAddress, scratchPointerAddress.pointerSize);
|
||||
}
|
||||
} else {
|
||||
if constexpr (heaplessModeEnabled == false) {
|
||||
walkerCmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData));
|
||||
walkerCmd.setIndirectDataLength(sizeThreadData);
|
||||
|
||||
|
|
|
@ -15,6 +15,7 @@ using Family = NEO::Gen11Family;
|
|||
|
||||
#include "shared/source/command_container/command_encoder.inl"
|
||||
#include "shared/source/command_container/command_encoder_bdw_and_later.inl"
|
||||
#include "shared/source/command_container/command_encoder_heap_addressing.inl"
|
||||
#include "shared/source/command_container/encode_compute_mode_bdw_and_later.inl"
|
||||
#include "shared/source/command_container/image_surface_state/compression_params_bdw_and_later.inl"
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@ using Family = NEO::Gen12LpFamily;
|
|||
|
||||
#include "shared/source/command_container/command_encoder.inl"
|
||||
#include "shared/source/command_container/command_encoder_bdw_and_later.inl"
|
||||
#include "shared/source/command_container/command_encoder_heap_addressing.inl"
|
||||
#include "shared/source/command_container/command_encoder_tgllp_and_later.inl"
|
||||
#include "shared/source/command_container/encode_compute_mode_tgllp_and_later.inl"
|
||||
#include "shared/source/command_container/image_surface_state/compression_params_bdw_and_later.inl"
|
||||
|
|
|
@ -13,6 +13,7 @@ using Family = NEO::Gen8Family;
|
|||
|
||||
#include "shared/source/command_container/command_encoder.inl"
|
||||
#include "shared/source/command_container/command_encoder_bdw_and_later.inl"
|
||||
#include "shared/source/command_container/command_encoder_heap_addressing.inl"
|
||||
#include "shared/source/command_container/encode_compute_mode_bdw_and_later.inl"
|
||||
#include "shared/source/command_container/image_surface_state/compression_params_bdw_and_later.inl"
|
||||
|
||||
|
|
|
@ -13,6 +13,7 @@ using Family = NEO::Gen9Family;
|
|||
|
||||
#include "shared/source/command_container/command_encoder.inl"
|
||||
#include "shared/source/command_container/command_encoder_bdw_and_later.inl"
|
||||
#include "shared/source/command_container/command_encoder_heap_addressing.inl"
|
||||
#include "shared/source/command_container/encode_compute_mode_bdw_and_later.inl"
|
||||
#include "shared/source/command_container/image_surface_state/compression_params_bdw_and_later.inl"
|
||||
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
|
||||
using Family = NEO::XeHpcCoreFamily;
|
||||
|
||||
#include "shared/source/command_container/command_encoder_heap_addressing.inl"
|
||||
#include "shared/source/command_container/command_encoder_tgllp_and_later.inl"
|
||||
#include "shared/source/command_container/command_encoder_xe_hpc_core_and_later.inl"
|
||||
#include "shared/source/command_container/command_encoder_xe_hpg_core_and_later.inl"
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
|
||||
using Family = NEO::XeHpgCoreFamily;
|
||||
|
||||
#include "shared/source/command_container/command_encoder_heap_addressing.inl"
|
||||
#include "shared/source/command_container/command_encoder_tgllp_and_later.inl"
|
||||
#include "shared/source/command_container/command_encoder_xe_hpg_core_and_later.inl"
|
||||
#include "shared/source/command_container/image_surface_state/compression_params_tgllp_and_later.inl"
|
||||
|
|
Loading…
Reference in New Issue