refactor: add separate enablers for compute walker and gpgpu walker

Related-To: NEO-10641
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk
2024-06-26 16:45:18 +00:00
committed by Compute-Runtime-Automation
parent 4c3c89adb1
commit 70e52ce4d8
11 changed files with 89 additions and 17 deletions

View File

@@ -0,0 +1,65 @@
/*
* Copyright (C) 2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "hardware_commands_helper.h"
// Explicit instantiation definition of the HardwareCommandsHelper class
// template for NEO::FamilyType.
// NOTE(review): NEO::FamilyType is not defined in this file; presumably the
// translation unit that includes this enabler defines it first - confirm.
// An explicit instantiation of a class template does not cover its member
// templates, which is why each member template is instantiated separately
// below.
template struct NEO::HardwareCommandsHelper<NEO::FamilyType>;
// Explicit instantiation of the sendIndirectState member template with the
// template arguments FamilyType::COMPUTE_WALKER and
// FamilyType::INTERFACE_DESCRIPTOR_DATA.
// The parameter list has to match the member template's declaration exactly
// (declared outside this file), so keep both in sync when the signature
// changes.
template size_t NEO::HardwareCommandsHelper<NEO::FamilyType>::sendIndirectState<NEO::FamilyType::COMPUTE_WALKER, NEO::FamilyType::INTERFACE_DESCRIPTOR_DATA>(
LinearStream &commandStream,
IndirectHeap &dsh,
IndirectHeap &ioh,
IndirectHeap &ssh,
Kernel &kernel,
uint64_t kernelStartOffset,
uint32_t simd,
const size_t localWorkSize[3],
const uint32_t threadGroupCount,
const uint64_t offsetInterfaceDescriptorTable,
uint32_t &interfaceDescriptorIndex,
PreemptionMode preemptionMode,
FamilyType::COMPUTE_WALKER *walkerCmd,
FamilyType::INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
bool localIdsGenerationByRuntime,
uint64_t scratchAddress,
const Device &device,
bool heaplessStateInitEnabled);
// Explicit instantiation of the sendCrossThreadData member template with the
// template argument FamilyType::COMPUTE_WALKER.
// The parameter list has to match the member template's declaration exactly
// (declared outside this file).
template size_t NEO::HardwareCommandsHelper<NEO::FamilyType>::sendCrossThreadData<NEO::FamilyType::COMPUTE_WALKER>(
IndirectHeap &indirectHeap,
Kernel &kernel,
bool inlineDataProgrammingRequired,
FamilyType::COMPUTE_WALKER *walkerCmd,
uint32_t &sizeCrossThreadData,
uint64_t scratchAddress,
const RootDeviceEnvironment &rootDeviceEnvironment);
// Explicit instantiation of the sendInterfaceDescriptorData member template
// with the template arguments FamilyType::COMPUTE_WALKER and
// FamilyType::INTERFACE_DESCRIPTOR_DATA.
// The parameter list has to match the member template's declaration exactly
// (declared outside this file).
template size_t NEO::HardwareCommandsHelper<NEO::FamilyType>::sendInterfaceDescriptorData<NEO::FamilyType::COMPUTE_WALKER, NEO::FamilyType::INTERFACE_DESCRIPTOR_DATA>(
const IndirectHeap &indirectHeap,
uint64_t offsetInterfaceDescriptor,
uint64_t kernelStartOffset,
size_t sizeCrossThreadData,
size_t sizePerThreadData,
size_t bindingTablePointer,
[[maybe_unused]] size_t offsetSamplerState,
uint32_t numSamplers,
const uint32_t threadGroupCount,
uint32_t numThreadsPerThreadGroup,
const Kernel &kernel,
uint32_t bindingTablePrefetchSize,
PreemptionMode preemptionMode,
const Device &device,
FamilyType::COMPUTE_WALKER *walkerCmd,
FamilyType::INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
bool heaplessStateInitEnabled);
// Explicit instantiation of the programInlineData member template with the
// template argument FamilyType::COMPUTE_WALKER.
template void NEO::HardwareCommandsHelper<NEO::FamilyType>::programInlineData<NEO::FamilyType::COMPUTE_WALKER>(
Kernel &kernel,
FamilyType::COMPUTE_WALKER *walkerCmd, uint64_t indirectDataAddress, uint64_t scratchAddress);
// Explicit instantiation of the setInterfaceDescriptorOffset member template
// with the template argument FamilyType::COMPUTE_WALKER.
template void NEO::HardwareCommandsHelper<NEO::FamilyType>::setInterfaceDescriptorOffset<NEO::FamilyType::COMPUTE_WALKER>(
FamilyType::COMPUTE_WALKER *walkerCmd,
uint32_t &interfaceDescriptorIndex);

View File

@@ -8,7 +8,7 @@
#include "hardware_commands_helper.h"
template struct NEO::HardwareCommandsHelper<NEO::FamilyType>;
template size_t NEO::HardwareCommandsHelper<NEO::FamilyType>::sendIndirectState<NEO::FamilyType::DefaultWalkerType, NEO::FamilyType::INTERFACE_DESCRIPTOR_DATA>(
template size_t NEO::HardwareCommandsHelper<NEO::FamilyType>::sendIndirectState<NEO::FamilyType::GPGPU_WALKER, NEO::FamilyType::INTERFACE_DESCRIPTOR_DATA>(
LinearStream &commandStream,
IndirectHeap &dsh,
IndirectHeap &ioh,
@@ -21,23 +21,23 @@ template size_t NEO::HardwareCommandsHelper<NEO::FamilyType>::sendIndirectState<
const uint64_t offsetInterfaceDescriptorTable,
uint32_t &interfaceDescriptorIndex,
PreemptionMode preemptionMode,
FamilyType::DefaultWalkerType *walkerCmd,
FamilyType::GPGPU_WALKER *walkerCmd,
FamilyType::INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
bool localIdsGenerationByRuntime,
uint64_t scratchAddress,
const Device &device,
bool heaplessStateInitEnabled);
template size_t NEO::HardwareCommandsHelper<NEO::FamilyType>::sendCrossThreadData<NEO::FamilyType::DefaultWalkerType>(
template size_t NEO::HardwareCommandsHelper<NEO::FamilyType>::sendCrossThreadData<NEO::FamilyType::GPGPU_WALKER>(
IndirectHeap &indirectHeap,
Kernel &kernel,
bool inlineDataProgrammingRequired,
FamilyType::DefaultWalkerType *walkerCmd,
FamilyType::GPGPU_WALKER *walkerCmd,
uint32_t &sizeCrossThreadData,
uint64_t scratchAddress,
const RootDeviceEnvironment &rootDeviceEnvironment);
template size_t NEO::HardwareCommandsHelper<NEO::FamilyType>::sendInterfaceDescriptorData<NEO::FamilyType::DefaultWalkerType, NEO::FamilyType::INTERFACE_DESCRIPTOR_DATA>(
template size_t NEO::HardwareCommandsHelper<NEO::FamilyType>::sendInterfaceDescriptorData<NEO::FamilyType::GPGPU_WALKER, NEO::FamilyType::INTERFACE_DESCRIPTOR_DATA>(
const IndirectHeap &indirectHeap,
uint64_t offsetInterfaceDescriptor,
uint64_t kernelStartOffset,
@@ -52,10 +52,14 @@ template size_t NEO::HardwareCommandsHelper<NEO::FamilyType>::sendInterfaceDescr
uint32_t bindingTablePrefetchSize,
PreemptionMode preemptionMode,
const Device &device,
FamilyType::DefaultWalkerType *walkerCmd,
FamilyType::GPGPU_WALKER *walkerCmd,
FamilyType::INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
bool heaplessStateInitEnabled);
template void NEO::HardwareCommandsHelper<NEO::FamilyType>::programInlineData<NEO::FamilyType::DefaultWalkerType>(
template void NEO::HardwareCommandsHelper<NEO::FamilyType>::programInlineData<NEO::FamilyType::GPGPU_WALKER>(
Kernel &kernel,
FamilyType::DefaultWalkerType *walkerCmd, uint64_t indirectDataAddress, uint64_t scratchAddress);
FamilyType::GPGPU_WALKER *walkerCmd, uint64_t indirectDataAddress, uint64_t scratchAddress);
// Explicit instantiation of the setInterfaceDescriptorOffset member template
// with the template argument FamilyType::GPGPU_WALKER.
template void NEO::HardwareCommandsHelper<NEO::FamilyType>::setInterfaceDescriptorOffset<NEO::FamilyType::GPGPU_WALKER>(
FamilyType::GPGPU_WALKER *walkerCmd,
uint32_t &interfaceDescriptorIndex);

View File

@@ -126,8 +126,9 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
static size_t getTotalSizeRequiredSSH(
const MultiDispatchInfo &multiDispatchInfo);
template <typename WalkerType>
static void setInterfaceDescriptorOffset(
DefaultWalkerType *walkerCmd,
WalkerType *walkerCmd,
uint32_t &interfaceDescriptorIndex);
static bool kernelUsesLocalIds(const Kernel &kernel);

View File

@@ -111,8 +111,9 @@ size_t HardwareCommandsHelper<GfxFamily>::sendCrossThreadData(
}
template <typename GfxFamily>
template <typename WalkerType>
void HardwareCommandsHelper<GfxFamily>::setInterfaceDescriptorOffset(
DefaultWalkerType *walkerCmd,
WalkerType *walkerCmd,
uint32_t &interfaceDescriptorIndex) {
walkerCmd->setInterfaceDescriptorOffset(interfaceDescriptorIndex++);

View File

@@ -133,8 +133,9 @@ size_t HardwareCommandsHelper<GfxFamily>::sendCrossThreadData(
}
template <typename GfxFamily>
template <typename WalkerType>
void HardwareCommandsHelper<GfxFamily>::setInterfaceDescriptorOffset(
DefaultWalkerType *walkerCmd,
WalkerType *walkerCmd,
uint32_t &interfaceDescriptorIndex) {
}