/*
 * Copyright (C) 2019-2023 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */
|
|
|
|
|
2020-04-03 16:15:38 +08:00
|
|
|
#include "shared/source/command_container/command_encoder.h"
|
2023-01-21 01:45:04 +08:00
|
|
|
#include "shared/source/command_container/encode_surface_state.h"
|
2020-04-03 16:15:38 +08:00
|
|
|
#include "shared/source/command_stream/csr_definitions.h"
|
|
|
|
#include "shared/source/command_stream/preemption.h"
|
|
|
|
#include "shared/source/debug_settings/debug_settings_manager.h"
|
|
|
|
#include "shared/source/helpers/address_patch.h"
|
|
|
|
#include "shared/source/helpers/aligned_memory.h"
|
|
|
|
#include "shared/source/helpers/basic_math.h"
|
2023-02-02 00:23:01 +08:00
|
|
|
#include "shared/source/helpers/gfx_core_helper.h"
|
2023-02-07 20:53:53 +08:00
|
|
|
#include "shared/source/helpers/hw_info.h"
|
2020-10-07 21:09:42 +08:00
|
|
|
#include "shared/source/helpers/local_id_gen.h"
|
2020-04-03 16:15:38 +08:00
|
|
|
#include "shared/source/indirect_heap/indirect_heap.h"
|
2022-12-29 20:27:52 +08:00
|
|
|
#include "shared/source/kernel/dispatch_kernel_encoder_interface.h"
|
2023-01-04 00:20:12 +08:00
|
|
|
#include "shared/source/kernel/implicit_args.h"
|
2020-02-24 17:22:30 +08:00
|
|
|
|
2020-04-03 16:15:38 +08:00
|
|
|
#include "opencl/source/cl_device/cl_device.h"
|
|
|
|
#include "opencl/source/context/context.h"
|
|
|
|
#include "opencl/source/helpers/dispatch_info.h"
|
2020-02-23 05:50:57 +08:00
|
|
|
#include "opencl/source/kernel/kernel.h"
|
2020-04-03 16:15:38 +08:00
|
|
|
|
2019-03-26 18:59:46 +08:00
|
|
|
namespace NEO {
|
2018-10-04 21:01:52 +08:00
|
|
|
|
|
|
|
template <typename GfxFamily>
|
2021-03-22 23:26:03 +08:00
|
|
|
size_t HardwareCommandsHelper<GfxFamily>::getSizeRequiredDSH(const Kernel &kernel) {
|
2022-01-12 22:47:27 +08:00
|
|
|
constexpr auto samplerStateSize = sizeof(typename GfxFamily::SAMPLER_STATE);
|
|
|
|
constexpr auto maxIndirectSamplerStateSize = alignUp(sizeof(typename GfxFamily::SAMPLER_BORDER_COLOR_STATE), MemoryConstants::cacheLineSize);
|
|
|
|
const auto numSamplers = kernel.getKernelInfo().kernelDescriptor.payloadMappings.samplerTable.numSamplers;
|
|
|
|
|
|
|
|
if (numSamplers == 0U) {
|
|
|
|
return alignUp(additionalSizeRequiredDsh(), MemoryConstants::cacheLineSize);
|
|
|
|
}
|
|
|
|
|
|
|
|
auto calculatedTotalSize = alignUp(maxIndirectSamplerStateSize + numSamplers * samplerStateSize + additionalSizeRequiredDsh(),
|
|
|
|
MemoryConstants::cacheLineSize);
|
|
|
|
DEBUG_BREAK_IF(calculatedTotalSize > kernel.getDynamicStateHeapSize());
|
|
|
|
return calculatedTotalSize;
|
2020-04-03 16:15:38 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
2021-03-22 23:26:03 +08:00
|
|
|
size_t HardwareCommandsHelper<GfxFamily>::getSizeRequiredIOH(const Kernel &kernel,
|
|
|
|
size_t localWorkSize) {
|
2020-04-03 16:15:38 +08:00
|
|
|
typedef typename GfxFamily::WALKER_TYPE WALKER_TYPE;
|
2022-02-22 20:24:30 +08:00
|
|
|
const auto &kernelDescriptor = kernel.getDescriptor();
|
|
|
|
const auto &hwInfo = kernel.getHardwareInfo();
|
2018-10-04 21:01:52 +08:00
|
|
|
|
2022-02-22 20:24:30 +08:00
|
|
|
auto numChannels = kernelDescriptor.kernelAttributes.numLocalIdChannels;
|
|
|
|
uint32_t grfSize = hwInfo.capabilityTable.grfSize;
|
|
|
|
auto simdSize = kernelDescriptor.kernelAttributes.simdSize;
|
2021-09-08 21:20:44 +08:00
|
|
|
auto size = kernel.getCrossThreadDataSize() +
|
2022-02-17 20:41:06 +08:00
|
|
|
getPerThreadDataSizeTotal(simdSize, grfSize, numChannels, localWorkSize);
|
2021-09-08 21:20:44 +08:00
|
|
|
|
2022-02-22 20:24:30 +08:00
|
|
|
auto pImplicitArgs = kernel.getImplicitArgs();
|
|
|
|
if (pImplicitArgs) {
|
|
|
|
size += ImplicitArgsHelper::getSizeForImplicitArgsPatching(pImplicitArgs, kernelDescriptor, hwInfo);
|
2021-09-08 21:20:44 +08:00
|
|
|
}
|
|
|
|
return alignUp(size, WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
|
2018-10-04 21:01:52 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
2021-03-22 23:26:03 +08:00
|
|
|
size_t HardwareCommandsHelper<GfxFamily>::getSizeRequiredSSH(const Kernel &kernel) {
|
2020-04-03 16:15:38 +08:00
|
|
|
typedef typename GfxFamily::BINDING_TABLE_STATE BINDING_TABLE_STATE;
|
2021-03-22 23:26:03 +08:00
|
|
|
auto sizeSSH = kernel.getSurfaceStateHeapSize();
|
2020-04-03 16:15:38 +08:00
|
|
|
sizeSSH += sizeSSH ? BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0;
|
|
|
|
return sizeSSH;
|
|
|
|
}
|
|
|
|
|
|
|
|
template <typename SizeGetterT, typename... ArgsT>
|
|
|
|
size_t getSizeRequired(const MultiDispatchInfo &multiDispatchInfo, SizeGetterT &&getSize, ArgsT... args) {
|
|
|
|
size_t totalSize = 0;
|
|
|
|
auto it = multiDispatchInfo.begin();
|
|
|
|
for (auto e = multiDispatchInfo.end(); it != e; ++it) {
|
|
|
|
totalSize = alignUp(totalSize, MemoryConstants::cacheLineSize);
|
|
|
|
totalSize += getSize(*it, std::forward<ArgsT>(args)...);
|
|
|
|
}
|
|
|
|
totalSize = alignUp(totalSize, MemoryConstants::pageSize);
|
|
|
|
return totalSize;
|
2018-10-04 21:01:52 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
template <typename GfxFamily>
|
2020-04-03 16:15:38 +08:00
|
|
|
size_t HardwareCommandsHelper<GfxFamily>::getTotalSizeRequiredDSH(
|
|
|
|
const MultiDispatchInfo &multiDispatchInfo) {
|
2021-03-22 23:26:03 +08:00
|
|
|
return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredDSH(*dispatchInfo.getKernel()); });
|
2018-10-04 21:01:52 +08:00
|
|
|
}
|
|
|
|
|
2019-01-31 21:47:55 +08:00
|
|
|
template <typename GfxFamily>
|
2020-04-03 16:15:38 +08:00
|
|
|
size_t HardwareCommandsHelper<GfxFamily>::getTotalSizeRequiredIOH(
|
|
|
|
const MultiDispatchInfo &multiDispatchInfo) {
|
2020-11-19 02:39:32 +08:00
|
|
|
return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredIOH(
|
|
|
|
*dispatchInfo.getKernel(),
|
|
|
|
Math::computeTotalElementsCount(dispatchInfo.getLocalWorkgroupSize())); });
|
2019-01-31 21:47:55 +08:00
|
|
|
}
|
|
|
|
|
2018-10-04 21:01:52 +08:00
|
|
|
template <typename GfxFamily>
|
2020-04-03 16:15:38 +08:00
|
|
|
size_t HardwareCommandsHelper<GfxFamily>::getTotalSizeRequiredSSH(
|
|
|
|
const MultiDispatchInfo &multiDispatchInfo) {
|
2021-03-22 23:26:03 +08:00
|
|
|
return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredSSH(*dispatchInfo.getKernel()); });
|
2020-04-03 16:15:38 +08:00
|
|
|
}
|
|
|
|
|
2018-10-04 21:01:52 +08:00
|
|
|
template <typename GfxFamily>
|
2020-04-03 16:15:38 +08:00
|
|
|
size_t HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
|
|
|
|
const IndirectHeap &indirectHeap,
|
|
|
|
uint64_t offsetInterfaceDescriptor,
|
|
|
|
uint64_t kernelStartOffset,
|
|
|
|
size_t sizeCrossThreadData,
|
|
|
|
size_t sizePerThreadData,
|
|
|
|
size_t bindingTablePointer,
|
2022-01-14 07:57:00 +08:00
|
|
|
[[maybe_unused]] size_t offsetSamplerState,
|
2020-04-03 16:15:38 +08:00
|
|
|
uint32_t numSamplers,
|
2022-08-03 20:22:30 +08:00
|
|
|
const uint32_t threadGroupCount,
|
2020-04-03 16:15:38 +08:00
|
|
|
uint32_t threadsPerThreadGroup,
|
|
|
|
const Kernel &kernel,
|
|
|
|
uint32_t bindingTablePrefetchSize,
|
|
|
|
PreemptionMode preemptionMode,
|
2020-11-13 18:41:45 +08:00
|
|
|
INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
|
2020-12-07 22:41:52 +08:00
|
|
|
const Device &device) {
|
2020-04-03 16:15:38 +08:00
|
|
|
using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE;
|
2020-12-04 19:57:11 +08:00
|
|
|
using SHARED_LOCAL_MEMORY_SIZE = typename INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE;
|
2020-04-03 16:15:38 +08:00
|
|
|
|
2020-12-07 22:41:52 +08:00
|
|
|
const auto &hardwareInfo = device.getHardwareInfo();
|
2022-01-20 19:23:30 +08:00
|
|
|
const auto &kernelDescriptor = kernel.getKernelInfo().kernelDescriptor;
|
2020-12-07 22:41:52 +08:00
|
|
|
|
2020-04-03 16:15:38 +08:00
|
|
|
// Allocate some memory for the interface descriptor
|
|
|
|
auto pInterfaceDescriptor = getInterfaceDescriptor(indirectHeap, offsetInterfaceDescriptor, inlineInterfaceDescriptor);
|
2020-04-28 00:55:26 +08:00
|
|
|
auto interfaceDescriptor = GfxFamily::cmdInitInterfaceDescriptorData;
|
2020-04-03 16:15:38 +08:00
|
|
|
|
|
|
|
// Program the kernel start pointer
|
2022-01-14 07:57:00 +08:00
|
|
|
interfaceDescriptor.setKernelStartPointer(static_cast<uint32_t>(kernelStartOffset & std::numeric_limits<uint32_t>::max()));
|
2020-04-03 16:15:38 +08:00
|
|
|
|
|
|
|
// # of threads in thread group should be based on LWS.
|
2020-04-28 00:55:26 +08:00
|
|
|
interfaceDescriptor.setNumberOfThreadsInGpgpuThreadGroup(threadsPerThreadGroup);
|
2020-04-03 16:15:38 +08:00
|
|
|
|
2020-04-28 00:55:26 +08:00
|
|
|
interfaceDescriptor.setDenormMode(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL);
|
2020-04-03 16:15:38 +08:00
|
|
|
|
2021-03-22 19:06:23 +08:00
|
|
|
auto slmTotalSize = kernel.getSlmTotalSize();
|
2020-12-18 22:47:42 +08:00
|
|
|
|
2022-04-26 22:28:18 +08:00
|
|
|
EncodeDispatchKernel<GfxFamily>::setGrfInfo(&interfaceDescriptor, kernelDescriptor.kernelAttributes.numGrfRequired,
|
|
|
|
sizeCrossThreadData, sizePerThreadData, hardwareInfo);
|
2022-12-29 06:59:37 +08:00
|
|
|
auto &productHelper = device.getProductHelper();
|
2022-12-13 00:43:41 +08:00
|
|
|
productHelper.updateIddCommand(&interfaceDescriptor, kernelDescriptor.kernelAttributes.numGrfRequired,
|
|
|
|
kernelDescriptor.kernelAttributes.threadArbitrationPolicy);
|
2022-05-17 04:34:53 +08:00
|
|
|
|
2023-01-09 23:56:36 +08:00
|
|
|
EncodeDispatchKernel<GfxFamily>::appendAdditionalIDDFields(&interfaceDescriptor, device.getRootDeviceEnvironment(), threadsPerThreadGroup,
|
2022-04-26 22:28:18 +08:00
|
|
|
slmTotalSize, SlmPolicy::SlmPolicyNone);
|
2020-04-03 16:15:38 +08:00
|
|
|
|
2020-04-28 00:55:26 +08:00
|
|
|
interfaceDescriptor.setBindingTablePointer(static_cast<uint32_t>(bindingTablePointer));
|
2020-04-03 16:15:38 +08:00
|
|
|
|
2022-01-14 07:57:00 +08:00
|
|
|
if constexpr (GfxFamily::supportsSampler) {
|
2022-09-30 21:20:48 +08:00
|
|
|
if (device.getDeviceInfo().imageSupport) {
|
|
|
|
interfaceDescriptor.setSamplerStatePointer(static_cast<uint32_t>(offsetSamplerState));
|
|
|
|
}
|
2022-01-14 07:57:00 +08:00
|
|
|
}
|
2020-04-03 16:15:38 +08:00
|
|
|
|
2020-11-27 17:22:59 +08:00
|
|
|
EncodeDispatchKernel<GfxFamily>::adjustBindingTablePrefetch(interfaceDescriptor, numSamplers, bindingTablePrefetchSize);
|
2020-04-03 16:15:38 +08:00
|
|
|
|
2023-01-09 23:56:36 +08:00
|
|
|
auto &gfxCoreHelper = device.getGfxCoreHelper();
|
2020-06-09 03:49:11 +08:00
|
|
|
auto programmableIDSLMSize =
|
2023-01-09 23:56:36 +08:00
|
|
|
static_cast<SHARED_LOCAL_MEMORY_SIZE>(gfxCoreHelper.computeSlmValues(hardwareInfo, slmTotalSize));
|
2020-04-03 16:15:38 +08:00
|
|
|
|
2021-03-11 23:53:39 +08:00
|
|
|
if (DebugManager.flags.OverrideSlmAllocationSize.get() != -1) {
|
|
|
|
programmableIDSLMSize = static_cast<SHARED_LOCAL_MEMORY_SIZE>(DebugManager.flags.OverrideSlmAllocationSize.get());
|
|
|
|
}
|
|
|
|
|
2020-04-28 00:55:26 +08:00
|
|
|
interfaceDescriptor.setSharedLocalMemorySize(programmableIDSLMSize);
|
2020-07-23 03:17:50 +08:00
|
|
|
EncodeDispatchKernel<GfxFamily>::programBarrierEnable(interfaceDescriptor,
|
2022-01-20 19:23:30 +08:00
|
|
|
kernelDescriptor.kernelAttributes.barrierCount,
|
2020-11-13 18:41:45 +08:00
|
|
|
hardwareInfo);
|
2020-04-03 16:15:38 +08:00
|
|
|
|
2020-04-28 00:55:26 +08:00
|
|
|
PreemptionHelper::programInterfaceDescriptorDataPreemption<GfxFamily>(&interfaceDescriptor, preemptionMode);
|
2022-08-03 20:22:30 +08:00
|
|
|
|
2022-10-26 21:49:19 +08:00
|
|
|
EncodeDispatchKernel<GfxFamily>::adjustInterfaceDescriptorData(interfaceDescriptor, device, hardwareInfo, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired);
|
2020-04-03 16:15:38 +08:00
|
|
|
|
2020-04-28 00:55:26 +08:00
|
|
|
*pInterfaceDescriptor = interfaceDescriptor;
|
2020-04-03 16:15:38 +08:00
|
|
|
return (size_t)offsetInterfaceDescriptor;
|
|
|
|
}
|
|
|
|
|
2022-10-21 22:16:43 +08:00
|
|
|
template <typename GfxFamily>
|
|
|
|
void HardwareCommandsHelper<GfxFamily>::programPerThreadData(
|
|
|
|
bool localIdsGenerationByRuntime,
|
|
|
|
size_t &sizePerThreadData,
|
|
|
|
size_t &sizePerThreadDataTotal,
|
|
|
|
LinearStream &ioh,
|
|
|
|
const Kernel &kernel,
|
|
|
|
const size_t localWorkSize[3]) {
|
|
|
|
if (localIdsGenerationByRuntime) {
|
|
|
|
Vec3<uint16_t> group = {static_cast<uint16_t>(localWorkSize[0]),
|
|
|
|
static_cast<uint16_t>(localWorkSize[1]),
|
|
|
|
static_cast<uint16_t>(localWorkSize[2])};
|
|
|
|
sizePerThreadData = kernel.getLocalIdsSizePerThread();
|
|
|
|
sizePerThreadDataTotal = kernel.getLocalIdsSizeForGroup(group);
|
|
|
|
auto dest = ioh.getSpace(sizePerThreadDataTotal);
|
|
|
|
kernel.setLocalIdsForGroup(group, dest);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-10-04 21:01:52 +08:00
|
|
|
template <typename GfxFamily>
|
2020-04-03 16:15:38 +08:00
|
|
|
size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
|
|
|
LinearStream &commandStream,
|
|
|
|
IndirectHeap &dsh,
|
|
|
|
IndirectHeap &ioh,
|
|
|
|
IndirectHeap &ssh,
|
2018-10-04 21:01:52 +08:00
|
|
|
Kernel &kernel,
|
2020-04-03 16:15:38 +08:00
|
|
|
uint64_t kernelStartOffset,
|
|
|
|
uint32_t simd,
|
|
|
|
const size_t localWorkSize[3],
|
2022-08-03 20:22:30 +08:00
|
|
|
const uint32_t threadGroupCount,
|
2020-04-03 16:15:38 +08:00
|
|
|
const uint64_t offsetInterfaceDescriptorTable,
|
|
|
|
uint32_t &interfaceDescriptorIndex,
|
|
|
|
PreemptionMode preemptionMode,
|
2021-11-04 23:40:13 +08:00
|
|
|
WALKER_TYPE *walkerCmd,
|
2020-04-03 16:15:38 +08:00
|
|
|
INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
|
2020-11-13 18:41:45 +08:00
|
|
|
bool localIdsGenerationByRuntime,
|
2020-11-19 02:39:32 +08:00
|
|
|
const Device &device) {
|
2018-10-04 21:01:52 +08:00
|
|
|
|
2020-04-03 16:15:38 +08:00
|
|
|
using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE;
|
|
|
|
|
|
|
|
DEBUG_BREAK_IF(simd != 1 && simd != 8 && simd != 16 && simd != 32);
|
2021-03-22 23:26:03 +08:00
|
|
|
auto inlineDataProgrammingRequired = HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(kernel);
|
2020-04-03 16:15:38 +08:00
|
|
|
|
|
|
|
// Copy the kernel over to the ISH
|
2021-03-22 23:26:03 +08:00
|
|
|
const auto &kernelInfo = kernel.getKernelInfo();
|
2020-04-03 16:15:38 +08:00
|
|
|
|
2020-04-22 04:40:21 +08:00
|
|
|
ssh.align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
|
|
|
|
2023-03-08 06:43:53 +08:00
|
|
|
size_t dstBindingTablePointer = HardwareCommandsHelper<GfxFamily>::checkForAdditionalBTAndSetBTPointer(ssh, kernel);
|
2020-04-03 16:15:38 +08:00
|
|
|
|
|
|
|
// Copy our sampler state if it exists
|
Remove PatchTokens from KernelInfo
Use KernelDescriptor instead of patchTokens stored in KernelInfo's
patchInfo.
Removed: SPatchMediaInterfaceDescriptorLoad, SPatchAllocateLocalSurface,
SPatchMediaVFEState(slot 0), SPatchMediaVFEState(slot 1),
SPatchInterfaceDescriptorData, SPatchSamplerStateArray,
SPatchBindingTableState, SPatchDataParameterBuffer,
SPatchDataParameterStream, SPatchThreadPayload,
SPatchKernelAttributesInfo, SPatchAllocateStatelessPrivateSurface,
SPatchAllocateSyncBuffer,
SPatchAllocateStatelessConstantMemorySurfaceWithInitialization,
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization,
SPatchAllocateSystemThreadSurface.
Related-To: NEO-4729
Signed-off-by: Krystian Chmielewski <krystian.chmielewski@intel.com>
2021-03-04 17:14:23 +08:00
|
|
|
const auto &samplerTable = kernelInfo.kernelDescriptor.payloadMappings.samplerTable;
|
2020-04-03 16:15:38 +08:00
|
|
|
uint32_t samplerCount = 0;
|
Remove PatchTokens from KernelInfo
Use KernelDescriptor instead of patchTokens stored in KernelInfo's
patchInfo.
Removed: SPatchMediaInterfaceDescriptorLoad, SPatchAllocateLocalSurface,
SPatchMediaVFEState(slot 0), SPatchMediaVFEState(slot 1),
SPatchInterfaceDescriptorData, SPatchSamplerStateArray,
SPatchBindingTableState, SPatchDataParameterBuffer,
SPatchDataParameterStream, SPatchThreadPayload,
SPatchKernelAttributesInfo, SPatchAllocateStatelessPrivateSurface,
SPatchAllocateSyncBuffer,
SPatchAllocateStatelessConstantMemorySurfaceWithInitialization,
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization,
SPatchAllocateSystemThreadSurface.
Related-To: NEO-4729
Signed-off-by: Krystian Chmielewski <krystian.chmielewski@intel.com>
2021-03-04 17:14:23 +08:00
|
|
|
uint32_t samplerStateOffset = 0;
|
|
|
|
if (isValidOffset(samplerTable.tableOffset) && isValidOffset(samplerTable.borderColor)) {
|
|
|
|
samplerCount = samplerTable.numSamplers;
|
|
|
|
samplerStateOffset = EncodeStates<GfxFamily>::copySamplerState(&dsh, samplerTable.tableOffset,
|
|
|
|
samplerCount, samplerTable.borderColor,
|
2021-08-25 18:28:05 +08:00
|
|
|
kernel.getDynamicStateHeap(), device.getBindlessHeapsHelper(),
|
2022-11-10 08:05:51 +08:00
|
|
|
device.getRootDeviceEnvironment());
|
2020-04-03 16:15:38 +08:00
|
|
|
}
|
2019-12-17 15:55:09 +08:00
|
|
|
|
2020-04-03 16:15:38 +08:00
|
|
|
auto localWorkItems = localWorkSize[0] * localWorkSize[1] * localWorkSize[2];
|
|
|
|
auto threadsPerThreadGroup = static_cast<uint32_t>(getThreadsPerWG(simd, localWorkItems));
|
|
|
|
|
2021-03-22 19:06:23 +08:00
|
|
|
uint32_t sizeCrossThreadData = kernel.getCrossThreadDataSize();
|
2020-04-03 16:15:38 +08:00
|
|
|
|
|
|
|
size_t offsetCrossThreadData = HardwareCommandsHelper<GfxFamily>::sendCrossThreadData(
|
|
|
|
ioh, kernel, inlineDataProgrammingRequired,
|
2021-03-22 19:06:23 +08:00
|
|
|
walkerCmd, sizeCrossThreadData);
|
2020-04-03 16:15:38 +08:00
|
|
|
|
|
|
|
size_t sizePerThreadDataTotal = 0;
|
|
|
|
size_t sizePerThreadData = 0;
|
|
|
|
|
|
|
|
HardwareCommandsHelper<GfxFamily>::programPerThreadData(
|
|
|
|
localIdsGenerationByRuntime,
|
2022-10-21 22:16:43 +08:00
|
|
|
sizePerThreadData,
|
|
|
|
sizePerThreadDataTotal,
|
2018-10-04 21:01:52 +08:00
|
|
|
ioh,
|
2020-04-03 16:15:38 +08:00
|
|
|
kernel,
|
2022-10-21 22:16:43 +08:00
|
|
|
localWorkSize);
|
2018-10-04 21:01:52 +08:00
|
|
|
|
2020-04-03 16:15:38 +08:00
|
|
|
uint64_t offsetInterfaceDescriptor = offsetInterfaceDescriptorTable + interfaceDescriptorIndex * sizeof(INTERFACE_DESCRIPTOR_DATA);
|
2018-10-04 21:01:52 +08:00
|
|
|
|
2022-09-13 20:47:58 +08:00
|
|
|
auto bindingTablePrefetchSize = 0;
|
|
|
|
if (EncodeSurfaceState<GfxFamily>::doBindingTablePrefetch()) {
|
|
|
|
bindingTablePrefetchSize = std::min(31u, static_cast<uint32_t>(kernel.getNumberOfBindingTableStates()));
|
2020-04-03 16:15:38 +08:00
|
|
|
}
|
2018-10-18 12:38:18 +08:00
|
|
|
|
2020-04-03 16:15:38 +08:00
|
|
|
HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
|
|
|
|
dsh,
|
|
|
|
offsetInterfaceDescriptor,
|
|
|
|
kernelStartOffset,
|
|
|
|
sizeCrossThreadData,
|
|
|
|
sizePerThreadData,
|
|
|
|
dstBindingTablePointer,
|
|
|
|
samplerStateOffset,
|
|
|
|
samplerCount,
|
2022-08-03 20:22:30 +08:00
|
|
|
threadGroupCount,
|
2020-04-03 16:15:38 +08:00
|
|
|
threadsPerThreadGroup,
|
|
|
|
kernel,
|
|
|
|
bindingTablePrefetchSize,
|
|
|
|
preemptionMode,
|
2020-11-13 18:41:45 +08:00
|
|
|
inlineInterfaceDescriptor,
|
2020-12-07 22:41:52 +08:00
|
|
|
device);
|
2018-10-18 12:38:18 +08:00
|
|
|
|
|
|
|
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
2020-04-03 16:15:38 +08:00
|
|
|
PatchInfoData patchInfoData(kernelStartOffset, 0, PatchInfoAllocationType::InstructionHeap, dsh.getGraphicsAllocation()->getGpuAddress(), offsetInterfaceDescriptor, PatchInfoAllocationType::DynamicStateHeap);
|
|
|
|
kernel.getPatchInfoDataList().push_back(patchInfoData);
|
2018-10-18 12:38:18 +08:00
|
|
|
}
|
|
|
|
|
2020-04-03 16:15:38 +08:00
|
|
|
// Program media state flush to set interface descriptor offset
|
|
|
|
sendMediaStateFlush(
|
|
|
|
commandStream,
|
|
|
|
interfaceDescriptorIndex);
|
|
|
|
|
|
|
|
DEBUG_BREAK_IF(offsetCrossThreadData % 64 != 0);
|
|
|
|
walkerCmd->setIndirectDataStartAddress(static_cast<uint32_t>(offsetCrossThreadData));
|
|
|
|
setInterfaceDescriptorOffset(walkerCmd, interfaceDescriptorIndex);
|
|
|
|
|
|
|
|
auto indirectDataLength = alignUp(static_cast<uint32_t>(sizeCrossThreadData + sizePerThreadDataTotal),
|
2021-11-04 23:40:13 +08:00
|
|
|
WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
|
2020-04-03 16:15:38 +08:00
|
|
|
walkerCmd->setIndirectDataLength(indirectDataLength);
|
|
|
|
|
|
|
|
return offsetCrossThreadData;
|
|
|
|
}
|
|
|
|
|
2019-10-11 15:17:06 +08:00
|
|
|
template <typename GfxFamily>
|
2021-03-22 23:26:03 +08:00
|
|
|
bool HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(const Kernel &kernel) {
|
2020-04-03 16:15:38 +08:00
|
|
|
auto checkKernelForInlineData = true;
|
|
|
|
if (DebugManager.flags.EnablePassInlineData.get() != -1) {
|
|
|
|
checkKernelForInlineData = !!DebugManager.flags.EnablePassInlineData.get();
|
|
|
|
}
|
|
|
|
if (checkKernelForInlineData) {
|
2021-03-22 23:26:03 +08:00
|
|
|
return kernel.getKernelInfo().kernelDescriptor.kernelAttributes.flags.passInlineData;
|
2020-04-03 16:15:38 +08:00
|
|
|
}
|
|
|
|
return false;
|
2019-10-11 15:17:06 +08:00
|
|
|
}
|
|
|
|
|
2019-10-30 17:23:26 +08:00
|
|
|
template <typename GfxFamily>
|
2021-03-22 23:26:03 +08:00
|
|
|
bool HardwareCommandsHelper<GfxFamily>::kernelUsesLocalIds(const Kernel &kernel) {
|
|
|
|
return kernel.getKernelInfo().kernelDescriptor.kernelAttributes.numLocalIdChannels > 0;
|
2020-04-03 16:15:38 +08:00
|
|
|
}
|
|
|
|
|
2023-03-08 06:43:53 +08:00
|
|
|
template <typename GfxFamily>
|
|
|
|
size_t HardwareCommandsHelper<GfxFamily>::checkForAdditionalBTAndSetBTPointer(IndirectHeap &ssh, const Kernel &kernel) {
|
|
|
|
size_t dstBindingTablePointer{0u};
|
|
|
|
const auto &kernelInfo = kernel.getKernelInfo();
|
|
|
|
if (isGTPinInitialized && 0u == kernelInfo.kernelDescriptor.payloadMappings.bindingTable.numEntries) {
|
|
|
|
dstBindingTablePointer = EncodeSurfaceState<GfxFamily>::pushBindingTableAndSurfaceStates(ssh, 1u,
|
|
|
|
kernel.getSurfaceStateHeap(), kernel.getSurfaceStateHeapSize(),
|
|
|
|
kernel.getNumberOfBindingTableStates(), kernel.getBindingTableOffset());
|
|
|
|
} else {
|
|
|
|
dstBindingTablePointer = EncodeSurfaceState<GfxFamily>::pushBindingTableAndSurfaceStates(ssh, kernelInfo.kernelDescriptor.payloadMappings.bindingTable.numEntries,
|
|
|
|
kernel.getSurfaceStateHeap(), kernel.getSurfaceStateHeapSize(),
|
|
|
|
kernel.getNumberOfBindingTableStates(), kernel.getBindingTableOffset());
|
|
|
|
}
|
|
|
|
return dstBindingTablePointer;
|
|
|
|
}
|
|
|
|
|
2019-03-26 18:59:46 +08:00
|
|
|
} // namespace NEO
|