/*
 * Copyright (C) 2019-2021 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */
2020-04-03 16:15:38 +08:00
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/csr_definitions.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/helpers/address_patch.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/basic_math.h"
2020-06-09 03:49:11 +08:00
#include "shared/source/helpers/hw_helper.h"
2020-10-07 21:09:42 +08:00
#include "shared/source/helpers/local_id_gen.h"
2020-04-03 16:15:38 +08:00
#include "shared/source/helpers/ptr_math.h"
#include "shared/source/helpers/string.h"
#include "shared/source/indirect_heap/indirect_heap.h"
2020-02-24 17:22:30 +08:00
2020-04-03 16:15:38 +08:00
#include "opencl/source/cl_device/cl_device.h"
#include "opencl/source/context/context.h"
#include "opencl/source/helpers/dispatch_info.h"
2020-02-23 05:50:57 +08:00
#include "opencl/source/kernel/kernel.h"
2020-04-03 16:15:38 +08:00
#include "opencl/source/program/block_kernel_manager.h"
#include "opencl/source/scheduler/scheduler_kernel.h"
#include <cstring>
2018-10-04 21:01:52 +08:00
2019-03-26 18:59:46 +08:00
namespace NEO {
2018-10-04 21:01:52 +08:00
template <typename GfxFamily>
2021-03-22 23:26:03 +08:00
size_t HardwareCommandsHelper<GfxFamily>::getSizeRequiredDSH(const Kernel &kernel) {
2020-04-03 16:15:38 +08:00
using INTERFACE_DESCRIPTOR_DATA = typename GfxFamily::INTERFACE_DESCRIPTOR_DATA;
using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE;
2021-03-22 23:26:03 +08:00
const auto &samplerTable = kernel.getKernelInfo().kernelDescriptor.payloadMappings.samplerTable;
Remove PatchTokens from KernelInfo
Use KernelDescriptor instead of patchTokens stored in KernelInfo's
patchInfo.
Removed: SPatchMediaInterfaceDescriptorLoad, SPatchAllocateLocalSurface,
SPatchMediaVFEState(slot 0), SPatchMediaVFEState(slot 1),
SPatchInterfaceDescriptorData, SPatchSamplerStateArray,
SPatchBindingTableState, SPatchDataParameterBuffer,
SPatchDataParameterStream, SPatchThreadPayload,
SPatchKernelAttributesInfo, SPatchAllocateStatelessPrivateSurface,
SPatchAllocateSyncBuffer,
SPatchAllocateStatelessConstantMemorySurfaceWithInitialization,
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization,
SPatchAllocateSystemThreadSurface.
Related-To: NEO-4729
Signed-off-by: Krystian Chmielewski <krystian.chmielewski@intel.com>
2021-03-04 17:14:23 +08:00
auto samplerCount = samplerTable.numSamplers;
2020-04-03 16:15:38 +08:00
auto totalSize = samplerCount
? alignUp(samplerCount * sizeof(SAMPLER_STATE), INTERFACE_DESCRIPTOR_DATA::SAMPLERSTATEPOINTER_ALIGN_SIZE)
: 0;
Remove PatchTokens from KernelInfo
Use KernelDescriptor instead of patchTokens stored in KernelInfo's
patchInfo.
Removed: SPatchMediaInterfaceDescriptorLoad, SPatchAllocateLocalSurface,
SPatchMediaVFEState(slot 0), SPatchMediaVFEState(slot 1),
SPatchInterfaceDescriptorData, SPatchSamplerStateArray,
SPatchBindingTableState, SPatchDataParameterBuffer,
SPatchDataParameterStream, SPatchThreadPayload,
SPatchKernelAttributesInfo, SPatchAllocateStatelessPrivateSurface,
SPatchAllocateSyncBuffer,
SPatchAllocateStatelessConstantMemorySurfaceWithInitialization,
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization,
SPatchAllocateSystemThreadSurface.
Related-To: NEO-4729
Signed-off-by: Krystian Chmielewski <krystian.chmielewski@intel.com>
2021-03-04 17:14:23 +08:00
auto borderColorSize = samplerTable.borderColor;
2020-10-06 17:54:04 +08:00
borderColorSize = alignUp(borderColorSize + EncodeStates<GfxFamily>::alignIndirectStatePointer - 1,
EncodeStates<GfxFamily>::alignIndirectStatePointer);
2020-04-03 16:15:38 +08:00
totalSize += borderColorSize + additionalSizeRequiredDsh();
2021-03-29 18:34:25 +08:00
DEBUG_BREAK_IF(!(totalSize >= kernel.getDynamicStateHeapSize() || kernel.isVmeKernel()));
2020-04-03 16:15:38 +08:00
2020-10-06 17:54:04 +08:00
return alignUp(totalSize, EncodeStates<GfxFamily>::alignInterfaceDescriptorData);
2020-04-03 16:15:38 +08:00
}
template <typename GfxFamily>
2021-03-22 23:26:03 +08:00
size_t HardwareCommandsHelper<GfxFamily>::getSizeRequiredIOH(const Kernel &kernel,
size_t localWorkSize) {
2020-04-03 16:15:38 +08:00
typedef typename GfxFamily::WALKER_TYPE WALKER_TYPE;
2021-03-22 23:26:03 +08:00
const auto &kernelInfo = kernel.getKernelInfo();
2018-10-04 21:01:52 +08:00
Remove PatchTokens from KernelInfo
Use KernelDescriptor instead of patchTokens stored in KernelInfo's
patchInfo.
Removed: SPatchMediaInterfaceDescriptorLoad, SPatchAllocateLocalSurface,
SPatchMediaVFEState(slot 0), SPatchMediaVFEState(slot 1),
SPatchInterfaceDescriptorData, SPatchSamplerStateArray,
SPatchBindingTableState, SPatchDataParameterBuffer,
SPatchDataParameterStream, SPatchThreadPayload,
SPatchKernelAttributesInfo, SPatchAllocateStatelessPrivateSurface,
SPatchAllocateSyncBuffer,
SPatchAllocateStatelessConstantMemorySurfaceWithInitialization,
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization,
SPatchAllocateSystemThreadSurface.
Related-To: NEO-4729
Signed-off-by: Krystian Chmielewski <krystian.chmielewski@intel.com>
2021-03-04 17:14:23 +08:00
auto numChannels = kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels;
2020-04-03 16:15:38 +08:00
uint32_t grfSize = sizeof(typename GfxFamily::GRF);
2021-03-22 19:06:23 +08:00
return alignUp((kernel.getCrossThreadDataSize() +
Remove PatchTokens from KernelInfo
Use KernelDescriptor instead of patchTokens stored in KernelInfo's
patchInfo.
Removed: SPatchMediaInterfaceDescriptorLoad, SPatchAllocateLocalSurface,
SPatchMediaVFEState(slot 0), SPatchMediaVFEState(slot 1),
SPatchInterfaceDescriptorData, SPatchSamplerStateArray,
SPatchBindingTableState, SPatchDataParameterBuffer,
SPatchDataParameterStream, SPatchThreadPayload,
SPatchKernelAttributesInfo, SPatchAllocateStatelessPrivateSurface,
SPatchAllocateSyncBuffer,
SPatchAllocateStatelessConstantMemorySurfaceWithInitialization,
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization,
SPatchAllocateSystemThreadSurface.
Related-To: NEO-4729
Signed-off-by: Krystian Chmielewski <krystian.chmielewski@intel.com>
2021-03-04 17:14:23 +08:00
getPerThreadDataSizeTotal(kernelInfo.getMaxSimdSize(), grfSize, numChannels, localWorkSize)),
2020-04-03 16:15:38 +08:00
WALKER_TYPE::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
2018-10-04 21:01:52 +08:00
}
template <typename GfxFamily>
2021-03-22 23:26:03 +08:00
size_t HardwareCommandsHelper<GfxFamily>::getSizeRequiredSSH(const Kernel &kernel) {
2020-04-03 16:15:38 +08:00
typedef typename GfxFamily::BINDING_TABLE_STATE BINDING_TABLE_STATE;
2021-03-22 23:26:03 +08:00
auto sizeSSH = kernel.getSurfaceStateHeapSize();
2020-04-03 16:15:38 +08:00
sizeSSH += sizeSSH ? BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE : 0;
return sizeSSH;
}
template <typename SizeGetterT, typename... ArgsT>
size_t getSizeRequired(const MultiDispatchInfo &multiDispatchInfo, SizeGetterT &&getSize, ArgsT... args) {
size_t totalSize = 0;
auto it = multiDispatchInfo.begin();
for (auto e = multiDispatchInfo.end(); it != e; ++it) {
totalSize = alignUp(totalSize, MemoryConstants::cacheLineSize);
totalSize += getSize(*it, std::forward<ArgsT>(args)...);
}
totalSize = alignUp(totalSize, MemoryConstants::pageSize);
return totalSize;
2018-10-04 21:01:52 +08:00
}
template <typename GfxFamily>
2020-04-03 16:15:38 +08:00
size_t HardwareCommandsHelper<GfxFamily>::getTotalSizeRequiredDSH(
const MultiDispatchInfo &multiDispatchInfo) {
2021-03-22 23:26:03 +08:00
return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredDSH(*dispatchInfo.getKernel()); });
2018-10-04 21:01:52 +08:00
}
2019-01-31 21:47:55 +08:00
template <typename GfxFamily>
2020-04-03 16:15:38 +08:00
size_t HardwareCommandsHelper<GfxFamily>::getTotalSizeRequiredIOH(
const MultiDispatchInfo &multiDispatchInfo) {
2020-11-19 02:39:32 +08:00
return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredIOH(
*dispatchInfo.getKernel(),
Math::computeTotalElementsCount(dispatchInfo.getLocalWorkgroupSize())); });
2019-01-31 21:47:55 +08:00
}
2018-10-04 21:01:52 +08:00
template <typename GfxFamily>
2020-04-03 16:15:38 +08:00
size_t HardwareCommandsHelper<GfxFamily>::getTotalSizeRequiredSSH(
const MultiDispatchInfo &multiDispatchInfo) {
2021-03-22 23:26:03 +08:00
return getSizeRequired(multiDispatchInfo, [](const DispatchInfo &dispatchInfo) { return getSizeRequiredSSH(*dispatchInfo.getKernel()); });
2020-04-03 16:15:38 +08:00
}
template <typename GfxFamily>
2021-03-22 23:26:03 +08:00
size_t HardwareCommandsHelper<GfxFamily>::getSshSizeForExecutionModel(const Kernel &kernel) {
2020-04-03 16:15:38 +08:00
typedef typename GfxFamily::BINDING_TABLE_STATE BINDING_TABLE_STATE;
size_t totalSize = 0;
BlockKernelManager *blockManager = kernel.getProgram()->getBlockKernelManager();
uint32_t blockCount = static_cast<uint32_t>(blockManager->getCount());
uint32_t maxBindingTableCount = 0;
totalSize = BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE - 1;
for (uint32_t i = 0; i < blockCount; i++) {
const KernelInfo *pBlockInfo = blockManager->getBlockKernelInfo(i);
2020-05-26 15:36:04 +08:00
totalSize += pBlockInfo->heapInfo.SurfaceStateHeapSize;
2020-04-03 16:15:38 +08:00
totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
2018-10-04 21:01:52 +08:00
Remove PatchTokens from KernelInfo
Use KernelDescriptor instead of patchTokens stored in KernelInfo's
patchInfo.
Removed: SPatchMediaInterfaceDescriptorLoad, SPatchAllocateLocalSurface,
SPatchMediaVFEState(slot 0), SPatchMediaVFEState(slot 1),
SPatchInterfaceDescriptorData, SPatchSamplerStateArray,
SPatchBindingTableState, SPatchDataParameterBuffer,
SPatchDataParameterStream, SPatchThreadPayload,
SPatchKernelAttributesInfo, SPatchAllocateStatelessPrivateSurface,
SPatchAllocateSyncBuffer,
SPatchAllocateStatelessConstantMemorySurfaceWithInitialization,
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization,
SPatchAllocateSystemThreadSurface.
Related-To: NEO-4729
Signed-off-by: Krystian Chmielewski <krystian.chmielewski@intel.com>
2021-03-04 17:14:23 +08:00
maxBindingTableCount = std::max(maxBindingTableCount, static_cast<uint32_t>(pBlockInfo->kernelDescriptor.payloadMappings.bindingTable.numEntries));
2020-04-03 16:15:38 +08:00
}
SchedulerKernel &scheduler = kernel.getContext().getSchedulerKernel();
2021-03-22 23:26:03 +08:00
totalSize += getSizeRequiredSSH(scheduler);
2020-04-03 16:15:38 +08:00
totalSize += maxBindingTableCount * sizeof(BINDING_TABLE_STATE) * DeviceQueue::interfaceDescriptorEntries;
totalSize = alignUp(totalSize, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
return totalSize;
2018-10-04 21:01:52 +08:00
}
template <typename GfxFamily>
2020-04-03 16:15:38 +08:00
size_t HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
const IndirectHeap &indirectHeap,
uint64_t offsetInterfaceDescriptor,
uint64_t kernelStartOffset,
size_t sizeCrossThreadData,
size_t sizePerThreadData,
size_t bindingTablePointer,
size_t offsetSamplerState,
uint32_t numSamplers,
uint32_t threadsPerThreadGroup,
const Kernel &kernel,
uint32_t bindingTablePrefetchSize,
PreemptionMode preemptionMode,
2020-11-13 18:41:45 +08:00
INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
2020-12-07 22:41:52 +08:00
const Device &device) {
2020-04-03 16:15:38 +08:00
using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE;
2020-12-04 19:57:11 +08:00
using SHARED_LOCAL_MEMORY_SIZE = typename INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE;
2020-04-03 16:15:38 +08:00
2020-12-07 22:41:52 +08:00
const auto &hardwareInfo = device.getHardwareInfo();
2020-04-03 16:15:38 +08:00
// Allocate some memory for the interface descriptor
auto pInterfaceDescriptor = getInterfaceDescriptor(indirectHeap, offsetInterfaceDescriptor, inlineInterfaceDescriptor);
2020-04-28 00:55:26 +08:00
auto interfaceDescriptor = GfxFamily::cmdInitInterfaceDescriptorData;
2020-04-03 16:15:38 +08:00
// Program the kernel start pointer
2020-04-28 00:55:26 +08:00
interfaceDescriptor.setKernelStartPointerHigh(kernelStartOffset >> 32);
interfaceDescriptor.setKernelStartPointer((uint32_t)kernelStartOffset);
2020-04-03 16:15:38 +08:00
// # of threads in thread group should be based on LWS.
2020-04-28 00:55:26 +08:00
interfaceDescriptor.setNumberOfThreadsInGpgpuThreadGroup(threadsPerThreadGroup);
2020-04-03 16:15:38 +08:00
2020-04-28 00:55:26 +08:00
interfaceDescriptor.setDenormMode(INTERFACE_DESCRIPTOR_DATA::DENORM_MODE_SETBYKERNEL);
2020-04-03 16:15:38 +08:00
2021-03-22 19:06:23 +08:00
auto slmTotalSize = kernel.getSlmTotalSize();
2020-12-18 22:47:42 +08:00
2021-03-22 23:26:03 +08:00
setGrfInfo(&interfaceDescriptor, kernel, sizeCrossThreadData, sizePerThreadData);
2020-12-18 22:47:42 +08:00
EncodeDispatchKernel<GfxFamily>::appendAdditionalIDDFields(&interfaceDescriptor, hardwareInfo, threadsPerThreadGroup, slmTotalSize, SlmPolicy::SlmPolicyNone);
2020-04-03 16:15:38 +08:00
2020-04-28 00:55:26 +08:00
interfaceDescriptor.setBindingTablePointer(static_cast<uint32_t>(bindingTablePointer));
2020-04-03 16:15:38 +08:00
2020-04-28 00:55:26 +08:00
interfaceDescriptor.setSamplerStatePointer(static_cast<uint32_t>(offsetSamplerState));
2020-04-03 16:15:38 +08:00
2020-11-27 17:22:59 +08:00
EncodeDispatchKernel<GfxFamily>::adjustBindingTablePrefetch(interfaceDescriptor, numSamplers, bindingTablePrefetchSize);
2020-04-03 16:15:38 +08:00
2020-06-09 03:49:11 +08:00
auto programmableIDSLMSize =
2020-12-18 22:47:42 +08:00
static_cast<SHARED_LOCAL_MEMORY_SIZE>(HwHelperHw<GfxFamily>::get().computeSlmValues(hardwareInfo, slmTotalSize));
2020-04-03 16:15:38 +08:00
2021-03-11 23:53:39 +08:00
if (DebugManager.flags.OverrideSlmAllocationSize.get() != -1) {
programmableIDSLMSize = static_cast<SHARED_LOCAL_MEMORY_SIZE>(DebugManager.flags.OverrideSlmAllocationSize.get());
}
2020-04-28 00:55:26 +08:00
interfaceDescriptor.setSharedLocalMemorySize(programmableIDSLMSize);
2020-07-23 03:17:50 +08:00
EncodeDispatchKernel<GfxFamily>::programBarrierEnable(interfaceDescriptor,
2021-03-22 23:26:03 +08:00
kernel.getKernelInfo().kernelDescriptor.kernelAttributes.barrierCount,
2020-11-13 18:41:45 +08:00
hardwareInfo);
2020-04-03 16:15:38 +08:00
2020-04-28 00:55:26 +08:00
PreemptionHelper::programInterfaceDescriptorDataPreemption<GfxFamily>(&interfaceDescriptor, preemptionMode);
2020-11-13 18:41:45 +08:00
EncodeDispatchKernel<GfxFamily>::adjustInterfaceDescriptorData(interfaceDescriptor, hardwareInfo);
2020-04-03 16:15:38 +08:00
2020-04-28 00:55:26 +08:00
*pInterfaceDescriptor = interfaceDescriptor;
2020-04-03 16:15:38 +08:00
return (size_t)offsetInterfaceDescriptor;
}
2018-10-04 21:01:52 +08:00
template <typename GfxFamily>
2020-04-03 16:15:38 +08:00
size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
LinearStream &commandStream,
IndirectHeap &dsh,
IndirectHeap &ioh,
IndirectHeap &ssh,
2018-10-04 21:01:52 +08:00
Kernel &kernel,
2020-04-03 16:15:38 +08:00
uint64_t kernelStartOffset,
uint32_t simd,
const size_t localWorkSize[3],
const uint64_t offsetInterfaceDescriptorTable,
uint32_t &interfaceDescriptorIndex,
PreemptionMode preemptionMode,
WALKER_TYPE<GfxFamily> *walkerCmd,
INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
2020-11-13 18:41:45 +08:00
bool localIdsGenerationByRuntime,
2020-11-19 02:39:32 +08:00
const Device &device) {
2018-10-04 21:01:52 +08:00
2020-04-03 16:15:38 +08:00
using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE;
2020-11-19 02:39:32 +08:00
auto rootDeviceIndex = device.getRootDeviceIndex();
2020-04-03 16:15:38 +08:00
DEBUG_BREAK_IF(simd != 1 && simd != 8 && simd != 16 && simd != 32);
2021-03-22 23:26:03 +08:00
auto inlineDataProgrammingRequired = HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(kernel);
2020-04-03 16:15:38 +08:00
// Copy the kernel over to the ISH
2021-03-22 23:26:03 +08:00
const auto &kernelInfo = kernel.getKernelInfo();
2020-04-03 16:15:38 +08:00
2020-04-22 04:40:21 +08:00
ssh.align(BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
2020-11-19 02:39:32 +08:00
kernel.patchBindlessSurfaceStateOffsets(device, ssh.getUsed());
2020-04-22 04:40:21 +08:00
Remove PatchTokens from KernelInfo
Use KernelDescriptor instead of patchTokens stored in KernelInfo's
patchInfo.
Removed: SPatchMediaInterfaceDescriptorLoad, SPatchAllocateLocalSurface,
SPatchMediaVFEState(slot 0), SPatchMediaVFEState(slot 1),
SPatchInterfaceDescriptorData, SPatchSamplerStateArray,
SPatchBindingTableState, SPatchDataParameterBuffer,
SPatchDataParameterStream, SPatchThreadPayload,
SPatchKernelAttributesInfo, SPatchAllocateStatelessPrivateSurface,
SPatchAllocateSyncBuffer,
SPatchAllocateStatelessConstantMemorySurfaceWithInitialization,
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization,
SPatchAllocateSystemThreadSurface.
Related-To: NEO-4729
Signed-off-by: Krystian Chmielewski <krystian.chmielewski@intel.com>
2021-03-04 17:14:23 +08:00
auto dstBindingTablePointer = EncodeSurfaceState<GfxFamily>::pushBindingTableAndSurfaceStates(ssh, kernelInfo.kernelDescriptor.payloadMappings.bindingTable.numEntries,
2021-03-22 23:26:03 +08:00
kernel.getSurfaceStateHeap(), kernel.getSurfaceStateHeapSize(),
2021-03-22 19:06:23 +08:00
kernel.getNumberOfBindingTableStates(), kernel.getBindingTableOffset());
2020-04-03 16:15:38 +08:00
// Copy our sampler state if it exists
Remove PatchTokens from KernelInfo
Use KernelDescriptor instead of patchTokens stored in KernelInfo's
patchInfo.
Removed: SPatchMediaInterfaceDescriptorLoad, SPatchAllocateLocalSurface,
SPatchMediaVFEState(slot 0), SPatchMediaVFEState(slot 1),
SPatchInterfaceDescriptorData, SPatchSamplerStateArray,
SPatchBindingTableState, SPatchDataParameterBuffer,
SPatchDataParameterStream, SPatchThreadPayload,
SPatchKernelAttributesInfo, SPatchAllocateStatelessPrivateSurface,
SPatchAllocateSyncBuffer,
SPatchAllocateStatelessConstantMemorySurfaceWithInitialization,
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization,
SPatchAllocateSystemThreadSurface.
Related-To: NEO-4729
Signed-off-by: Krystian Chmielewski <krystian.chmielewski@intel.com>
2021-03-04 17:14:23 +08:00
const auto &samplerTable = kernelInfo.kernelDescriptor.payloadMappings.samplerTable;
2020-04-03 16:15:38 +08:00
uint32_t samplerCount = 0;
Remove PatchTokens from KernelInfo
Use KernelDescriptor instead of patchTokens stored in KernelInfo's
patchInfo.
Removed: SPatchMediaInterfaceDescriptorLoad, SPatchAllocateLocalSurface,
SPatchMediaVFEState(slot 0), SPatchMediaVFEState(slot 1),
SPatchInterfaceDescriptorData, SPatchSamplerStateArray,
SPatchBindingTableState, SPatchDataParameterBuffer,
SPatchDataParameterStream, SPatchThreadPayload,
SPatchKernelAttributesInfo, SPatchAllocateStatelessPrivateSurface,
SPatchAllocateSyncBuffer,
SPatchAllocateStatelessConstantMemorySurfaceWithInitialization,
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization,
SPatchAllocateSystemThreadSurface.
Related-To: NEO-4729
Signed-off-by: Krystian Chmielewski <krystian.chmielewski@intel.com>
2021-03-04 17:14:23 +08:00
uint32_t samplerStateOffset = 0;
if (isValidOffset(samplerTable.tableOffset) && isValidOffset(samplerTable.borderColor)) {
samplerCount = samplerTable.numSamplers;
samplerStateOffset = EncodeStates<GfxFamily>::copySamplerState(&dsh, samplerTable.tableOffset,
samplerCount, samplerTable.borderColor,
2021-03-22 23:26:03 +08:00
kernel.getDynamicStateHeap(), device.getBindlessHeapsHelper());
2020-04-03 16:15:38 +08:00
}
2019-12-17 15:55:09 +08:00
2020-04-03 16:15:38 +08:00
auto localWorkItems = localWorkSize[0] * localWorkSize[1] * localWorkSize[2];
auto threadsPerThreadGroup = static_cast<uint32_t>(getThreadsPerWG(simd, localWorkItems));
Remove PatchTokens from KernelInfo
Use KernelDescriptor instead of patchTokens stored in KernelInfo's
patchInfo.
Removed: SPatchMediaInterfaceDescriptorLoad, SPatchAllocateLocalSurface,
SPatchMediaVFEState(slot 0), SPatchMediaVFEState(slot 1),
SPatchInterfaceDescriptorData, SPatchSamplerStateArray,
SPatchBindingTableState, SPatchDataParameterBuffer,
SPatchDataParameterStream, SPatchThreadPayload,
SPatchKernelAttributesInfo, SPatchAllocateStatelessPrivateSurface,
SPatchAllocateSyncBuffer,
SPatchAllocateStatelessConstantMemorySurfaceWithInitialization,
SPatchAllocateStatelessGlobalMemorySurfaceWithInitialization,
SPatchAllocateSystemThreadSurface.
Related-To: NEO-4729
Signed-off-by: Krystian Chmielewski <krystian.chmielewski@intel.com>
2021-03-04 17:14:23 +08:00
auto numChannels = static_cast<uint32_t>(kernelInfo.kernelDescriptor.kernelAttributes.numLocalIdChannels);
2020-04-03 16:15:38 +08:00
2021-03-22 19:06:23 +08:00
uint32_t sizeCrossThreadData = kernel.getCrossThreadDataSize();
2020-04-03 16:15:38 +08:00
size_t offsetCrossThreadData = HardwareCommandsHelper<GfxFamily>::sendCrossThreadData(
ioh, kernel, inlineDataProgrammingRequired,
2021-03-22 19:06:23 +08:00
walkerCmd, sizeCrossThreadData);
2020-04-03 16:15:38 +08:00
size_t sizePerThreadDataTotal = 0;
size_t sizePerThreadData = 0;
HardwareCommandsHelper<GfxFamily>::programPerThreadData(
sizePerThreadData,
localIdsGenerationByRuntime,
2018-10-04 21:01:52 +08:00
ioh,
simd,
numChannels,
localWorkSize,
2020-04-03 16:15:38 +08:00
kernel,
sizePerThreadDataTotal,
2020-12-07 22:41:52 +08:00
localWorkItems,
rootDeviceIndex);
2018-10-04 21:01:52 +08:00
2020-04-03 16:15:38 +08:00
uint64_t offsetInterfaceDescriptor = offsetInterfaceDescriptorTable + interfaceDescriptorIndex * sizeof(INTERFACE_DESCRIPTOR_DATA);
2018-10-04 21:01:52 +08:00
2021-03-22 19:06:23 +08:00
auto bindingTablePrefetchSize = std::min(31u, static_cast<uint32_t>(kernel.getNumberOfBindingTableStates()));
2020-04-03 16:15:38 +08:00
if (resetBindingTablePrefetch(kernel)) {
bindingTablePrefetchSize = 0;
}
2018-10-18 12:38:18 +08:00
2020-04-03 16:15:38 +08:00
HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
dsh,
offsetInterfaceDescriptor,
kernelStartOffset,
sizeCrossThreadData,
sizePerThreadData,
dstBindingTablePointer,
samplerStateOffset,
samplerCount,
threadsPerThreadGroup,
kernel,
bindingTablePrefetchSize,
preemptionMode,
2020-11-13 18:41:45 +08:00
inlineInterfaceDescriptor,
2020-12-07 22:41:52 +08:00
device);
2018-10-18 12:38:18 +08:00
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
2020-04-03 16:15:38 +08:00
PatchInfoData patchInfoData(kernelStartOffset, 0, PatchInfoAllocationType::InstructionHeap, dsh.getGraphicsAllocation()->getGpuAddress(), offsetInterfaceDescriptor, PatchInfoAllocationType::DynamicStateHeap);
kernel.getPatchInfoDataList().push_back(patchInfoData);
2018-10-18 12:38:18 +08:00
}
2020-04-03 16:15:38 +08:00
// Program media state flush to set interface descriptor offset
sendMediaStateFlush(
commandStream,
interfaceDescriptorIndex);
DEBUG_BREAK_IF(offsetCrossThreadData % 64 != 0);
walkerCmd->setIndirectDataStartAddress(static_cast<uint32_t>(offsetCrossThreadData));
setInterfaceDescriptorOffset(walkerCmd, interfaceDescriptorIndex);
auto indirectDataLength = alignUp(static_cast<uint32_t>(sizeCrossThreadData + sizePerThreadDataTotal),
WALKER_TYPE<GfxFamily>::INDIRECTDATASTARTADDRESS_ALIGN_SIZE);
walkerCmd->setIndirectDataLength(indirectDataLength);
return offsetCrossThreadData;
}
// Recomputes the per-thread data sizes for a work group.
//
// sizePerThreadData      - [out] set to getPerThreadSizeLocalIDs(simd, grfSize, numChannels).
// simd                   - SIMD width used for the computation.
// numChannels            - number of local-id channels.
// sizePerThreadDataTotal - [out] threads per work group multiplied by the
//                          per-thread local-id size, where the latter is at
//                          least one GRF.
// localWorkItems         - number of work items in the work group.
template <typename GfxFamily>
void HardwareCommandsHelper<GfxFamily>::updatePerThreadDataTotal(
    size_t &sizePerThreadData,
    uint32_t &simd,
    uint32_t &numChannels,
    size_t &sizePerThreadDataTotal,
    size_t &localWorkItems) {
    uint32_t grfBytes = sizeof(typename GfxFamily::GRF);

    sizePerThreadData = getPerThreadSizeLocalIDs(simd, grfBytes, numChannels);

    // Each thread gets no less than one full GRF of local-id storage.
    uint32_t perThreadLocalIdBytes = PerThreadDataHelper::getLocalIdSizePerThread(simd, grfBytes, numChannels);
    perThreadLocalIdBytes = std::max(perThreadLocalIdBytes, grfBytes);

    sizePerThreadDataTotal = getThreadsPerWG(simd, localWorkItems) * perThreadLocalIdBytes;

    // Hardware requires at least 1 GRF of perThreadData for each thread in thread group
    DEBUG_BREAK_IF(sizePerThreadDataTotal == 0);
}
2019-10-11 15:17:06 +08:00
template <typename GfxFamily>
2021-03-22 23:26:03 +08:00
bool HardwareCommandsHelper<GfxFamily>::inlineDataProgrammingRequired(const Kernel &kernel) {
2020-04-03 16:15:38 +08:00
auto checkKernelForInlineData = true;
if (DebugManager.flags.EnablePassInlineData.get() != -1) {
checkKernelForInlineData = !!DebugManager.flags.EnablePassInlineData.get();
}
if (checkKernelForInlineData) {
2021-03-22 23:26:03 +08:00
return kernel.getKernelInfo().kernelDescriptor.kernelAttributes.flags.passInlineData;
2020-04-03 16:15:38 +08:00
}
return false;
2019-10-11 15:17:06 +08:00
}
2019-10-30 17:23:26 +08:00
template <typename GfxFamily>
2021-03-22 23:26:03 +08:00
bool HardwareCommandsHelper<GfxFamily>::kernelUsesLocalIds(const Kernel &kernel) {
return kernel.getKernelInfo().kernelDescriptor.kernelAttributes.numLocalIdChannels > 0;
2020-04-03 16:15:38 +08:00
}
2019-03-26 18:59:46 +08:00
} // namespace NEO