Change dispatch kernel interface to provide already prepared heap objects

Related-To: NEO-5055

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2023-01-26 14:33:36 +00:00
committed by Compute-Runtime-Automation
parent 401344137c
commit 5097ef4825
10 changed files with 106 additions and 35 deletions

View File

@@ -18,30 +18,33 @@
#include <list>
namespace NEO {
enum class SlmPolicy;
class BindlessHeapsHelper;
class Gmm;
class GmmHelper;
class IndirectHeap;
class LogicalStateHelper;
class ProductHelper;
struct DeviceInfo;
struct DispatchKernelEncoderI;
struct EncodeSurfaceStateArgs;
struct HardwareInfo;
struct KernelDescriptor;
struct KernelInfo;
struct MiFlushArgs;
struct PipeControlArgs;
struct PipelineSelectArgs;
enum class SlmPolicy;
struct DispatchKernelEncoderI;
struct RootDeviceEnvironment;
class BindlessHeapsHelper;
class GmmHelper;
class LogicalStateHelper;
class IndirectHeap;
class LogicalStateHelper;
class Gmm;
struct HardwareInfo;
struct KernelInfo;
struct StateComputeModeProperties;
struct KernelDescriptor;
class ProductHelper;
struct EncodeDispatchKernelArgs {
uint64_t eventAddress = 0ull;
Device *device = nullptr;
DispatchKernelEncoderI *dispatchInterface = nullptr;
IndirectHeap *surfaceStateHeap = nullptr;
IndirectHeap *dynamicStateHeap = nullptr;
const void *threadGroupDimensions = nullptr;
std::list<void *> *additionalCommands = nullptr;
PreemptionMode preemptionMode = PreemptionMode::Initial;
@@ -138,6 +141,7 @@ struct EncodeDispatchKernel {
static size_t getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor);
static size_t getSizeRequiredSsh(const KernelInfo &kernelInfo);
inline static uint32_t additionalSizeRequiredDsh();
static bool isDshNeeded(const DeviceInfo &deviceInfo);
};
template <typename GfxFamily>

View File

@@ -98,7 +98,10 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
if (!isBindlessKernel) {
container.prepareBindfulSsh();
if (bindingTableStateCount > 0u) {
auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, args.dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
auto ssh = args.surfaceStateHeap;
if (ssh == nullptr) {
ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, args.dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
}
bindingTablePointer = static_cast<uint32_t>(EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(
*ssh, bindingTableStateCount,
args.dispatchInterface->getSurfaceStateHeapData(),
@@ -114,20 +117,22 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
uint32_t samplerCount = 0;
if (kernelDescriptor.payloadMappings.samplerTable.numSamplers > 0) {
if (!ApiSpecificConfig::getBindlessConfiguration()) {
auto heap = container.getIndirectHeap(HeapType::DYNAMIC_STATE);
auto dshSizeRequired = NEO::EncodeDispatchKernel<Family>::getSizeRequiredDsh(kernelDescriptor);
if (heap->getAvailableSpace() <= dshSizeRequired) {
heap = container.getHeapWithRequiredSizeAndAlignment(HeapType::DYNAMIC_STATE, heap->getMaxAvailableSpace(), 0);
UNRECOVERABLE_IF(!heap);
auto dsHeap = args.dynamicStateHeap;
if (dsHeap == nullptr) {
if (!ApiSpecificConfig::getBindlessConfiguration()) {
auto dsHeap = container.getIndirectHeap(HeapType::DYNAMIC_STATE);
auto dshSizeRequired = NEO::EncodeDispatchKernel<Family>::getSizeRequiredDsh(kernelDescriptor);
if (dsHeap->getAvailableSpace() <= dshSizeRequired) {
dsHeap = container.getHeapWithRequiredSizeAndAlignment(HeapType::DYNAMIC_STATE, dsHeap->getMaxAvailableSpace(), 0);
UNRECOVERABLE_IF(!dsHeap);
}
}
dsHeap = ApiSpecificConfig::getBindlessConfiguration() ? args.device->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH) : container.getIndirectHeap(HeapType::DYNAMIC_STATE);
}
auto heap = ApiSpecificConfig::getBindlessConfiguration() ? args.device->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH) : container.getIndirectHeap(HeapType::DYNAMIC_STATE);
UNRECOVERABLE_IF(!heap);
UNRECOVERABLE_IF(!dsHeap);
samplerCount = kernelDescriptor.payloadMappings.samplerTable.numSamplers;
samplerStateOffset = EncodeStates<Family>::copySamplerState(heap, kernelDescriptor.payloadMappings.samplerTable.tableOffset,
samplerStateOffset = EncodeStates<Family>::copySamplerState(dsHeap, kernelDescriptor.payloadMappings.samplerTable.tableOffset,
kernelDescriptor.payloadMappings.samplerTable.numSamplers,
kernelDescriptor.payloadMappings.samplerTable.borderColor,
args.dispatchInterface->getDynamicStateHeapData(),
@@ -382,6 +387,11 @@ inline void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const Roo
// Hook for appending extra fields to the interface descriptor.
// No-op in this implementation: the body is empty, so pInterfaceDescriptor is
// left untouched and none of the other arguments are read.
template <typename Family>
void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const RootDeviceEnvironment &rootDeviceEnvironment, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize, SlmPolicy slmPolicy) {}
// Reports whether a dynamic state heap is needed for kernel dispatch.
// This implementation answers true unconditionally; deviceInfo is not consulted.
template <typename Family>
inline bool EncodeDispatchKernel<Family>::isDshNeeded(const DeviceInfo &deviceInfo) {
return true;
}
// Hook for adjusting pipeline-select state for the given kernel.
// No-op in this implementation: the body is empty, so neither container nor
// kernelDescriptor is read or modified.
template <typename Family>
inline void EncodeComputeMode<Family>::adjustPipelineSelect(CommandContainer &container, const NEO::KernelDescriptor &kernelDescriptor) {
}

View File

@@ -118,7 +118,10 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
kernelDescriptor.kernelAttributes.flags.usesImages) {
container.prepareBindfulSsh();
if (bindingTableStateCount > 0u) {
auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, args.dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
auto ssh = args.surfaceStateHeap;
if (ssh == nullptr) {
ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, args.dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
}
bindingTablePointer = static_cast<uint32_t>(EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(
*ssh, bindingTableStateCount,
args.dispatchInterface->getSurfaceStateHeapData(),
@@ -132,17 +135,21 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
uint32_t samplerCount = 0;
if (args.device->getDeviceInfo().imageSupport) {
if constexpr (Family::supportsSampler) {
if constexpr (Family::supportsSampler) {
if (args.device->getDeviceInfo().imageSupport) {
uint32_t samplerStateOffset = 0;
if (kernelDescriptor.payloadMappings.samplerTable.numSamplers > 0) {
auto heap = ApiSpecificConfig::getBindlessConfiguration() ? args.device->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH) : container.getIndirectHeap(HeapType::DYNAMIC_STATE);
UNRECOVERABLE_IF(!heap);
auto dsHeap = args.dynamicStateHeap;
if (dsHeap == nullptr) {
dsHeap = ApiSpecificConfig::getBindlessConfiguration() ? args.device->getBindlessHeapsHelper()->getHeap(BindlessHeapsHelper::GLOBAL_DSH) : container.getIndirectHeap(HeapType::DYNAMIC_STATE);
}
UNRECOVERABLE_IF(!dsHeap);
samplerCount = kernelDescriptor.payloadMappings.samplerTable.numSamplers;
samplerStateOffset = EncodeStates<Family>::copySamplerState(
heap, kernelDescriptor.payloadMappings.samplerTable.tableOffset,
dsHeap, kernelDescriptor.payloadMappings.samplerTable.tableOffset,
kernelDescriptor.payloadMappings.samplerTable.numSamplers, kernelDescriptor.payloadMappings.samplerTable.borderColor,
args.dispatchInterface->getDynamicStateHeapData(),
args.device->getBindlessHeapsHelper(), rootDeviceEnvironment);
@@ -496,6 +503,14 @@ void EncodeDispatchKernel<Family>::encodeThreadData(WALKER_TYPE &walkerCmd,
}
}
// Reports whether a dynamic state heap is needed for kernel dispatch.
// Families compiled without sampler support never need one; for all other
// families the answer is the device's imageSupport flag.
template <typename Family>
inline bool EncodeDispatchKernel<Family>::isDshNeeded(const DeviceInfo &deviceInfo) {
    if constexpr (!Family::supportsSampler) {
        // Resolved at compile time: no sampler support means no DSH.
        return false;
    } else {
        return deviceInfo.imageSupport;
    }
}
template <typename Family>
void EncodeStateBaseAddress<Family>::setSbaAddressesForDebugger(NEO::Debugger::SbaAddresses &sbaAddress, const STATE_BASE_ADDRESS &sbaCmd) {
sbaAddress.BindlessSurfaceStateBaseAddress = sbaCmd.getBindlessSurfaceStateBaseAddress();

View File

@@ -51,6 +51,11 @@ HWTEST_F(CommandEncodeStatesTest, givenDispatchInterfaceWhenDispatchKernelThenWa
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.surfaceStateHeap = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE);
if (EncodeDispatchKernel<FamilyType>::isDshNeeded(pDevice->getDeviceInfo())) {
dispatchArgs.dynamicStateHeap = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
}
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs, nullptr);
GenCmdList commands;
@@ -373,6 +378,8 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNumSamplersOneWhenDispa
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.surfaceStateHeap = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE);
dispatchArgs.dynamicStateHeap = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs, nullptr);
@@ -929,7 +936,10 @@ HWTEST2_F(EncodeDispatchKernelTest, givenBindfulKernelWhenDispatchingKernelThenS
auto usedBefore = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE)->getUsed();
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.surfaceStateHeap = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE);
if (EncodeDispatchKernel<FamilyType>::isDshNeeded(pDevice->getDeviceInfo())) {
dispatchArgs.dynamicStateHeap = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
}
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs, nullptr);
auto usedAfter = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE)->getUsed();
@@ -1439,6 +1449,8 @@ HWTEST_F(BindlessCommandEncodeStatesTest, givenGlobalBindlessHeapsWhenDispatchin
0,
pDevice,
dispatchInterface.get(),
nullptr,
nullptr,
dims,
nullptr,
NEO::PreemptionMode::Disabled,
@@ -1488,6 +1500,8 @@ HWTEST_F(BindlessCommandEncodeStatesTest, givenBindlessModeDisabledelWithSampler
0,
pDevice,
dispatchInterface.get(),
nullptr,
nullptr,
dims,
nullptr,
NEO::PreemptionMode::Disabled,

View File

@@ -315,6 +315,9 @@ HWTEST2_F(CommandEncodeStatesTest, giveNumSamplersOneWhenDispatchKernelThensampl
bool requiresUncachedMocs = false;
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
dispatchArgs.surfaceStateHeap = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE);
dispatchArgs.dynamicStateHeap = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs, nullptr);
GenCmdList commands;

View File

@@ -31,10 +31,13 @@ EncodeDispatchKernelArgs CommandEncodeStatesFixture::createDefaultDispatchKernel
DispatchKernelEncoderI *dispatchInterface,
const void *threadGroupDimensions,
bool requiresUncachedMocs) {
EncodeDispatchKernelArgs args{
0, // eventAddress
device, // device
dispatchInterface, // dispatchInterface
nullptr, // surfaceStateHeap
nullptr, // dynamicStateHeap
threadGroupDimensions, // threadGroupDimensions
nullptr, // additionalCommands
PreemptionMode::Disabled, // preemptionMode