Revert "Cleanup MediaInterfaceDescriptorLoad logic in command encoder"

This reverts commit 349af0bd5e.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Compute-Runtime-Validation
2022-11-07 01:25:43 +01:00
committed by Compute-Runtime-Automation
parent 349af0bd5e
commit ddbaa5e8c9
8 changed files with 46 additions and 137 deletions

View File

@@ -76,7 +76,7 @@ struct EncodeDispatchKernel {
static void setGrfInfo(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t numGrf, const size_t &sizeCrossThreadData,
const size_t &sizePerThreadData, const HardwareInfo &hwInfo);
static void *getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset);
static void *getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset, const HardwareInfo &hwInfo);
static bool isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels,
const size_t *lws,
@@ -115,9 +115,9 @@ struct EncodeDispatchKernel {
static constexpr bool shouldUpdateGlobalAtomics(bool &currentVal, bool refVal, bool updateCurrent);
static size_t getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor, uint32_t numIddsPerBlock);
static size_t getSizeRequiredDsh(const KernelInfo &kernelInfo);
static size_t getSizeRequiredSsh(const KernelInfo &kernelInfo);
inline static uint32_t additionalSizeRequiredDsh(uint32_t numIddsPerBlock);
inline static uint32_t additionalSizeRequiredDsh();
};
template <typename GfxFamily>

View File

@@ -518,7 +518,9 @@ template <typename Family>
// Empty body in this implementation: `args` is accepted but not read, and nothing is programmed.
void EncodeSurfaceState<Family>::encodeImplicitScalingParams(const EncodeSurfaceStateArgs &args) {}
template <typename Family>
void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset) {
void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset, const HardwareInfo &hwInfo) {
using STATE_BASE_ADDRESS = typename Family::STATE_BASE_ADDRESS;
if (container.nextIddInBlock == container.getNumIddPerBlock()) {
if (ApiSpecificConfig::getBindlessConfiguration()) {
@@ -530,6 +532,26 @@ void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &con
sizeof(INTERFACE_DESCRIPTOR_DATA) * container.getNumIddPerBlock()));
}
container.nextIddInBlock = 0;
if (container.isHeapDirty(HeapType::DYNAMIC_STATE)) {
PipeControlArgs syncArgs;
syncArgs.dcFlushEnable = MemorySynchronizationCommands<Family>::getDcFlushEnable(true, hwInfo);
syncArgs.hdcPipelineFlush = true;
MemorySynchronizationCommands<Family>::addSingleBarrier(*container.getCommandStream(), syncArgs);
STATE_BASE_ADDRESS sba;
EncodeStateBaseAddressArgs<Family> encodeStateBaseAddressArgs = {
&container,
sba,
0,
false,
false,
false};
EncodeStateBaseAddress<Family>::encode(encodeStateBaseAddressArgs);
container.setDirtyStateForAllHeaps(false);
}
EncodeMediaInterfaceDescriptorLoad<Family>::encode(container);
}
iddOffset = container.nextIddInBlock;
@@ -698,17 +720,17 @@ template <typename Family>
// Always returns false in this implementation; `currentVal` is never written and
// `refVal` / `updateCurrent` are ignored.
constexpr bool EncodeDispatchKernel<Family>::shouldUpdateGlobalAtomics(bool &currentVal, bool refVal, bool updateCurrent) { return false; }
template <typename Family>
size_t EncodeDispatchKernel<Family>::getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor, uint32_t numIddsPerBlock) {
size_t EncodeDispatchKernel<Family>::getSizeRequiredDsh(const KernelInfo &kernelInfo) {
using INTERFACE_DESCRIPTOR_DATA = typename Family::INTERFACE_DESCRIPTOR_DATA;
constexpr auto samplerStateSize = sizeof(typename Family::SAMPLER_STATE);
const auto numSamplers = kernelDescriptor.payloadMappings.samplerTable.numSamplers;
const auto additionalDshSize = additionalSizeRequiredDsh(numIddsPerBlock);
const auto numSamplers = kernelInfo.kernelDescriptor.payloadMappings.samplerTable.numSamplers;
const auto additionalDshSize = additionalSizeRequiredDsh();
if (numSamplers == 0U) {
return alignUp(additionalDshSize, EncodeStates<Family>::alignInterfaceDescriptorData);
}
size_t size = kernelDescriptor.payloadMappings.samplerTable.tableOffset -
kernelDescriptor.payloadMappings.samplerTable.borderColor;
size_t size = kernelInfo.kernelDescriptor.payloadMappings.samplerTable.tableOffset -
kernelInfo.kernelDescriptor.payloadMappings.samplerTable.borderColor;
size = alignUp(size, EncodeStates<Family>::alignIndirectStatePointer);
size += numSamplers * samplerStateSize;

View File

@@ -104,15 +104,6 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
PreemptionHelper::programInterfaceDescriptorDataPreemption<Family>(&idd, args.preemptionMode);
if (!ApiSpecificConfig::getBindlessConfiguration()) {
auto heap = container.getIndirectHeap(HeapType::DYNAMIC_STATE);
auto dshSizeRequired = NEO::EncodeDispatchKernel<Family>::getSizeRequiredDsh(kernelDescriptor, container.getNumIddPerBlock());
if (heap->getAvailableSpace() <= dshSizeRequired) {
heap = container.getHeapWithRequiredSizeAndAlignment(HeapType::DYNAMIC_STATE, heap->getUsed() + heap->getAvailableSpace(), 0);
UNRECOVERABLE_IF(!heap);
}
}
uint32_t samplerStateOffset = 0;
uint32_t samplerCount = 0;
@@ -175,9 +166,6 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
args.dispatchInterface->getPerThreadData(), sizePerThreadDataForWholeGroup);
}
uint32_t numIDD = 0u;
void *iddPtr = getInterfaceDescriptor(container, numIDD);
auto slmSizeNew = args.dispatchInterface->getSlmTotalSize();
bool dirtyHeaps = container.isAnyHeapDirty();
bool flush = container.slmSize != slmSizeNew || dirtyHeaps || args.requiresUncachedMocs;
@@ -211,12 +199,15 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
if (container.slmSize != slmSizeNew) {
EncodeL3State<Family>::encode(container, slmSizeNew != 0u);
container.slmSize = slmSizeNew;
if (container.nextIddInBlock != container.getNumIddPerBlock()) {
EncodeMediaInterfaceDescriptorLoad<Family>::encode(container);
}
}
}
if (numIDD == 0 || flush) {
EncodeMediaInterfaceDescriptorLoad<Family>::encode(container);
}
uint32_t numIDD = 0u;
void *iddPtr = getInterfaceDescriptor(container, numIDD, hwInfo);
cmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData));
cmd.setIndirectDataLength(sizeThreadData);
@@ -554,8 +545,8 @@ template <typename Family>
// Empty body in this implementation: `walkerCmd` is left unmodified and the
// remaining arguments are not read.
void EncodeDispatchKernel<Family>::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const HardwareInfo &hwInfo) {}
template <typename Family>
uint32_t EncodeDispatchKernel<Family>::additionalSizeRequiredDsh(uint32_t numIddsPerBlock) {
return sizeof(typename Family::INTERFACE_DESCRIPTOR_DATA) * numIddsPerBlock;
uint32_t EncodeDispatchKernel<Family>::additionalSizeRequiredDsh() {
return sizeof(typename Family::INTERFACE_DESCRIPTOR_DATA);
}
} // namespace NEO

View File

@@ -775,7 +775,7 @@ template <typename Family>
// Empty body in this implementation: `walkerCmd` is left unmodified and the
// remaining arguments are not read.
void EncodeDispatchKernel<Family>::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const HardwareInfo &hwInfo) {}
template <typename Family>
uint32_t EncodeDispatchKernel<Family>::additionalSizeRequiredDsh(uint32_t) {
uint32_t EncodeDispatchKernel<Family>::additionalSizeRequiredDsh() {
return 0u;
}