Mirror of https://github.com/intel/compute-runtime.git
Revert "Cleanup MediaInterfaceDescriptorLoad logic in command encoder"
This reverts commit 349af0bd5e.
Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
Committed by: Compute-Runtime-Automation
Parent: 349af0bd5e
Commit: ddbaa5e8c9
@@ -76,7 +76,7 @@ struct EncodeDispatchKernel {
     static void setGrfInfo(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, uint32_t numGrf, const size_t &sizeCrossThreadData,
                            const size_t &sizePerThreadData, const HardwareInfo &hwInfo);

-    static void *getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset);
+    static void *getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset, const HardwareInfo &hwInfo);

     static bool isRuntimeLocalIdsGenerationRequired(uint32_t activeChannels,
                                                     const size_t *lws,
@@ -115,9 +115,9 @@ struct EncodeDispatchKernel {

     static constexpr bool shouldUpdateGlobalAtomics(bool &currentVal, bool refVal, bool updateCurrent);

-    static size_t getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor, uint32_t numIddsPerBlock);
+    static size_t getSizeRequiredDsh(const KernelInfo &kernelInfo);
     static size_t getSizeRequiredSsh(const KernelInfo &kernelInfo);
-    inline static uint32_t additionalSizeRequiredDsh(uint32_t numIddsPerBlock);
+    inline static uint32_t additionalSizeRequiredDsh();
 };

 template <typename GfxFamily>
@@ -518,7 +518,9 @@ template <typename Family>
 void EncodeSurfaceState<Family>::encodeImplicitScalingParams(const EncodeSurfaceStateArgs &args) {}

 template <typename Family>
-void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset) {
+void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &container, uint32_t &iddOffset, const HardwareInfo &hwInfo) {
+
+    using STATE_BASE_ADDRESS = typename Family::STATE_BASE_ADDRESS;

     if (container.nextIddInBlock == container.getNumIddPerBlock()) {
         if (ApiSpecificConfig::getBindlessConfiguration()) {
@@ -530,6 +532,26 @@ void *EncodeDispatchKernel<Family>::getInterfaceDescriptor(CommandContainer &con
                                                            sizeof(INTERFACE_DESCRIPTOR_DATA) * container.getNumIddPerBlock()));
         }
         container.nextIddInBlock = 0;
+
+        if (container.isHeapDirty(HeapType::DYNAMIC_STATE)) {
+            PipeControlArgs syncArgs;
+            syncArgs.dcFlushEnable = MemorySynchronizationCommands<Family>::getDcFlushEnable(true, hwInfo);
+            syncArgs.hdcPipelineFlush = true;
+            MemorySynchronizationCommands<Family>::addSingleBarrier(*container.getCommandStream(), syncArgs);
+
+            STATE_BASE_ADDRESS sba;
+            EncodeStateBaseAddressArgs<Family> encodeStateBaseAddressArgs = {
+                &container,
+                sba,
+                0,
+                false,
+                false,
+                false};
+            EncodeStateBaseAddress<Family>::encode(encodeStateBaseAddressArgs);
+            container.setDirtyStateForAllHeaps(false);
+        }
+
+        EncodeMediaInterfaceDescriptorLoad<Family>::encode(container);
     }

     iddOffset = container.nextIddInBlock;
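
For context, the block restored above makes getInterfaceDescriptor() handle a dirty dynamic-state heap itself: when the current IDD block is exhausted it resets the block, emits a pipe-control barrier (DC flush chosen per hwInfo, plus an HDC pipeline flush), re-encodes STATE_BASE_ADDRESS, clears the heap-dirty flags, and re-emits MEDIA_INTERFACE_DESCRIPTOR_LOAD. A minimal self-contained sketch of that decision flow follows; every type and helper here is a stand-in invented for illustration, not the actual NEO classes.

// Toy model of the restored getInterfaceDescriptor() rollover path
// (hypothetical types, not driver code).
#include <cstdint>
#include <iostream>

struct ToyContainer {
    uint32_t nextIddInBlock = 0;
    uint32_t numIddPerBlock = 4;
    bool dynamicStateHeapDirty = false;
};

void emitBarrierAndStateBaseAddress() { std::cout << "  PIPE_CONTROL + STATE_BASE_ADDRESS re-encode\n"; }
void emitMediaInterfaceDescriptorLoad() { std::cout << "  MEDIA_INTERFACE_DESCRIPTOR_LOAD\n"; }

uint32_t getInterfaceDescriptorSlot(ToyContainer &container) {
    if (container.nextIddInBlock == container.numIddPerBlock) {
        // Block exhausted: start a new IDD block.
        container.nextIddInBlock = 0;
        if (container.dynamicStateHeapDirty) {
            // Heap (re)allocated since the last dispatch: flush and
            // re-program base addresses before pointing at the new block.
            emitBarrierAndStateBaseAddress();
            container.dynamicStateHeapDirty = false;
        }
        emitMediaInterfaceDescriptorLoad();
    }
    return container.nextIddInBlock++;
}

int main() {
    ToyContainer container;
    container.nextIddInBlock = container.numIddPerBlock; // force a block rollover
    container.dynamicStateHeapDirty = true;              // heap was (re)allocated
    std::cout << "slot = " << getInterfaceDescriptorSlot(container) << "\n";
}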
@@ -698,17 +720,17 @@ template <typename Family>
 constexpr bool EncodeDispatchKernel<Family>::shouldUpdateGlobalAtomics(bool &currentVal, bool refVal, bool updateCurrent) { return false; }

 template <typename Family>
-size_t EncodeDispatchKernel<Family>::getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor, uint32_t numIddsPerBlock) {
+size_t EncodeDispatchKernel<Family>::getSizeRequiredDsh(const KernelInfo &kernelInfo) {
     using INTERFACE_DESCRIPTOR_DATA = typename Family::INTERFACE_DESCRIPTOR_DATA;
     constexpr auto samplerStateSize = sizeof(typename Family::SAMPLER_STATE);
-    const auto numSamplers = kernelDescriptor.payloadMappings.samplerTable.numSamplers;
-    const auto additionalDshSize = additionalSizeRequiredDsh(numIddsPerBlock);
+    const auto numSamplers = kernelInfo.kernelDescriptor.payloadMappings.samplerTable.numSamplers;
+    const auto additionalDshSize = additionalSizeRequiredDsh();
     if (numSamplers == 0U) {
         return alignUp(additionalDshSize, EncodeStates<Family>::alignInterfaceDescriptorData);
     }

-    size_t size = kernelDescriptor.payloadMappings.samplerTable.tableOffset -
-                  kernelDescriptor.payloadMappings.samplerTable.borderColor;
+    size_t size = kernelInfo.kernelDescriptor.payloadMappings.samplerTable.tableOffset -
+                  kernelInfo.kernelDescriptor.payloadMappings.samplerTable.borderColor;
     size = alignUp(size, EncodeStates<Family>::alignIndirectStatePointer);

     size += numSamplers * samplerStateSize;
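
The restored getSizeRequiredDsh() derives the dynamic-state-heap footprint from the kernel's sampler table plus a per-family extra. A small self-contained sketch of that arithmetic is below; all sizes and alignments are illustrative assumptions, not the real per-family values, and the way additionalDshSize is folded in at the end is a guess at code that falls outside the hunk shown above.

// Model of the restored DSH size computation (assumed constants).
#include <cstddef>
#include <iostream>

constexpr std::size_t samplerStateSize = 16;             // stand-in for sizeof(SAMPLER_STATE)
constexpr std::size_t interfaceDescriptorSize = 64;      // stand-in for sizeof(INTERFACE_DESCRIPTOR_DATA)
constexpr std::size_t alignInterfaceDescriptorData = 64; // stand-in alignment
constexpr std::size_t alignIndirectStatePointer = 64;    // stand-in alignment

constexpr std::size_t alignUp(std::size_t value, std::size_t alignment) {
    return (value + alignment - 1) & ~(alignment - 1);
}

std::size_t sizeRequiredDsh(std::size_t numSamplers, std::size_t borderColorOffset, std::size_t tableOffset) {
    const std::size_t additionalDshSize = interfaceDescriptorSize; // restored additionalSizeRequiredDsh()
    if (numSamplers == 0) {
        return alignUp(additionalDshSize, alignInterfaceDescriptorData);
    }
    // Border-color data spans borderColorOffset..tableOffset; the sampler
    // states follow at an aligned offset.
    std::size_t size = tableOffset - borderColorOffset;
    size = alignUp(size, alignIndirectStatePointer);
    size += numSamplers * samplerStateSize;
    // Assumed tail: reserve the extra interface-descriptor space and align.
    size += additionalDshSize;
    return alignUp(size, alignInterfaceDescriptorData);
}

int main() {
    // 4 samplers, 64-byte border-color area: 64 + 4*16 + 64 = 192 bytes.
    std::cout << sizeRequiredDsh(4, 0, 64) << " bytes\n";
}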
@@ -104,15 +104,6 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis

     PreemptionHelper::programInterfaceDescriptorDataPreemption<Family>(&idd, args.preemptionMode);

-    if (!ApiSpecificConfig::getBindlessConfiguration()) {
-        auto heap = container.getIndirectHeap(HeapType::DYNAMIC_STATE);
-        auto dshSizeRequired = NEO::EncodeDispatchKernel<Family>::getSizeRequiredDsh(kernelDescriptor, container.getNumIddPerBlock());
-        if (heap->getAvailableSpace() <= dshSizeRequired) {
-            heap = container.getHeapWithRequiredSizeAndAlignment(HeapType::DYNAMIC_STATE, heap->getUsed() + heap->getAvailableSpace(), 0);
-            UNRECOVERABLE_IF(!heap);
-        }
-    }
-
     uint32_t samplerStateOffset = 0;
     uint32_t samplerCount = 0;

@@ -175,9 +166,6 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
                                                args.dispatchInterface->getPerThreadData(), sizePerThreadDataForWholeGroup);
     }

-    uint32_t numIDD = 0u;
-    void *iddPtr = getInterfaceDescriptor(container, numIDD);
-
     auto slmSizeNew = args.dispatchInterface->getSlmTotalSize();
     bool dirtyHeaps = container.isAnyHeapDirty();
     bool flush = container.slmSize != slmSizeNew || dirtyHeaps || args.requiresUncachedMocs;
@@ -211,12 +199,15 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
         if (container.slmSize != slmSizeNew) {
             EncodeL3State<Family>::encode(container, slmSizeNew != 0u);
             container.slmSize = slmSizeNew;
+
+            if (container.nextIddInBlock != container.getNumIddPerBlock()) {
+                EncodeMediaInterfaceDescriptorLoad<Family>::encode(container);
+            }
         }
     }

-    if (numIDD == 0 || flush) {
-        EncodeMediaInterfaceDescriptorLoad<Family>::encode(container);
-    }
+    uint32_t numIDD = 0u;
+    void *iddPtr = getInterfaceDescriptor(container, numIDD, hwInfo);

     cmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData));
     cmd.setIndirectDataLength(sizeThreadData);
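
Taken together with the getInterfaceDescriptor() hunk above, this is where the MediaInterfaceDescriptorLoad logic of the commit title actually changes: the reverted cleanup emitted the command once in encode(), when numIDD == 0 or a flush was needed, while the restored code emits it from getInterfaceDescriptor() whenever an IDD block rolls over, and additionally right after an SLM size change while the current block still has free slots. A schematic, self-contained comparison follows; the types and helpers are toys, not driver code.

// Schematic of the two MEDIA_INTERFACE_DESCRIPTOR_LOAD placements (toy code).
#include <iostream>

struct ToyState {
    unsigned numIDD = 0;         // slot handed out by getInterfaceDescriptor()
    bool flush = false;          // SLM change, dirty heaps, or uncached MOCS
    bool slmSizeChanged = false;
    bool iddBlockFull = false;   // nextIddInBlock == getNumIddPerBlock()
};

void emitMidl() { std::cout << "MEDIA_INTERFACE_DESCRIPTOR_LOAD\n"; }

// Reverted cleanup: a single decision point in encode().
void midlBeforeRevert(const ToyState &s) {
    if (s.numIDD == 0 || s.flush) {
        emitMidl();
    }
}

// Restored logic: on an SLM change re-emit immediately unless the block is
// full, in which case the rollover in getInterfaceDescriptor() emits it.
void midlAfterRevert(const ToyState &s) {
    if (s.slmSizeChanged && !s.iddBlockFull) {
        emitMidl();
    }
    if (s.iddBlockFull) {
        emitMidl(); // happens inside getInterfaceDescriptor() on rollover
    }
}

int main() {
    ToyState s;
    s.flush = true;
    midlBeforeRevert(s); // emits once (flush path)
    s.slmSizeChanged = true;
    midlAfterRevert(s);  // emits once (SLM change, block not full)
}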
@@ -554,8 +545,8 @@ template <typename Family>
 void EncodeDispatchKernel<Family>::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const HardwareInfo &hwInfo) {}

 template <typename Family>
-uint32_t EncodeDispatchKernel<Family>::additionalSizeRequiredDsh(uint32_t numIddsPerBlock) {
-    return sizeof(typename Family::INTERFACE_DESCRIPTOR_DATA) * numIddsPerBlock;
+uint32_t EncodeDispatchKernel<Family>::additionalSizeRequiredDsh() {
+    return sizeof(typename Family::INTERFACE_DESCRIPTOR_DATA);
 }

 } // namespace NEO
@@ -775,7 +775,7 @@ template <typename Family>
 void EncodeDispatchKernel<Family>::adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const HardwareInfo &hwInfo) {}

 template <typename Family>
-uint32_t EncodeDispatchKernel<Family>::additionalSizeRequiredDsh(uint32_t) {
+uint32_t EncodeDispatchKernel<Family>::additionalSizeRequiredDsh() {
     return 0u;
 }

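
The last two hunks restore the parameterless additionalSizeRequiredDsh(): the first implementation now reserves room for a single interface descriptor instead of a whole IDD block, and the second keeps returning 0. A tiny worked comparison with assumed figures (a 64-byte descriptor and 64 descriptors per block; neither number comes from the diff):

// Illustrative comparison of the reverted vs. restored DSH reservation.
#include <cstdint>
#include <iostream>

int main() {
    constexpr uint32_t interfaceDescriptorSize = 64; // assumed sizeof(INTERFACE_DESCRIPTOR_DATA)
    constexpr uint32_t numIddsPerBlock = 64;         // assumed container.getNumIddPerBlock()

    // Reverted cleanup: additionalSizeRequiredDsh(numIddsPerBlock) covered the whole block.
    std::cout << "per block: " << interfaceDescriptorSize * numIddsPerBlock << " bytes\n"; // 4096
    // Restored logic: additionalSizeRequiredDsh() covers one descriptor only.
    std::cout << "per IDD:   " << interfaceDescriptorSize << " bytes\n";                   // 64
}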