mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-27 15:53:13 +08:00
Add cross regular and intermediate command lists base address state transitions
- updates coming from regular list are stored in csr last sent variables
- all per context and per kernel transitions are kept in a single place
- state updates from intermediate to regular are set in csr properties
- global atomics support duplicates are removed

Related-To: NEO-5055
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
140e59810f
commit
bf2072c3ea
@@ -136,8 +136,6 @@ struct EncodeDispatchKernel {
|
||||
|
||||
static void adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
|
||||
static constexpr bool shouldUpdateGlobalAtomics(bool &currentVal, bool refVal, bool updateCurrent);
|
||||
|
||||
static size_t getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor, uint32_t iddCount);
|
||||
static size_t getSizeRequiredSsh(const KernelInfo &kernelInfo);
|
||||
inline static size_t additionalSizeRequiredDsh(uint32_t iddCount);
|
||||
|
||||
@@ -727,9 +727,6 @@ void EncodeDispatchKernel<Family>::adjustBindingTablePrefetch(INTERFACE_DESCRIPT
|
||||
template <typename Family>
|
||||
void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf) {}
|
||||
|
||||
template <typename Family>
|
||||
constexpr bool EncodeDispatchKernel<Family>::shouldUpdateGlobalAtomics(bool &currentVal, bool refVal, bool updateCurrent) { return false; }
|
||||
|
||||
template <typename Family>
|
||||
size_t EncodeDispatchKernel<Family>::getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor, uint32_t iddCount) {
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename Family::INTERFACE_DESCRIPTOR_DATA;
|
||||
|
||||
@@ -220,8 +220,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||
}
|
||||
}
|
||||
|
||||
if (shouldUpdateGlobalAtomics(container.lastSentUseGlobalAtomics, args.useGlobalAtomics, args.partitionCount > 1) ||
|
||||
container.isAnyHeapDirty() ||
|
||||
if (container.isAnyHeapDirty() ||
|
||||
args.requiresUncachedMocs) {
|
||||
|
||||
PipeControlArgs syncArgs;
|
||||
|
||||
@@ -92,6 +92,7 @@ CommandStreamReceiver::CommandStreamReceiver(ExecutionEnvironment &executionEnvi
|
||||
auto &productHelper = getProductHelper();
|
||||
productHelper.fillFrontEndPropertiesSupportStructure(feSupportFlags, hwInfo);
|
||||
productHelper.fillPipelineSelectPropertiesSupportStructure(pipelineSupportFlags, hwInfo);
|
||||
productHelper.fillStateBaseAddressPropertiesSupportStructure(sbaSupportFlags);
|
||||
}
|
||||
|
||||
CommandStreamReceiver::~CommandStreamReceiver() {
|
||||
@@ -530,8 +531,12 @@ void CommandStreamReceiver::initProgrammingFlags() {
|
||||
lastSentL3Config = 0;
|
||||
lastMediaSamplerConfig = -1;
|
||||
lastPreemptionMode = PreemptionMode::Initial;
|
||||
|
||||
latestSentStatelessMocsConfig = CacheSettings::unknownMocs;
|
||||
this->streamProperties.stateBaseAddress.statelessMocs = {};
|
||||
|
||||
lastSentUseGlobalAtomics = false;
|
||||
this->streamProperties.stateBaseAddress.globalAtomics = {};
|
||||
}
|
||||
|
||||
void CommandStreamReceiver::programForAubSubCapture(bool wasActiveInPreviousEnqueue, bool isActive) {
|
||||
|
||||
@@ -427,6 +427,7 @@ class CommandStreamReceiver {
|
||||
StreamProperties streamProperties{};
|
||||
FrontEndPropertiesSupport feSupportFlags{};
|
||||
PipelineSelectPropertiesSupport pipelineSupportFlags{};
|
||||
StateBaseAddressPropertiesSupport sbaSupportFlags{};
|
||||
|
||||
uint64_t totalMemoryUsed = 0u;
|
||||
|
||||
|
||||
@@ -185,10 +185,10 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
bool checkPlatformSupportsGpuIdleImplicitFlush() const;
|
||||
void configurePostSyncWriteOffset();
|
||||
void unregisterDirectSubmissionFromController();
|
||||
constexpr bool isGlobalAtomicsProgrammingRequired(bool currentValue) const;
|
||||
void createKernelArgsBufferAllocation() override;
|
||||
void handleFrontEndStateTransition(DispatchFlags &dispatchFlags);
|
||||
void handlePipelineSelectStateTransition(DispatchFlags &dispatchFlags);
|
||||
void handleFrontEndStateTransition(const DispatchFlags &dispatchFlags);
|
||||
void handlePipelineSelectStateTransition(const DispatchFlags &dispatchFlags);
|
||||
void handleStateBaseAddressStateTransition(const DispatchFlags &dispatchFlags, bool &isStateBaseAddressDirty);
|
||||
|
||||
HeapDirtyState dshState;
|
||||
HeapDirtyState iohState;
|
||||
|
||||
@@ -480,39 +480,11 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
|
||||
auto isStateBaseAddressDirty = dshDirty || iohDirty || sshDirty || stateBaseAddressDirty;
|
||||
|
||||
auto mocsIndex = latestSentStatelessMocsConfig;
|
||||
|
||||
if (dispatchFlags.l3CacheSettings != L3CachingSettings::NotApplicable) {
|
||||
auto l3On = dispatchFlags.l3CacheSettings != L3CachingSettings::l3CacheOff;
|
||||
auto l1On = dispatchFlags.l3CacheSettings == L3CachingSettings::l3AndL1On;
|
||||
mocsIndex = gfxCoreHelper.getMocsIndex(*device.getGmmHelper(), l3On, l1On);
|
||||
}
|
||||
|
||||
if (mocsIndex != latestSentStatelessMocsConfig) {
|
||||
isStateBaseAddressDirty = true;
|
||||
latestSentStatelessMocsConfig = mocsIndex;
|
||||
}
|
||||
this->streamProperties.stateBaseAddress.setPropertyStatelessMocs(mocsIndex);
|
||||
|
||||
if (this->isGlobalAtomicsProgrammingRequired(dispatchFlags.useGlobalAtomics) && (this->isMultiOsContextCapable() || dispatchFlags.areMultipleSubDevicesInContext)) {
|
||||
isStateBaseAddressDirty = true;
|
||||
lastSentUseGlobalAtomics = dispatchFlags.useGlobalAtomics;
|
||||
|
||||
this->streamProperties.stateBaseAddress.setPropertyGlobalAtomics(lastSentUseGlobalAtomics, rootDeviceEnvironment, false);
|
||||
}
|
||||
handleStateBaseAddressStateTransition(dispatchFlags, isStateBaseAddressDirty);
|
||||
|
||||
bool debuggingEnabled = device.getDebugger() != nullptr;
|
||||
bool sourceLevelDebuggerActive = device.getSourceLevelDebugger() != nullptr ? true : false;
|
||||
|
||||
auto memoryCompressionState = lastMemoryCompressionState;
|
||||
if (dispatchFlags.memoryCompressionState != MemoryCompressionState::NotApplicable) {
|
||||
memoryCompressionState = dispatchFlags.memoryCompressionState;
|
||||
}
|
||||
if (memoryCompressionState != lastMemoryCompressionState) {
|
||||
isStateBaseAddressDirty = true;
|
||||
lastMemoryCompressionState = memoryCompressionState;
|
||||
}
|
||||
|
||||
// Reprogram state base address if required
|
||||
if (isStateBaseAddressDirty || sourceLevelDebuggerActive) {
|
||||
EncodeWA<GfxFamily>::addPipeControlBeforeStateBaseAddress(commandStreamCSR, rootDeviceEnvironment, isRcs(), this->dcFlushSupport);
|
||||
@@ -546,13 +518,13 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
ssh, // ssh
|
||||
device.getGmmHelper(), // gmmHelper
|
||||
&hwInfo, // hwInfo
|
||||
mocsIndex, // statelessMocsIndex
|
||||
memoryCompressionState, // memoryCompressionState
|
||||
this->latestSentStatelessMocsConfig, // statelessMocsIndex
|
||||
this->lastMemoryCompressionState, // memoryCompressionState
|
||||
true, // setInstructionStateBaseAddress
|
||||
true, // setGeneralStateBaseAddress
|
||||
false, // useGlobalHeapsBaseAddress
|
||||
isMultiOsContextCapable(), // isMultiOsContextCapable
|
||||
dispatchFlags.useGlobalAtomics, // useGlobalAtomics
|
||||
this->lastSentUseGlobalAtomics, // useGlobalAtomics
|
||||
dispatchFlags.areMultipleSubDevicesInContext, // areMultipleSubDevicesInContext
|
||||
false, // overrideSurfaceStateBaseAddress
|
||||
debuggingEnabled || device.isDebuggerActive() // isDebuggerActive
|
||||
@@ -1625,11 +1597,6 @@ size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForComputeMode() {
|
||||
return EncodeComputeMode<GfxFamily>::getCmdSizeForComputeMode(this->peekRootDeviceEnvironment(), hasSharedHandles(), isRcs());
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
constexpr bool CommandStreamReceiverHw<GfxFamily>::isGlobalAtomicsProgrammingRequired(bool currentVal) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::createKernelArgsBufferAllocation() {
|
||||
}
|
||||
@@ -1640,7 +1607,7 @@ SubmissionStatus CommandStreamReceiverHw<GfxFamily>::initializeDeviceWithFirstSu
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::handleFrontEndStateTransition(DispatchFlags &dispatchFlags) {
|
||||
void CommandStreamReceiverHw<GfxFamily>::handleFrontEndStateTransition(const DispatchFlags &dispatchFlags) {
|
||||
if (streamProperties.frontEndState.disableOverdispatch.value != -1) {
|
||||
lastAdditionalKernelExecInfo = streamProperties.frontEndState.disableOverdispatch.value == 1 ? AdditionalKernelExecInfo::DisableOverdispatch : AdditionalKernelExecInfo::NotSet;
|
||||
}
|
||||
@@ -1665,7 +1632,7 @@ void CommandStreamReceiverHw<GfxFamily>::handleFrontEndStateTransition(DispatchF
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::handlePipelineSelectStateTransition(DispatchFlags &dispatchFlags) {
|
||||
void CommandStreamReceiverHw<GfxFamily>::handlePipelineSelectStateTransition(const DispatchFlags &dispatchFlags) {
|
||||
if (streamProperties.pipelineSelect.mediaSamplerDopClockGate.value != -1) {
|
||||
this->lastMediaSamplerConfig = static_cast<int8_t>(streamProperties.pipelineSelect.mediaSamplerDopClockGate.value);
|
||||
}
|
||||
@@ -1685,4 +1652,48 @@ bool CommandStreamReceiverHw<GfxFamily>::directSubmissionRelaxedOrderingEnabled(
|
||||
(blitterDirectSubmission.get() && blitterDirectSubmission->isRelaxedOrderingEnabled()));
|
||||
}
|
||||
|
||||
// Reconciles the state-base-address related settings requested by dispatchFlags
// (stateless MOCS index, memory compression state and, where supported, global
// atomics) with the values this command stream receiver last recorded.
//
// dispatchFlags            - read-only request for the current dispatch.
// isStateBaseAddressDirty  - in/out flag; set to true whenever one of the tracked
//                            values differs from the last recorded one. This
//                            function never resets it to false.
//
// Side effects: updates latestSentStatelessMocsConfig, lastMemoryCompressionState,
// lastSentUseGlobalAtomics and the stateBaseAddress stream properties.
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::handleStateBaseAddressStateTransition(const DispatchFlags &dispatchFlags, bool &isStateBaseAddressDirty) {
|
||||
auto &rootDeviceEnvironment = this->peekRootDeviceEnvironment();
|
||||
|
||||
// Adopt the MOCS value held in the stream properties as the "last sent" value.
// NOTE(review): -1 presumably means "property not set" - confirm against StreamProperty.
if (this->streamProperties.stateBaseAddress.statelessMocs.value != -1) {
|
||||
this->latestSentStatelessMocsConfig = static_cast<uint32_t>(this->streamProperties.stateBaseAddress.statelessMocs.value);
|
||||
}
|
||||
// Default to the last recorded index; only recompute it when the dispatch states an L3 preference.
auto mocsIndex = this->latestSentStatelessMocsConfig;
|
||||
if (dispatchFlags.l3CacheSettings != L3CachingSettings::NotApplicable) {
|
||||
auto l3On = dispatchFlags.l3CacheSettings != L3CachingSettings::l3CacheOff;
|
||||
auto l1On = dispatchFlags.l3CacheSettings == L3CachingSettings::l3AndL1On;
|
||||
|
||||
auto &gfxCoreHelper = getGfxCoreHelper();
|
||||
mocsIndex = gfxCoreHelper.getMocsIndex(*rootDeviceEnvironment.getGmmHelper(), l3On, l1On);
|
||||
}
|
||||
// A changed MOCS index marks the state base address dirty and becomes the new recorded value.
if (mocsIndex != this->latestSentStatelessMocsConfig) {
|
||||
isStateBaseAddressDirty = true;
|
||||
this->latestSentStatelessMocsConfig = mocsIndex;
|
||||
}
|
||||
this->streamProperties.stateBaseAddress.setPropertyStatelessMocs(mocsIndex);
|
||||
|
||||
// Memory compression: keep the last recorded state unless the dispatch supplies an applicable one.
auto memoryCompressionState = this->lastMemoryCompressionState;
|
||||
if (dispatchFlags.memoryCompressionState != MemoryCompressionState::NotApplicable) {
|
||||
memoryCompressionState = dispatchFlags.memoryCompressionState;
|
||||
}
|
||||
if (memoryCompressionState != this->lastMemoryCompressionState) {
|
||||
isStateBaseAddressDirty = true;
|
||||
this->lastMemoryCompressionState = memoryCompressionState;
|
||||
}
|
||||
|
||||
// Global atomics are tracked only when the support flags report the property as supported.
if (this->sbaSupportFlags.globalAtomics) {
|
||||
// Adopt the value held in the stream properties as the "last sent" value.
// NOTE(review): -1 presumably means "property not set" - confirm against StreamProperty.
if (this->streamProperties.stateBaseAddress.globalAtomics.value != -1) {
|
||||
this->lastSentUseGlobalAtomics = !!this->streamProperties.stateBaseAddress.globalAtomics.value;
|
||||
}
|
||||
|
||||
// The requested flag is honored only for a multi-OS-context-capable CSR or a context with multiple sub-devices.
bool globalAtomics = (this->isMultiOsContextCapable() || dispatchFlags.areMultipleSubDevicesInContext) && dispatchFlags.useGlobalAtomics;
|
||||
if (this->lastSentUseGlobalAtomics != globalAtomics) {
|
||||
isStateBaseAddressDirty = true;
|
||||
this->lastSentUseGlobalAtomics = globalAtomics;
|
||||
}
|
||||
this->streamProperties.stateBaseAddress.setPropertyGlobalAtomics(globalAtomics, rootDeviceEnvironment, false);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -73,15 +73,6 @@ void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCR
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
constexpr bool EncodeDispatchKernel<Family>::shouldUpdateGlobalAtomics(bool &currentVal, bool refVal, bool predicate) {
|
||||
if (predicate && currentVal != refVal) {
|
||||
currentVal = refVal;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
template <>
|
||||
void adjustL3ControlField<Family>(void *l3ControlBuffer) { ; }
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -71,11 +71,6 @@ size_t CommandStreamReceiverHw<Family>::getCmdSizeForPerDssBackedBuffer(const Ha
|
||||
template <>
|
||||
void CommandStreamReceiverHw<Family>::addPipeControlBefore3dState(LinearStream &commandStream, DispatchFlags &dispatchFlags) {}
|
||||
|
||||
template <>
|
||||
constexpr bool CommandStreamReceiverHw<Family>::isGlobalAtomicsProgrammingRequired(bool currentValue) const {
|
||||
return currentValue != this->lastSentUseGlobalAtomics;
|
||||
}
|
||||
|
||||
template <>
|
||||
void BlitCommandsHelper<Family>::appendClearColor(const BlitProperties &blitProperties, typename Family::XY_BLOCK_COPY_BLT &blitCmd) {
|
||||
using XY_BLOCK_COPY_BLT = typename Family::XY_BLOCK_COPY_BLT;
|
||||
|
||||
@@ -53,12 +53,10 @@ struct XeHpCore {
|
||||
|
||||
struct StateBaseAddressStateSupport {
|
||||
static constexpr bool globalAtomics = true;
|
||||
static constexpr bool statelessMocs = true;
|
||||
static constexpr bool bindingTablePoolBaseAddress = true;
|
||||
};
|
||||
|
||||
struct PipelineSelectStateSupport {
|
||||
static constexpr bool modeSelected = true;
|
||||
static constexpr bool mediaSamplerDopClockGate = true;
|
||||
static constexpr bool systolicMode = true;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user