Share front end state updates between regular and immediate command lists

Related-To: NEO-5019

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2022-09-20 16:46:15 +00:00
committed by Compute-Runtime-Automation
parent 7f0619e6b9
commit 5986a7199a
16 changed files with 415 additions and 22 deletions

View File

@ -74,7 +74,9 @@ CommandStreamReceiver::CommandStreamReceiver(ExecutionEnvironment &executionEnvi
this->staticWorkPartitioningEnabled = true;
}
this->systolicModeConfigurable = HwInfoConfig::get(hwInfo.platform.eProductFamily)->isSystolicModeConfigurable(hwInfo);
auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily);
this->systolicModeConfigurable = hwInfoConfig->isSystolicModeConfigurable(hwInfo);
hwInfoConfig->fillFrontEndPropertiesSupportStructure(feSupportFlags, hwInfo);
}
CommandStreamReceiver::~CommandStreamReceiver() {

View File

@ -392,6 +392,7 @@ class CommandStreamReceiver {
LinearStream commandStream;
StreamProperties streamProperties{};
FrontEndPropertiesSupport feSupportFlags{};
// offset for debug state is 1kbyte, tag writes can use multiple offsets for multiple partitions and each offset can vary per platform
const uint64_t debugPauseStateAddressOffset = MemoryConstants::kiloByte;

View File

@ -177,6 +177,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
void unregisterDirectSubmissionFromController();
constexpr bool isGlobalAtomicsProgrammingRequired(bool currentValue) const;
void createKernelArgsBufferAllocation() override;
void handleFrontEndStateTransition(DispatchFlags &dispatchFlags);
HeapDirtyState dshState;
HeapDirtyState iohState;

View File

@ -308,13 +308,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
}
if (!logicalStateHelper) {
if (dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotApplicable && lastAdditionalKernelExecInfo != dispatchFlags.additionalKernelExecInfo) {
setMediaVFEStateDirty(true);
}
if (dispatchFlags.kernelExecutionType != KernelExecutionType::NotApplicable && lastKernelExecutionType != dispatchFlags.kernelExecutionType) {
setMediaVFEStateDirty(true);
}
handleFrontEndStateTransition(dispatchFlags);
}
auto &commandStreamCSR = this->getCS(getRequiredCmdStreamSizeAligned(dispatchFlags, device));
@ -970,14 +964,14 @@ inline void CommandStreamReceiverHw<GfxFamily>::programVFEState(LinearStream &cs
lastKernelExecutionType = dispatchFlags.kernelExecutionType;
}
auto &hwInfo = peekHwInfo();
auto isCooperative = dispatchFlags.kernelExecutionType == KernelExecutionType::Concurrent;
auto disableOverdispatch = (dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotSet);
streamProperties.frontEndState.setProperties(isCooperative, dispatchFlags.disableEUFusion, disableOverdispatch, osContext->isEngineInstanced(), hwInfo);
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
auto engineGroupType = hwHelper.getEngineGroupType(getOsContext().getEngineType(), getOsContext().getEngineUsage(), hwInfo);
auto pVfeState = PreambleHelper<GfxFamily>::getSpaceForVfeState(&csr, hwInfo, engineGroupType);
auto disableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(hwInfo) &&
(dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotSet);
streamProperties.frontEndState.setProperties(lastKernelExecutionType == KernelExecutionType::Concurrent,
dispatchFlags.disableEUFusion, disableOverdispatch, osContext->isEngineInstanced(), hwInfo);
PreambleHelper<GfxFamily>::programVfeState(
pVfeState, hwInfo, requiredScratchSize, getScratchPatchAddress(),
maxFrontEndThreads, streamProperties, logicalStateHelper.get());
@ -1477,4 +1471,29 @@ void CommandStreamReceiverHw<GfxFamily>::initializeDeviceWithFirstSubmission() {
this->flushSmallTask(commandStream, commandStreamStart);
}
// Decides whether the media VFE (front end) state has to be marked dirty
// before the next dispatch.
//
// First, the cached lastAdditionalKernelExecInfo / lastKernelExecutionType
// members are refreshed from the values recorded in
// streamProperties.frontEndState. Then each front end property is compared
// with what dispatchFlags requests; a difference marks the VFE state dirty,
// but only when the matching feSupportFlags entry is set.
//
// NOTE(review): -1 presumably marks a stream property that has not been set
// yet - confirm against the stream property definition.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::handleFrontEndStateTransition(DispatchFlags &dispatchFlags) {
    const auto &frontEnd = streamProperties.frontEndState;

    // Refresh the cached "last" values from the recorded stream properties.
    if (frontEnd.disableOverdispatch.value != -1) {
        if (frontEnd.disableOverdispatch.value == 1) {
            lastAdditionalKernelExecInfo = AdditionalKernelExecInfo::DisableOverdispatch;
        } else {
            lastAdditionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
        }
    }
    if (frontEnd.computeDispatchAllWalkerEnable.value != -1) {
        if (frontEnd.computeDispatchAllWalkerEnable.value == 1) {
            lastKernelExecutionType = KernelExecutionType::Concurrent;
        } else {
            lastKernelExecutionType = KernelExecutionType::Default;
        }
    }

    // Overdispatch: the request is applicable and differs from the cached value.
    const bool overdispatchChanged =
        dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotApplicable &&
        lastAdditionalKernelExecInfo != dispatchFlags.additionalKernelExecInfo;
    if (feSupportFlags.disableOverdispatch && overdispatchChanged) {
        setMediaVFEStateDirty(true);
    }

    // Kernel execution type: the request is applicable and differs from the cached value.
    const bool executionTypeChanged =
        dispatchFlags.kernelExecutionType != KernelExecutionType::NotApplicable &&
        lastKernelExecutionType != dispatchFlags.kernelExecutionType;
    if (feSupportFlags.computeDispatchAllWalker && executionTypeChanged) {
        setMediaVFEStateDirty(true);
    }

    // EU fusion: either nothing is recorded yet (-1) or the recorded flag differs from the request.
    const bool euFusionChanged =
        frontEnd.disableEUFusion.value == -1 ||
        dispatchFlags.disableEUFusion != !!frontEnd.disableEUFusion.value;
    if (feSupportFlags.disableEuFusion && euFusionChanged) {
        setMediaVFEStateDirty(true);
    }
}
} // namespace NEO

View File

@ -38,6 +38,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::getCmdSizeForPrologue;
using BaseClass::getScratchPatchAddress;
using BaseClass::getScratchSpaceController;
using BaseClass::handleFrontEndStateTransition;
using BaseClass::indirectHeap;
using BaseClass::iohState;
using BaseClass::isBlitterDirectSubmissionEnabled;
@ -80,6 +81,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::downloadAllocationImpl;
using BaseClass::CommandStreamReceiver::executionEnvironment;
using BaseClass::CommandStreamReceiver::experimentalCmdBuffer;
using BaseClass::CommandStreamReceiver::feSupportFlags;
using BaseClass::CommandStreamReceiver::flushStamp;
using BaseClass::CommandStreamReceiver::globalFenceAllocation;
using BaseClass::CommandStreamReceiver::gpuHangCheckPeriod;
@ -91,6 +93,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::isEnginePrologueSent;
using BaseClass::CommandStreamReceiver::isPreambleSent;
using BaseClass::CommandStreamReceiver::isStateSipSent;
using BaseClass::CommandStreamReceiver::lastAdditionalKernelExecInfo;
using BaseClass::CommandStreamReceiver::lastKernelExecutionType;
using BaseClass::CommandStreamReceiver::lastMediaSamplerConfig;
using BaseClass::CommandStreamReceiver::lastMemoryCompressionState;

View File

@ -231,6 +231,7 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
using CommandStreamReceiver::clearColorAllocation;
using CommandStreamReceiver::commandStream;
using CommandStreamReceiver::dispatchMode;
using CommandStreamReceiver::feSupportFlags;
using CommandStreamReceiver::globalFenceAllocation;
using CommandStreamReceiver::isPreambleSent;
using CommandStreamReceiver::latestFlushedTaskCount;