Add base address state transitions across regular and immediate command lists

- state updates coming from a regular command list are stored in the csr last sent variables
- all per-context and per-kernel transitions are kept in a single place
- state updates from immediate to regular command lists are set in the csr stream properties
- duplicated global atomics support code is removed

Related-To: NEO-5055

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2023-02-02 18:57:24 +00:00
committed by Compute-Runtime-Automation
parent 140e59810f
commit bf2072c3ea
16 changed files with 363 additions and 337 deletions

View File

@@ -281,27 +281,27 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
} }
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{ NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
eventAddress, // eventAddress eventAddress, // eventAddress
neoDevice, // device neoDevice, // device
kernel, // dispatchInterface kernel, // dispatchInterface
ssh, // surfaceStateHeap ssh, // surfaceStateHeap
dsh, // dynamicStateHeap dsh, // dynamicStateHeap
reinterpret_cast<const void *>(threadGroupDimensions), // threadGroupDimensions reinterpret_cast<const void *>(threadGroupDimensions), // threadGroupDimensions
&additionalCommands, // additionalCommands &additionalCommands, // additionalCommands
commandListPreemptionMode, // preemptionMode commandListPreemptionMode, // preemptionMode
this->partitionCount, // partitionCount this->partitionCount, // partitionCount
launchParams.isIndirect, // isIndirect launchParams.isIndirect, // isIndirect
launchParams.isPredicate, // isPredicate launchParams.isPredicate, // isPredicate
isTimestampEvent, // isTimestampEvent isTimestampEvent, // isTimestampEvent
uncachedMocsKernel, // requiresUncachedMocs uncachedMocsKernel, // requiresUncachedMocs
kernelDescriptor.kernelAttributes.flags.useGlobalAtomics, // useGlobalAtomics cmdListDefaultGlobalAtomics, // useGlobalAtomics
internalUsage, // isInternal internalUsage, // isInternal
launchParams.isCooperative, // isCooperative launchParams.isCooperative, // isCooperative
isHostSignalScopeEvent, // isHostScopeSignalEvent isHostSignalScopeEvent, // isHostScopeSignalEvent
isKernelUsingSystemAllocation, // isKernelUsingSystemAllocation isKernelUsingSystemAllocation, // isKernelUsingSystemAllocation
cmdListType == CommandListType::TYPE_IMMEDIATE, // isKernelDispatchedFromImmediateCmdList cmdListType == CommandListType::TYPE_IMMEDIATE, // isKernelDispatchedFromImmediateCmdList
engineGroupType == NEO::EngineGroupType::RenderCompute, // isRcs engineGroupType == NEO::EngineGroupType::RenderCompute, // isRcs
this->dcFlushSupport // dcFlushEnable this->dcFlushSupport // dcFlushEnable
}; };
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs, getLogicalStateHelper()); NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs, getLogicalStateHelper());
if (!this->isFlushTaskSubmissionEnabled) { if (!this->isFlushTaskSubmissionEnabled) {

View File

@@ -1552,212 +1552,3 @@ HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, givenBarrierNodeSetWhe
EXPECT_EQ(0u, pipeControl->getImmediateData()); EXPECT_EQ(0u, pipeControl->getImmediateData());
EXPECT_EQ(gpuAddress, UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*pipeControl)); EXPECT_EQ(gpuAddress, UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*pipeControl));
} }
// Drives handleFrontEndStateTransition() through the three front-end support flags
// (disableOverdispatch, computeDispatchAllWalker, disableEuFusion), one at a time, and checks
// after every call whether the media VFE state is expected to be flagged dirty.
// Per the test name the front-end stream properties start out not initialized; the
// disableEUFusion property is written explicitly only in the last scenarios.
// The steps are order dependent: each call observes the state left behind by the previous one.
HWTEST_F(UltCommandStreamReceiverTest, givenFrontEndStateNotInitedWhenTransitionFrontEndPropertiesThenExpectCorrectValuesStored) {
auto dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
// Start with a clean VFE state and only the disableOverdispatch feature reported as supported.
commandStreamReceiver.feSupportFlags.computeDispatchAllWalker = false;
commandStreamReceiver.feSupportFlags.disableEuFusion = false;
commandStreamReceiver.setMediaVFEStateDirty(false);
commandStreamReceiver.feSupportFlags.disableOverdispatch = true;
// NotApplicable and NotSet requests are expected to leave the VFE state clean.
dispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotApplicable;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty());
dispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty());
// Requesting DisableOverdispatch is expected to dirty the VFE state.
dispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::DisableOverdispatch;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty());
// With the feature reported as unsupported, the same request is expected to be ignored.
commandStreamReceiver.setMediaVFEStateDirty(false);
commandStreamReceiver.feSupportFlags.disableOverdispatch = false;
commandStreamReceiver.lastAdditionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty());
// computeDispatchAllWalker supported: NotApplicable and Default execution types keep the state clean...
commandStreamReceiver.feSupportFlags.computeDispatchAllWalker = true;
dispatchFlags.kernelExecutionType = KernelExecutionType::NotApplicable;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty());
dispatchFlags.kernelExecutionType = KernelExecutionType::Default;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty());
// ...while Concurrent is expected to dirty it.
dispatchFlags.kernelExecutionType = KernelExecutionType::Concurrent;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty());
// With the feature reported as unsupported, the Concurrent request is expected to be ignored.
commandStreamReceiver.setMediaVFEStateDirty(false);
commandStreamReceiver.feSupportFlags.computeDispatchAllWalker = false;
commandStreamReceiver.lastKernelExecutionType = KernelExecutionType::Default;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty());
// disableEuFusion supported: the first request (false) is expected to dirty the state.
commandStreamReceiver.feSupportFlags.disableEuFusion = true;
dispatchFlags.disableEUFusion = false;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty());
// Stream property explicitly set to 0 and a request of true: dirty is expected.
commandStreamReceiver.setMediaVFEStateDirty(false);
commandStreamReceiver.streamProperties.frontEndState.disableEUFusion.value = 0;
dispatchFlags.disableEUFusion = true;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty());
// Request switched back to false (stream property not touched by the test): no dirty expected.
commandStreamReceiver.setMediaVFEStateDirty(false);
dispatchFlags.disableEUFusion = false;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty());
// Feature unsupported and stream property reset to -1: no dirty expected.
commandStreamReceiver.feSupportFlags.disableEuFusion = false;
commandStreamReceiver.streamProperties.frontEndState.disableEUFusion.value = -1;
dispatchFlags.disableEUFusion = false;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty());
}
// Same transition check as the "NotInited" variant, but here the test writes explicit values (0 / 1)
// into the frontEndState stream properties before each call, and expects the media VFE state to be
// flagged dirty only when the requested dispatch flag differs from the stored property value.
// The steps are order dependent: each call observes the state left behind by the previous one.
HWTEST_F(UltCommandStreamReceiverTest, givenFrontEndStateInitedWhenTransitionFrontEndPropertiesThenExpectCorrectValuesStored) {
auto dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
// Clean VFE state; only disableOverdispatch is reported as supported.
commandStreamReceiver.feSupportFlags.computeDispatchAllWalker = false;
commandStreamReceiver.feSupportFlags.disableEuFusion = false;
commandStreamReceiver.setMediaVFEStateDirty(false);
commandStreamReceiver.feSupportFlags.disableOverdispatch = true;
// Property 0 with a NotSet request: no dirty expected.
commandStreamReceiver.streamProperties.frontEndState.disableOverdispatch.value = 0;
dispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty());
// Property still 0 with a DisableOverdispatch request: dirty expected.
dispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::DisableOverdispatch;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty());
// Property 1 with a NotSet request: dirty expected.
commandStreamReceiver.setMediaVFEStateDirty(false);
commandStreamReceiver.streamProperties.frontEndState.disableOverdispatch.value = 1;
dispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty());
// Switch the supported feature to computeDispatchAllWalker.
commandStreamReceiver.setMediaVFEStateDirty(false);
commandStreamReceiver.feSupportFlags.disableOverdispatch = false;
commandStreamReceiver.feSupportFlags.computeDispatchAllWalker = true;
// Property 0 with a Default request: no dirty expected.
commandStreamReceiver.streamProperties.frontEndState.computeDispatchAllWalkerEnable.value = 0;
dispatchFlags.kernelExecutionType = KernelExecutionType::Default;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty());
// Property still 0 with a Concurrent request: dirty expected.
dispatchFlags.kernelExecutionType = KernelExecutionType::Concurrent;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty());
// Property 1 with a Default request: dirty expected.
commandStreamReceiver.setMediaVFEStateDirty(false);
commandStreamReceiver.streamProperties.frontEndState.computeDispatchAllWalkerEnable.value = 1;
dispatchFlags.kernelExecutionType = KernelExecutionType::Default;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty());
commandStreamReceiver.setMediaVFEStateDirty(false);
}
// Drives handlePipelineSelectStateTransition() with the pipelineSelect stream properties left
// untouched by the test, steering the outcome through the CSR "last sent" members
// (lastMediaSamplerConfig, lastSystolicPipelineSelectMode) and the pipeline support flags.
// After each call it checks the csrSizeRequestFlags bit that is expected to report a change.
// The steps are order dependent: each call observes the state left behind by the previous one.
HWTEST_F(UltCommandStreamReceiverTest, givenPipelineSelectStateNotInitedWhenTransitionPipelineSelectPropertiesThenExpectCorrectValuesStored) {
auto dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
// Only the media sampler DOP clock gate feature is reported as supported.
commandStreamReceiver.pipelineSupportFlags.systolicMode = false;
commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = true;
// First call with mediaSamplerRequired = false: a configuration change is expected.
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = false;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged);
// Feature unsupported and last sent value reset to -1: no change expected.
commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = false;
commandStreamReceiver.lastMediaSamplerConfig = -1;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged);
// Feature supported, last sent value 0 and request still false: no change expected.
commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = true;
commandStreamReceiver.lastMediaSamplerConfig = 0;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged);
// Request flipped to true: change expected.
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = true;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged);
// Switch the supported feature to systolic mode; last sent false with a request of true: change expected.
commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = false;
commandStreamReceiver.pipelineSupportFlags.systolicMode = true;
commandStreamReceiver.lastSystolicPipelineSelectMode = false;
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = true;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode);
// Same request with systolic mode reported as unsupported: no change expected.
commandStreamReceiver.pipelineSupportFlags.systolicMode = false;
commandStreamReceiver.lastSystolicPipelineSelectMode = false;
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = true;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode);
// Supported, but the request equals the last sent value (both false): no change expected.
commandStreamReceiver.pipelineSupportFlags.systolicMode = true;
commandStreamReceiver.lastSystolicPipelineSelectMode = false;
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = false;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode);
}
// Same transition check as the "NotInited" variant, but here the test writes explicit values (0 / 1)
// into the pipelineSelect stream properties before each call. The CSR "last sent" members are
// deliberately set to values that disagree with the stream property, and the expectations follow
// the stream property value rather than the "last sent" member.
// The steps are order dependent: each call observes the state left behind by the previous one.
HWTEST_F(UltCommandStreamReceiverTest,
givenPipelineSelectStateInitedWhenTransitionPipelineSelectPropertiesThenExpectCorrectValuesStored) {
auto dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
// Only the media sampler DOP clock gate feature is reported as supported.
commandStreamReceiver.pipelineSupportFlags.systolicMode = false;
commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = true;
// Property 1, last sent -1, request false: change expected.
commandStreamReceiver.streamProperties.pipelineSelect.mediaSamplerDopClockGate.value = 1;
commandStreamReceiver.lastMediaSamplerConfig = -1;
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = false;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged);
// Property 0, request true: change expected.
commandStreamReceiver.streamProperties.pipelineSelect.mediaSamplerDopClockGate.value = 0;
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = true;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged);
// Property 0, last sent 1, request false: no change expected.
commandStreamReceiver.streamProperties.pipelineSelect.mediaSamplerDopClockGate.value = 0;
commandStreamReceiver.lastMediaSamplerConfig = 1;
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = false;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged);
// Switch the supported feature to systolic mode.
commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = false;
commandStreamReceiver.pipelineSupportFlags.systolicMode = true;
// Property 1, last sent false, request false: change expected.
commandStreamReceiver.streamProperties.pipelineSelect.systolicMode.value = 1;
commandStreamReceiver.lastSystolicPipelineSelectMode = false;
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = false;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode);
// Property 0, request true: change expected.
commandStreamReceiver.streamProperties.pipelineSelect.systolicMode.value = 0;
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = true;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode);
// Property 0, last sent true, request false: no change expected.
commandStreamReceiver.streamProperties.pipelineSelect.systolicMode.value = 0;
commandStreamReceiver.lastSystolicPipelineSelectMode = true;
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = false;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode);
}

View File

@@ -374,46 +374,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHpCommandStreamReceiverFlushTaskTests, givenMultE
EXPECT_TRUE(stateBaseAddress->getDisableSupportForMultiGpuPartialWritesForStatelessMessages()); EXPECT_TRUE(stateBaseAddress->getDisableSupportForMultiGpuPartialWritesForStatelessMessages());
} }
// Value-parameterized fixture for the multi-GPU global atomics programming test.
// The parameter tuple is, in order:
//   (isMultiOsContextCapable, useGlobalAtomics, areMultipleSubDevicesInContext, enableMultiGpuAtomicsOptimization)
struct MultiGpuGlobalAtomicsTest : public XeHpCommandStreamReceiverFlushTaskTests,
public ::testing::WithParamInterface<std::tuple<bool, bool, bool, bool>> {
};
// Flushes a task with the parameterized settings, parses the CSR command stream and verifies the
// "disable support for multi-GPU atomics for stateless accesses" field of the programmed
// STATE_BASE_ADDRESS command against the expectation computed below.
HWCMDTEST_P(IGFX_XE_HP_CORE, MultiGpuGlobalAtomicsTest, givenFlushingCommandStreamReceiverThenDisableSupportForMultiGpuAtomicsForStatelessAccessesIsSetCorrectly) {
bool isMultiOsContextCapable, useGlobalAtomics, areMultipleSubDevicesInContext, enableMultiGpuAtomicsOptimization;
std::tie(isMultiOsContextCapable, useGlobalAtomics, areMultipleSubDevicesInContext, enableMultiGpuAtomicsOptimization) = GetParam();
// NOTE(review): stateRestore presumably restores the debug flag when the test ends - confirm against DebugManagerStateRestore.
DebugManagerStateRestore stateRestore;
DebugManager.flags.EnableMultiGpuAtomicsOptimization.set(enableMultiGpuAtomicsOptimization);
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.multiOsContextCapable = isMultiOsContextCapable;
flushTaskFlags.useGlobalAtomics = useGlobalAtomics;
flushTaskFlags.areMultipleSubDevicesInContext = areMultipleSubDevicesInContext;
flushTask(commandStreamReceiver, false, 0, false, false);
HardwareParse hwParserCsr;
hwParserCsr.parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
hwParserCsr.findHardwareCommands<FamilyType>();
// The flush must have programmed a STATE_BASE_ADDRESS command; abort the test otherwise.
ASSERT_NE(nullptr, hwParserCsr.cmdStateBaseAddress);
auto stateBaseAddress = static_cast<STATE_BASE_ADDRESS *>(hwParserCsr.cmdStateBaseAddress);
// Expected "enabled" state: without the optimization it follows isMultiOsContextCapable alone;
// with the optimization it additionally requires useGlobalAtomics and accepts
// areMultipleSubDevicesInContext as an alternative to isMultiOsContextCapable.
auto enabled = isMultiOsContextCapable;
if (enableMultiGpuAtomicsOptimization) {
enabled = useGlobalAtomics && (enabled || areMultipleSubDevicesInContext);
}
// The command field is a "disable" bit, hence the negation.
EXPECT_EQ(!enabled, stateBaseAddress->getDisableSupportForMultiGpuAtomicsForStatelessAccesses());
}
// Instantiates the test for every combination of the four boolean parameters.
INSTANTIATE_TEST_CASE_P(MultiGpuGlobalAtomics,
MultiGpuGlobalAtomicsTest,
::testing::Combine(
::testing::Bool(),
::testing::Bool(),
::testing::Bool(),
::testing::Bool()));
HWCMDTEST_F(IGFX_XE_HP_CORE, XeHpCommandStreamReceiverFlushTaskTests, givenDebugKeysThatOverrideMultiGpuSettingWhenStateBaseAddressIsProgrammedThenValuesMatch) { HWCMDTEST_F(IGFX_XE_HP_CORE, XeHpCommandStreamReceiverFlushTaskTests, givenDebugKeysThatOverrideMultiGpuSettingWhenStateBaseAddressIsProgrammedThenValuesMatch) {
DebugManagerStateRestore restorer; DebugManagerStateRestore restorer;
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

View File

@@ -136,8 +136,6 @@ struct EncodeDispatchKernel {
static void adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment); static void adjustWalkOrder(WALKER_TYPE &walkerCmd, uint32_t requiredWorkGroupOrder, const RootDeviceEnvironment &rootDeviceEnvironment);
static constexpr bool shouldUpdateGlobalAtomics(bool &currentVal, bool refVal, bool updateCurrent);
static size_t getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor, uint32_t iddCount); static size_t getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor, uint32_t iddCount);
static size_t getSizeRequiredSsh(const KernelInfo &kernelInfo); static size_t getSizeRequiredSsh(const KernelInfo &kernelInfo);
inline static size_t additionalSizeRequiredDsh(uint32_t iddCount); inline static size_t additionalSizeRequiredDsh(uint32_t iddCount);

View File

@@ -727,9 +727,6 @@ void EncodeDispatchKernel<Family>::adjustBindingTablePrefetch(INTERFACE_DESCRIPT
template <typename Family> template <typename Family>
void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf) {} void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const Device &device, const HardwareInfo &hwInfo, const uint32_t threadGroupCount, const uint32_t numGrf) {}
// Generic implementation: always reports that no global atomics update is needed and never
// modifies currentVal. All three parameters are unused here.
template <typename Family>
constexpr bool EncodeDispatchKernel<Family>::shouldUpdateGlobalAtomics(bool &currentVal, bool refVal, bool updateCurrent) {
    return false;
}
template <typename Family> template <typename Family>
size_t EncodeDispatchKernel<Family>::getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor, uint32_t iddCount) { size_t EncodeDispatchKernel<Family>::getSizeRequiredDsh(const KernelDescriptor &kernelDescriptor, uint32_t iddCount) {
using INTERFACE_DESCRIPTOR_DATA = typename Family::INTERFACE_DESCRIPTOR_DATA; using INTERFACE_DESCRIPTOR_DATA = typename Family::INTERFACE_DESCRIPTOR_DATA;

View File

@@ -220,8 +220,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
} }
} }
if (shouldUpdateGlobalAtomics(container.lastSentUseGlobalAtomics, args.useGlobalAtomics, args.partitionCount > 1) || if (container.isAnyHeapDirty() ||
container.isAnyHeapDirty() ||
args.requiresUncachedMocs) { args.requiresUncachedMocs) {
PipeControlArgs syncArgs; PipeControlArgs syncArgs;

View File

@@ -92,6 +92,7 @@ CommandStreamReceiver::CommandStreamReceiver(ExecutionEnvironment &executionEnvi
auto &productHelper = getProductHelper(); auto &productHelper = getProductHelper();
productHelper.fillFrontEndPropertiesSupportStructure(feSupportFlags, hwInfo); productHelper.fillFrontEndPropertiesSupportStructure(feSupportFlags, hwInfo);
productHelper.fillPipelineSelectPropertiesSupportStructure(pipelineSupportFlags, hwInfo); productHelper.fillPipelineSelectPropertiesSupportStructure(pipelineSupportFlags, hwInfo);
productHelper.fillStateBaseAddressPropertiesSupportStructure(sbaSupportFlags);
} }
CommandStreamReceiver::~CommandStreamReceiver() { CommandStreamReceiver::~CommandStreamReceiver() {
@@ -530,8 +531,12 @@ void CommandStreamReceiver::initProgrammingFlags() {
lastSentL3Config = 0; lastSentL3Config = 0;
lastMediaSamplerConfig = -1; lastMediaSamplerConfig = -1;
lastPreemptionMode = PreemptionMode::Initial; lastPreemptionMode = PreemptionMode::Initial;
latestSentStatelessMocsConfig = CacheSettings::unknownMocs; latestSentStatelessMocsConfig = CacheSettings::unknownMocs;
this->streamProperties.stateBaseAddress.statelessMocs = {};
lastSentUseGlobalAtomics = false; lastSentUseGlobalAtomics = false;
this->streamProperties.stateBaseAddress.globalAtomics = {};
} }
void CommandStreamReceiver::programForAubSubCapture(bool wasActiveInPreviousEnqueue, bool isActive) { void CommandStreamReceiver::programForAubSubCapture(bool wasActiveInPreviousEnqueue, bool isActive) {

View File

@@ -427,6 +427,7 @@ class CommandStreamReceiver {
StreamProperties streamProperties{}; StreamProperties streamProperties{};
FrontEndPropertiesSupport feSupportFlags{}; FrontEndPropertiesSupport feSupportFlags{};
PipelineSelectPropertiesSupport pipelineSupportFlags{}; PipelineSelectPropertiesSupport pipelineSupportFlags{};
StateBaseAddressPropertiesSupport sbaSupportFlags{};
uint64_t totalMemoryUsed = 0u; uint64_t totalMemoryUsed = 0u;

View File

@@ -185,10 +185,10 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
bool checkPlatformSupportsGpuIdleImplicitFlush() const; bool checkPlatformSupportsGpuIdleImplicitFlush() const;
void configurePostSyncWriteOffset(); void configurePostSyncWriteOffset();
void unregisterDirectSubmissionFromController(); void unregisterDirectSubmissionFromController();
constexpr bool isGlobalAtomicsProgrammingRequired(bool currentValue) const;
void createKernelArgsBufferAllocation() override; void createKernelArgsBufferAllocation() override;
void handleFrontEndStateTransition(DispatchFlags &dispatchFlags); void handleFrontEndStateTransition(const DispatchFlags &dispatchFlags);
void handlePipelineSelectStateTransition(DispatchFlags &dispatchFlags); void handlePipelineSelectStateTransition(const DispatchFlags &dispatchFlags);
void handleStateBaseAddressStateTransition(const DispatchFlags &dispatchFlags, bool &isStateBaseAddressDirty);
HeapDirtyState dshState; HeapDirtyState dshState;
HeapDirtyState iohState; HeapDirtyState iohState;

View File

@@ -480,39 +480,11 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
auto isStateBaseAddressDirty = dshDirty || iohDirty || sshDirty || stateBaseAddressDirty; auto isStateBaseAddressDirty = dshDirty || iohDirty || sshDirty || stateBaseAddressDirty;
auto mocsIndex = latestSentStatelessMocsConfig; handleStateBaseAddressStateTransition(dispatchFlags, isStateBaseAddressDirty);
if (dispatchFlags.l3CacheSettings != L3CachingSettings::NotApplicable) {
auto l3On = dispatchFlags.l3CacheSettings != L3CachingSettings::l3CacheOff;
auto l1On = dispatchFlags.l3CacheSettings == L3CachingSettings::l3AndL1On;
mocsIndex = gfxCoreHelper.getMocsIndex(*device.getGmmHelper(), l3On, l1On);
}
if (mocsIndex != latestSentStatelessMocsConfig) {
isStateBaseAddressDirty = true;
latestSentStatelessMocsConfig = mocsIndex;
}
this->streamProperties.stateBaseAddress.setPropertyStatelessMocs(mocsIndex);
if (this->isGlobalAtomicsProgrammingRequired(dispatchFlags.useGlobalAtomics) && (this->isMultiOsContextCapable() || dispatchFlags.areMultipleSubDevicesInContext)) {
isStateBaseAddressDirty = true;
lastSentUseGlobalAtomics = dispatchFlags.useGlobalAtomics;
this->streamProperties.stateBaseAddress.setPropertyGlobalAtomics(lastSentUseGlobalAtomics, rootDeviceEnvironment, false);
}
bool debuggingEnabled = device.getDebugger() != nullptr; bool debuggingEnabled = device.getDebugger() != nullptr;
bool sourceLevelDebuggerActive = device.getSourceLevelDebugger() != nullptr ? true : false; bool sourceLevelDebuggerActive = device.getSourceLevelDebugger() != nullptr ? true : false;
auto memoryCompressionState = lastMemoryCompressionState;
if (dispatchFlags.memoryCompressionState != MemoryCompressionState::NotApplicable) {
memoryCompressionState = dispatchFlags.memoryCompressionState;
}
if (memoryCompressionState != lastMemoryCompressionState) {
isStateBaseAddressDirty = true;
lastMemoryCompressionState = memoryCompressionState;
}
// Reprogram state base address if required // Reprogram state base address if required
if (isStateBaseAddressDirty || sourceLevelDebuggerActive) { if (isStateBaseAddressDirty || sourceLevelDebuggerActive) {
EncodeWA<GfxFamily>::addPipeControlBeforeStateBaseAddress(commandStreamCSR, rootDeviceEnvironment, isRcs(), this->dcFlushSupport); EncodeWA<GfxFamily>::addPipeControlBeforeStateBaseAddress(commandStreamCSR, rootDeviceEnvironment, isRcs(), this->dcFlushSupport);
@@ -546,13 +518,13 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
ssh, // ssh ssh, // ssh
device.getGmmHelper(), // gmmHelper device.getGmmHelper(), // gmmHelper
&hwInfo, // hwInfo &hwInfo, // hwInfo
mocsIndex, // statelessMocsIndex this->latestSentStatelessMocsConfig, // statelessMocsIndex
memoryCompressionState, // memoryCompressionState this->lastMemoryCompressionState, // memoryCompressionState
true, // setInstructionStateBaseAddress true, // setInstructionStateBaseAddress
true, // setGeneralStateBaseAddress true, // setGeneralStateBaseAddress
false, // useGlobalHeapsBaseAddress false, // useGlobalHeapsBaseAddress
isMultiOsContextCapable(), // isMultiOsContextCapable isMultiOsContextCapable(), // isMultiOsContextCapable
dispatchFlags.useGlobalAtomics, // useGlobalAtomics this->lastSentUseGlobalAtomics, // useGlobalAtomics
dispatchFlags.areMultipleSubDevicesInContext, // areMultipleSubDevicesInContext dispatchFlags.areMultipleSubDevicesInContext, // areMultipleSubDevicesInContext
false, // overrideSurfaceStateBaseAddress false, // overrideSurfaceStateBaseAddress
debuggingEnabled || device.isDebuggerActive() // isDebuggerActive debuggingEnabled || device.isDebuggerActive() // isDebuggerActive
@@ -1625,11 +1597,6 @@ size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForComputeMode() {
return EncodeComputeMode<GfxFamily>::getCmdSizeForComputeMode(this->peekRootDeviceEnvironment(), hasSharedHandles(), isRcs()); return EncodeComputeMode<GfxFamily>::getCmdSizeForComputeMode(this->peekRootDeviceEnvironment(), hasSharedHandles(), isRcs());
} }
// Generic implementation: global atomics programming is never reported as required;
// currentVal is ignored.
template <typename GfxFamily>
constexpr bool CommandStreamReceiverHw<GfxFamily>::isGlobalAtomicsProgrammingRequired(bool currentVal) const {
return false;
}
template <typename GfxFamily> template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::createKernelArgsBufferAllocation() { void CommandStreamReceiverHw<GfxFamily>::createKernelArgsBufferAllocation() {
} }
@@ -1640,7 +1607,7 @@ SubmissionStatus CommandStreamReceiverHw<GfxFamily>::initializeDeviceWithFirstSu
} }
template <typename GfxFamily> template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::handleFrontEndStateTransition(DispatchFlags &dispatchFlags) { void CommandStreamReceiverHw<GfxFamily>::handleFrontEndStateTransition(const DispatchFlags &dispatchFlags) {
if (streamProperties.frontEndState.disableOverdispatch.value != -1) { if (streamProperties.frontEndState.disableOverdispatch.value != -1) {
lastAdditionalKernelExecInfo = streamProperties.frontEndState.disableOverdispatch.value == 1 ? AdditionalKernelExecInfo::DisableOverdispatch : AdditionalKernelExecInfo::NotSet; lastAdditionalKernelExecInfo = streamProperties.frontEndState.disableOverdispatch.value == 1 ? AdditionalKernelExecInfo::DisableOverdispatch : AdditionalKernelExecInfo::NotSet;
} }
@@ -1665,7 +1632,7 @@ void CommandStreamReceiverHw<GfxFamily>::handleFrontEndStateTransition(DispatchF
} }
template <typename GfxFamily> template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::handlePipelineSelectStateTransition(DispatchFlags &dispatchFlags) { void CommandStreamReceiverHw<GfxFamily>::handlePipelineSelectStateTransition(const DispatchFlags &dispatchFlags) {
if (streamProperties.pipelineSelect.mediaSamplerDopClockGate.value != -1) { if (streamProperties.pipelineSelect.mediaSamplerDopClockGate.value != -1) {
this->lastMediaSamplerConfig = static_cast<int8_t>(streamProperties.pipelineSelect.mediaSamplerDopClockGate.value); this->lastMediaSamplerConfig = static_cast<int8_t>(streamProperties.pipelineSelect.mediaSamplerDopClockGate.value);
} }
@@ -1685,4 +1652,48 @@ bool CommandStreamReceiverHw<GfxFamily>::directSubmissionRelaxedOrderingEnabled(
(blitterDirectSubmission.get() && blitterDirectSubmission->isRelaxedOrderingEnabled())); (blitterDirectSubmission.get() && blitterDirectSubmission->isRelaxedOrderingEnabled()));
} }
// Reconciles the state-base-address related "last sent" members of this CSR with the current
// dispatch flags and reports through isStateBaseAddressDirty whether any of them changed.
//
// Three values are processed in order: stateless MOCS, memory compression state and - only when
// sbaSupportFlags.globalAtomics is set - global atomics. For MOCS and global atomics the value
// stored in streamProperties.stateBaseAddress first overrides the cached "last sent" member,
// unless that value equals -1.
// NOTE(review): -1 looks like the "property not set" marker - confirm against the stream property type.
//
// dispatchFlags           - settings requested by the current submission; not modified.
// isStateBaseAddressDirty - in/out flag; set to true on any detected change, never cleared here.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::handleStateBaseAddressStateTransition(const DispatchFlags &dispatchFlags, bool &isStateBaseAddressDirty) {
    auto &rootDeviceEnvironment = this->peekRootDeviceEnvironment();
    auto &sbaProperties = this->streamProperties.stateBaseAddress;

    // Stateless MOCS: the stream property value (when not -1) replaces the cached value.
    if (sbaProperties.statelessMocs.value != -1) {
        this->latestSentStatelessMocsConfig = static_cast<uint32_t>(sbaProperties.statelessMocs.value);
    }

    // Without an explicit L3 cache request the cached MOCS index stays in effect.
    auto requestedMocs = this->latestSentStatelessMocsConfig;
    if (dispatchFlags.l3CacheSettings != L3CachingSettings::NotApplicable) {
        const bool l3Enabled = dispatchFlags.l3CacheSettings != L3CachingSettings::l3CacheOff;
        const bool l1Enabled = dispatchFlags.l3CacheSettings == L3CachingSettings::l3AndL1On;
        requestedMocs = getGfxCoreHelper().getMocsIndex(*rootDeviceEnvironment.getGmmHelper(), l3Enabled, l1Enabled);
    }

    if (this->latestSentStatelessMocsConfig != requestedMocs) {
        this->latestSentStatelessMocsConfig = requestedMocs;
        isStateBaseAddressDirty = true;
    }
    sbaProperties.setPropertyStatelessMocs(requestedMocs);

    // Memory compression: NotApplicable means "keep whatever was sent last".
    auto requestedCompression = dispatchFlags.memoryCompressionState;
    if (requestedCompression == MemoryCompressionState::NotApplicable) {
        requestedCompression = this->lastMemoryCompressionState;
    }
    if (this->lastMemoryCompressionState != requestedCompression) {
        this->lastMemoryCompressionState = requestedCompression;
        isStateBaseAddressDirty = true;
    }

    // Global atomics are tracked only when the support flag is set.
    if (!this->sbaSupportFlags.globalAtomics) {
        return;
    }

    if (sbaProperties.globalAtomics.value != -1) {
        this->lastSentUseGlobalAtomics = sbaProperties.globalAtomics.value != 0;
    }

    // Global atomics take effect only for multi OS context capable CSRs or contexts with multiple sub-devices.
    const bool multiDeviceScope = this->isMultiOsContextCapable() || dispatchFlags.areMultipleSubDevicesInContext;
    const bool requestedGlobalAtomics = multiDeviceScope && dispatchFlags.useGlobalAtomics;

    if (requestedGlobalAtomics != this->lastSentUseGlobalAtomics) {
        this->lastSentUseGlobalAtomics = requestedGlobalAtomics;
        isStateBaseAddressDirty = true;
    }
    sbaProperties.setPropertyGlobalAtomics(requestedGlobalAtomics, rootDeviceEnvironment, false);
}
} // namespace NEO } // namespace NEO

View File

@@ -73,15 +73,6 @@ void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCR
} }
} }
// Family specialization: synchronizes the cached global atomics value with the requested one.
// When predicate holds and the two values differ, currentVal is overwritten with refVal and
// true is returned; in every other case currentVal is left untouched and false is returned.
template <>
constexpr bool EncodeDispatchKernel<Family>::shouldUpdateGlobalAtomics(bool &currentVal, bool refVal, bool predicate) {
    if (!predicate || currentVal == refVal) {
        return false;
    }
    currentVal = refVal;
    return true;
}
template <> template <>
void adjustL3ControlField<Family>(void *l3ControlBuffer) { ; } void adjustL3ControlField<Family>(void *l3ControlBuffer) { ; }

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2021-2022 Intel Corporation * Copyright (C) 2021-2023 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@@ -71,11 +71,6 @@ size_t CommandStreamReceiverHw<Family>::getCmdSizeForPerDssBackedBuffer(const Ha
// Specialization with an empty body: nothing is written to commandStream and dispatchFlags is unused.
template <> template <>
void CommandStreamReceiverHw<Family>::addPipeControlBefore3dState(LinearStream &commandStream, DispatchFlags &dispatchFlags) {} void CommandStreamReceiverHw<Family>::addPipeControlBefore3dState(LinearStream &commandStream, DispatchFlags &dispatchFlags) {}
// Reports whether currentValue differs from the setting recorded in
// lastSentUseGlobalAtomics; true means the two values do not match.
template <>
constexpr bool CommandStreamReceiverHw<Family>::isGlobalAtomicsProgrammingRequired(bool currentValue) const {
    const bool matchesLastSent = (this->lastSentUseGlobalAtomics == currentValue);
    return !matchesLastSent;
}
template <> template <>
void BlitCommandsHelper<Family>::appendClearColor(const BlitProperties &blitProperties, typename Family::XY_BLOCK_COPY_BLT &blitCmd) { void BlitCommandsHelper<Family>::appendClearColor(const BlitProperties &blitProperties, typename Family::XY_BLOCK_COPY_BLT &blitCmd) {
using XY_BLOCK_COPY_BLT = typename Family::XY_BLOCK_COPY_BLT; using XY_BLOCK_COPY_BLT = typename Family::XY_BLOCK_COPY_BLT;

View File

@@ -53,12 +53,10 @@ struct XeHpCore {
// Compile-time boolean flags for state-base-address properties; every flag listed here is true.
// NOTE(review): presumably read as per-family "property supported" switches - confirm against the users of this struct.
struct StateBaseAddressStateSupport { struct StateBaseAddressStateSupport {
static constexpr bool globalAtomics = true; static constexpr bool globalAtomics = true;
static constexpr bool statelessMocs = true;
static constexpr bool bindingTablePoolBaseAddress = true; static constexpr bool bindingTablePoolBaseAddress = true;
}; };
// Compile-time boolean flags for pipeline-select properties; every flag listed here is true.
// NOTE(review): presumably read as per-family "property supported" switches - confirm against the users of this struct.
struct PipelineSelectStateSupport { struct PipelineSelectStateSupport {
static constexpr bool modeSelected = true;
static constexpr bool mediaSamplerDopClockGate = true; static constexpr bool mediaSamplerDopClockGate = true;
static constexpr bool systolicMode = true; static constexpr bool systolicMode = true;
}; };

View File

@@ -42,6 +42,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::getScratchSpaceController; using BaseClass::getScratchSpaceController;
using BaseClass::handleFrontEndStateTransition; using BaseClass::handleFrontEndStateTransition;
using BaseClass::handlePipelineSelectStateTransition; using BaseClass::handlePipelineSelectStateTransition;
using BaseClass::handleStateBaseAddressStateTransition;
using BaseClass::indirectHeap; using BaseClass::indirectHeap;
using BaseClass::iohState; using BaseClass::iohState;
using BaseClass::isBlitterDirectSubmissionEnabled; using BaseClass::isBlitterDirectSubmissionEnabled;
@@ -120,6 +121,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::requiredScratchSize; using BaseClass::CommandStreamReceiver::requiredScratchSize;
using BaseClass::CommandStreamReceiver::resourcesInitialized; using BaseClass::CommandStreamReceiver::resourcesInitialized;
using BaseClass::CommandStreamReceiver::samplerCacheFlushRequired; using BaseClass::CommandStreamReceiver::samplerCacheFlushRequired;
using BaseClass::CommandStreamReceiver::sbaSupportFlags;
using BaseClass::CommandStreamReceiver::scratchSpaceController; using BaseClass::CommandStreamReceiver::scratchSpaceController;
using BaseClass::CommandStreamReceiver::stallingCommandsOnNextFlushRequired; using BaseClass::CommandStreamReceiver::stallingCommandsOnNextFlushRequired;
using BaseClass::CommandStreamReceiver::submissionAggregator; using BaseClass::CommandStreamReceiver::submissionAggregator;

View File

@@ -257,6 +257,7 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
using CommandStreamReceiver::pageTableManagerInitialized; using CommandStreamReceiver::pageTableManagerInitialized;
using CommandStreamReceiver::postSyncWriteOffset; using CommandStreamReceiver::postSyncWriteOffset;
using CommandStreamReceiver::requiredScratchSize; using CommandStreamReceiver::requiredScratchSize;
using CommandStreamReceiver::sbaSupportFlags;
using CommandStreamReceiver::streamProperties; using CommandStreamReceiver::streamProperties;
using CommandStreamReceiver::tagAddress; using CommandStreamReceiver::tagAddress;
using CommandStreamReceiver::taskCount; using CommandStreamReceiver::taskCount;

View File

@@ -27,6 +27,7 @@
#include "shared/test/common/fixtures/device_fixture.h" #include "shared/test/common/fixtures/device_fixture.h"
#include "shared/test/common/helpers/batch_buffer_helper.h" #include "shared/test/common/helpers/batch_buffer_helper.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/dispatch_flags_helper.h"
#include "shared/test/common/helpers/engine_descriptor_helper.h" #include "shared/test/common/helpers/engine_descriptor_helper.h"
#include "shared/test/common/helpers/gtest_helpers.h" #include "shared/test/common/helpers/gtest_helpers.h"
#include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/helpers/unit_test_helper.h"
@@ -2317,6 +2318,215 @@ HWTEST_F(CommandStreamReceiverTest, givenDshDirtyStateWhenUpdatingStateWithNewHe
EXPECT_FALSE(check); EXPECT_FALSE(check);
} }
// Walks handleFrontEndStateTransition() through a series of dispatch-flag / support-flag
// combinations and checks the media VFE dirty flag after every call.
// No frontEndState stream property is pre-set at the start (the "not inited" case from the
// test name); disableEUFusion.value is only written explicitly in the later EU fusion steps.
// Steps are order dependent: each call runs against the state left by the previous ones.
HWTEST_F(CommandStreamReceiverTest, givenFrontEndStateNotInitedWhenTransitionFrontEndPropertiesThenExpectCorrectValuesStored) {
auto dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
// Start clean with only disableOverdispatch reported as supported.
commandStreamReceiver.feSupportFlags.computeDispatchAllWalker = false;
commandStreamReceiver.feSupportFlags.disableEuFusion = false;
commandStreamReceiver.setMediaVFEStateDirty(false);
commandStreamReceiver.feSupportFlags.disableOverdispatch = true;
// NotApplicable: dirty flag is expected to stay clear.
dispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotApplicable;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty());
// NotSet: dirty flag is expected to stay clear.
dispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty());
// DisableOverdispatch: front end state is expected to become dirty.
dispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::DisableOverdispatch;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty());
// Same request with disableOverdispatch support off and lastAdditionalKernelExecInfo rewound:
// dirty flag is expected to stay clear.
commandStreamReceiver.setMediaVFEStateDirty(false);
commandStreamReceiver.feSupportFlags.disableOverdispatch = false;
commandStreamReceiver.lastAdditionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty());
// computeDispatchAllWalker supported: NotApplicable and Default keep the flag clear ...
commandStreamReceiver.feSupportFlags.computeDispatchAllWalker = true;
dispatchFlags.kernelExecutionType = KernelExecutionType::NotApplicable;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty());
dispatchFlags.kernelExecutionType = KernelExecutionType::Default;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty());
// ... while Concurrent is expected to mark the state dirty.
dispatchFlags.kernelExecutionType = KernelExecutionType::Concurrent;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty());
// Concurrent request with computeDispatchAllWalker support off and lastKernelExecutionType
// rewound to Default: dirty flag is expected to stay clear.
commandStreamReceiver.setMediaVFEStateDirty(false);
commandStreamReceiver.feSupportFlags.computeDispatchAllWalker = false;
commandStreamReceiver.lastKernelExecutionType = KernelExecutionType::Default;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty());
// First call with disableEuFusion supported: dirty is expected even though the request is false.
// NOTE(review): presumably because the disableEUFusion property has not been set yet - confirm.
commandStreamReceiver.feSupportFlags.disableEuFusion = true;
dispatchFlags.disableEUFusion = false;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty());
// Property forced to 0, request flipped to true: dirty is expected.
commandStreamReceiver.setMediaVFEStateDirty(false);
commandStreamReceiver.streamProperties.frontEndState.disableEUFusion.value = 0;
dispatchFlags.disableEUFusion = true;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty());
// Request switched back to false: dirty flag is expected to stay clear.
commandStreamReceiver.setMediaVFEStateDirty(false);
dispatchFlags.disableEUFusion = false;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty());
// disableEuFusion support off and property reset to -1: dirty flag is expected to stay clear.
commandStreamReceiver.feSupportFlags.disableEuFusion = false;
commandStreamReceiver.streamProperties.frontEndState.disableEUFusion.value = -1;
dispatchFlags.disableEUFusion = false;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty());
}
// Same transition check as the "not inited" variant, but here the frontEndState stream
// property values (disableOverdispatch, computeDispatchAllWalkerEnable) are written explicitly
// before each call to handleFrontEndStateTransition(). The media VFE dirty flag is then
// compared against the expectation for that property value / dispatch flag pair.
HWTEST_F(CommandStreamReceiverTest, givenFrontEndStateInitedWhenTransitionFrontEndPropertiesThenExpectCorrectValuesStored) {
auto dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
// Start clean with only disableOverdispatch reported as supported.
commandStreamReceiver.feSupportFlags.computeDispatchAllWalker = false;
commandStreamReceiver.feSupportFlags.disableEuFusion = false;
commandStreamReceiver.setMediaVFEStateDirty(false);
commandStreamReceiver.feSupportFlags.disableOverdispatch = true;
// Property 0 + NotSet request: dirty flag is expected to stay clear.
commandStreamReceiver.streamProperties.frontEndState.disableOverdispatch.value = 0;
dispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty());
// DisableOverdispatch request: dirty is expected.
dispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::DisableOverdispatch;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty());
// Property 1 + NotSet request: dirty is expected.
commandStreamReceiver.setMediaVFEStateDirty(false);
commandStreamReceiver.streamProperties.frontEndState.disableOverdispatch.value = 1;
dispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty());
// Switch the supported feature to computeDispatchAllWalker.
// Property 0 + Default execution type: dirty flag is expected to stay clear.
commandStreamReceiver.setMediaVFEStateDirty(false);
commandStreamReceiver.feSupportFlags.disableOverdispatch = false;
commandStreamReceiver.feSupportFlags.computeDispatchAllWalker = true;
commandStreamReceiver.streamProperties.frontEndState.computeDispatchAllWalkerEnable.value = 0;
dispatchFlags.kernelExecutionType = KernelExecutionType::Default;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty());
// Concurrent execution type: dirty is expected.
dispatchFlags.kernelExecutionType = KernelExecutionType::Concurrent;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty());
// Property 1 + Default execution type: dirty is expected.
commandStreamReceiver.setMediaVFEStateDirty(false);
commandStreamReceiver.streamProperties.frontEndState.computeDispatchAllWalkerEnable.value = 1;
dispatchFlags.kernelExecutionType = KernelExecutionType::Default;
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty());
commandStreamReceiver.setMediaVFEStateDirty(false);
}
// Calls handlePipelineSelectStateTransition() with varying support flags, last-sent values and
// dispatch flags, then checks csrSizeRequestFlags.mediaSamplerConfigChanged and
// csrSizeRequestFlags.systolicPipelineSelectMode. This test never writes
// streamProperties.pipelineSelect values (the "not inited" case from the test name).
HWTEST_F(CommandStreamReceiverTest, givenPipelineSelectStateNotInitedWhenTransitionPipelineSelectPropertiesThenExpectCorrectValuesStored) {
auto dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
// Media sampler clock gating supported, first transition: a config change is expected.
commandStreamReceiver.pipelineSupportFlags.systolicMode = false;
commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = true;
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = false;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged);
// Support off with lastMediaSamplerConfig rewound to -1: no change is expected.
commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = false;
commandStreamReceiver.lastMediaSamplerConfig = -1;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged);
// Support on, last config 0 and request still false: no change is expected.
commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = true;
commandStreamReceiver.lastMediaSamplerConfig = 0;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged);
// Request flipped to true: a config change is expected.
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = true;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged);
// Systolic mode supported, last false, request true: the systolic flag is expected to be set.
commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = false;
commandStreamReceiver.pipelineSupportFlags.systolicMode = true;
commandStreamReceiver.lastSystolicPipelineSelectMode = false;
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = true;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode);
// Same request with systolic support off: the flag is expected to be clear.
commandStreamReceiver.pipelineSupportFlags.systolicMode = false;
commandStreamReceiver.lastSystolicPipelineSelectMode = false;
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = true;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode);
// Support on, last and request both false: the flag is expected to be clear.
commandStreamReceiver.pipelineSupportFlags.systolicMode = true;
commandStreamReceiver.lastSystolicPipelineSelectMode = false;
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = false;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode);
}
// Variant of the pipeline select transition test in which streamProperties.pipelineSelect
// values (mediaSamplerDopClockGate, systolicMode) are written explicitly before each call to
// handlePipelineSelectStateTransition(). The resulting csrSizeRequestFlags bits are then checked
// for each combination of property value, last-sent value and dispatch flag.
HWTEST_F(CommandStreamReceiverTest,
givenPipelineSelectStateInitedWhenTransitionPipelineSelectPropertiesThenExpectCorrectValuesStored) {
auto dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
// Media sampler: property 1, last config -1, request false: a config change is expected.
commandStreamReceiver.pipelineSupportFlags.systolicMode = false;
commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = true;
commandStreamReceiver.streamProperties.pipelineSelect.mediaSamplerDopClockGate.value = 1;
commandStreamReceiver.lastMediaSamplerConfig = -1;
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = false;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged);
// Property 0, request true: a config change is expected.
commandStreamReceiver.streamProperties.pipelineSelect.mediaSamplerDopClockGate.value = 0;
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = true;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged);
// Property 0, last config 1, request false: no change is expected.
commandStreamReceiver.streamProperties.pipelineSelect.mediaSamplerDopClockGate.value = 0;
commandStreamReceiver.lastMediaSamplerConfig = 1;
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = false;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.mediaSamplerConfigChanged);
// Systolic mode: property 1, last false, request false: the systolic flag is expected to be set.
commandStreamReceiver.pipelineSupportFlags.mediaSamplerDopClockGate = false;
commandStreamReceiver.pipelineSupportFlags.systolicMode = true;
commandStreamReceiver.streamProperties.pipelineSelect.systolicMode.value = 1;
commandStreamReceiver.lastSystolicPipelineSelectMode = false;
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = false;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode);
// Property 0, request true: the systolic flag is expected to be set.
commandStreamReceiver.streamProperties.pipelineSelect.systolicMode.value = 0;
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = true;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_TRUE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode);
// Property 0, last true, request false: the systolic flag is expected to be clear.
commandStreamReceiver.streamProperties.pipelineSelect.systolicMode.value = 0;
commandStreamReceiver.lastSystolicPipelineSelectMode = true;
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = false;
commandStreamReceiver.handlePipelineSelectStateTransition(dispatchFlags);
EXPECT_FALSE(commandStreamReceiver.csrSizeRequestFlags.systolicPipelineSelectMode);
}
using CommandStreamReceiverHwTest = Test<CommandStreamReceiverFixture>; using CommandStreamReceiverHwTest = Test<CommandStreamReceiverFixture>;
HWTEST2_F(CommandStreamReceiverHwTest, givenSshHeapNotProvidedWhenFlushTaskPerformedThenSbaProgammedSurfaceBaseAddressToZero, IsAtLeastXeHpCore) { HWTEST2_F(CommandStreamReceiverHwTest, givenSshHeapNotProvidedWhenFlushTaskPerformedThenSbaProgammedSurfaceBaseAddressToZero, IsAtLeastXeHpCore) {
@@ -2642,4 +2852,71 @@ HWTEST_F(CommandStreamReceiverHwTest, givenNullPtrAsMultiRootDeviceSyncNodeWhenF
cmdIterator = find<typename FamilyType::MI_FLUSH_DW *>(++cmdIterator, hwParser.cmdList.end()); cmdIterator = find<typename FamilyType::MI_FLUSH_DW *>(++cmdIterator, hwParser.cmdList.end());
} }
EXPECT_FALSE(nodeAddressFound); EXPECT_FALSE(nodeAddressFound);
} }
// Value-parameterized fixture built on CommandStreamReceiverHwTest. Each instance receives a
// tuple of four bools, which the test body unpacks as (isMultiOsContextCapable,
// useGlobalAtomics, areMultipleSubDevicesInContext, enableMultiGpuAtomicsOptimization).
struct MultiGpuGlobalAtomicsTest
    : CommandStreamReceiverHwTest,
      ::testing::WithParamInterface<std::tuple<bool, bool, bool, bool>> {};
// Flushes a task twice with the same flags. The first flush must emit a STATE_BASE_ADDRESS
// command whose "disable support for multi-GPU atomics for stateless accesses" bit matches the
// expectation computed from the four test parameters; the second flush must emit no
// STATE_BASE_ADDRESS command at all.
HWCMDTEST_P(IGFX_XE_HP_CORE, MultiGpuGlobalAtomicsTest, givenFlushingCommandStreamReceiverThenDisableSupportForMultiGpuAtomicsForStatelessAccessesIsSetCorrectly) {
bool isMultiOsContextCapable, useGlobalAtomics, areMultipleSubDevicesInContext, enableMultiGpuAtomicsOptimization;
std::tie(isMultiOsContextCapable, useGlobalAtomics, areMultipleSubDevicesInContext, enableMultiGpuAtomicsOptimization) = GetParam();
// NOTE(review): stateRestore presumably restores the debug flags when it goes out of scope - confirm.
DebugManagerStateRestore stateRestore;
DebugManager.flags.EnableMultiGpuAtomicsOptimization.set(enableMultiGpuAtomicsOptimization);
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
// Force global atomics support on so the property is tracked regardless of the default.
commandStreamReceiver.sbaSupportFlags.globalAtomics = true;
commandStreamReceiver.multiOsContextCapable = isMultiOsContextCapable;
flushTaskFlags.useGlobalAtomics = useGlobalAtomics;
flushTaskFlags.areMultipleSubDevicesInContext = areMultipleSubDevicesInContext;
// First flush: STATE_BASE_ADDRESS is required to be present in the CSR stream.
commandStreamReceiver.flushTask(
commandStream,
0,
&dsh,
&ioh,
&ssh,
taskLevel,
flushTaskFlags,
*pDevice);
HardwareParse hwParserCsr;
// Parse the CSR's own command stream from offset 0.
hwParserCsr.parseCommands<FamilyType>(commandStreamReceiver.commandStream, 0);
hwParserCsr.findHardwareCommands<FamilyType>();
ASSERT_NE(nullptr, hwParserCsr.cmdStateBaseAddress);
auto stateBaseAddress = static_cast<STATE_BASE_ADDRESS *>(hwParserCsr.cmdStateBaseAddress);
// Expected enablement: without the optimization it follows isMultiOsContextCapable only; with
// it, useGlobalAtomics must also be set and either multi-context condition must hold.
auto enabled = isMultiOsContextCapable;
if (enableMultiGpuAtomicsOptimization) {
enabled = useGlobalAtomics && (enabled || areMultipleSubDevicesInContext);
}
// The command field is a "disable" bit, hence the inverted comparison.
EXPECT_EQ(!enabled, stateBaseAddress->getDisableSupportForMultiGpuAtomicsForStatelessAccesses());
// Second flush with unchanged flags: only commands written after `offset` are inspected.
auto offset = commandStreamReceiver.getCS(0).getUsed();
commandStreamReceiver.flushTask(
commandStream,
offset,
&dsh,
&ioh,
&ssh,
taskLevel,
flushTaskFlags,
*pDevice);
hwParserCsr.cmdList.clear();
hwParserCsr.parseCommands<FamilyType>(commandStreamReceiver.getCS(0), offset);
stateBaseAddress = hwParserCsr.getCommand<STATE_BASE_ADDRESS>();
EXPECT_EQ(nullptr, stateBaseAddress);
}
// Instantiates MultiGpuGlobalAtomicsTest over the Combine() of four Bool() generators,
// i.e. every true/false combination of the four tuple parameters.
INSTANTIATE_TEST_CASE_P(MultiGpuGlobalAtomics,
MultiGpuGlobalAtomicsTest,
::testing::Combine(
::testing::Bool(),
::testing::Bool(),
::testing::Bool(),
::testing::Bool()));