refactor: add infrastructure for setting L1 flush mode

Related-To: NEO-15936

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2025-12-11 15:34:11 +00:00
committed by Compute-Runtime-Automation
parent c7b31ad0da
commit c265bc692f
12 changed files with 90 additions and 1 deletions

View File

@@ -23,6 +23,7 @@ struct StateComputeModePropertiesSupport {
bool allocationForScratchAndMidthreadPreemption = false;
bool enableVariableRegisterSizeAllocation = false;
bool pipelinedEuThreadArbitration = false;
bool enableL1FlushUavCoherencyMode = false;
bool lscSamplerBackingThreshold = false;
bool enableOutOfBoundariesInTranslationException = false;
bool enablePageFaultException = false;
@@ -42,6 +43,7 @@ struct StateComputeModeProperties {
StreamProperty memoryAllocationForScratchAndMidthreadPreemptionBuffers{};
StreamProperty enableVariableRegisterSizeAllocation{};
StreamProperty pipelinedEuThreadArbitration{};
StreamProperty enableL1FlushUavCoherencyMode{};
StreamProperty lscSamplerBackingThreshold{};
StreamProperty enableOutOfBoundariesInTranslationException{};
StreamProperty enablePageFaultException{};

View File

@@ -62,6 +62,7 @@ void StateComputeModeProperties::copyPropertiesAll(const StateComputeModePropert
memoryAllocationForScratchAndMidthreadPreemptionBuffers.set(properties.memoryAllocationForScratchAndMidthreadPreemptionBuffers.value);
enableVariableRegisterSizeAllocation.set(properties.enableVariableRegisterSizeAllocation.value);
pipelinedEuThreadArbitration.set(properties.pipelinedEuThreadArbitration.value);
enableL1FlushUavCoherencyMode.set(properties.enableL1FlushUavCoherencyMode.value);
enablePageFaultException.set(properties.enablePageFaultException.value);
enableSystemMemoryReadFence.set(properties.enableSystemMemoryReadFence.value);
enableMemoryException.set(properties.enableMemoryException.value);
@@ -93,6 +94,7 @@ bool StateComputeModeProperties::isDirty() const {
memoryAllocationForScratchAndMidthreadPreemptionBuffers.isDirty ||
enableVariableRegisterSizeAllocation.isDirty ||
pipelinedEuThreadArbitration.isDirty ||
enableL1FlushUavCoherencyMode.isDirty ||
enablePageFaultException.isDirty ||
enableSystemMemoryReadFence.isDirty ||
enableMemoryException.isDirty ||
@@ -118,6 +120,7 @@ void StateComputeModeProperties::clearIsDirtyPerContext() {
devicePreemptionMode.isDirty = false;
enableVariableRegisterSizeAllocation.isDirty = false;
pipelinedEuThreadArbitration.isDirty = false;
enableL1FlushUavCoherencyMode.isDirty = false;
enablePageFaultException.isDirty = false;
enableSystemMemoryReadFence.isDirty = false;
enableMemoryException.isDirty = false;
@@ -188,6 +191,7 @@ void StateComputeModeProperties::resetState() {
this->memoryAllocationForScratchAndMidthreadPreemptionBuffers.value = StreamProperty::initValue;
this->enableVariableRegisterSizeAllocation.value = StreamProperty::initValue;
this->pipelinedEuThreadArbitration.value = StreamProperty::initValue;
this->enableL1FlushUavCoherencyMode.value = StreamProperty::initValue;
this->enablePageFaultException.value = StreamProperty::initValue;
this->enableSystemMemoryReadFence.value = StreamProperty::initValue;
this->enableMemoryException.value = StreamProperty::initValue;
@@ -216,6 +220,10 @@ void StateComputeModeProperties::setPropertiesPerContext(bool requiresCoherency,
this->pipelinedEuThreadArbitration.set(true);
}
// Request the L1-flush UAV coherency property only when the support structure enables it;
// when it is not enabled, the stream property is not touched by this statement.
if (this->scmPropertiesSupport.enableL1FlushUavCoherencyMode) {
this->enableL1FlushUavCoherencyMode.set(this->scmPropertiesSupport.enableL1FlushUavCoherencyMode);
}
if (this->scmPropertiesSupport.enablePageFaultException) {
this->enablePageFaultException.set(this->scmPropertiesSupport.enablePageFaultException);
}

View File

@@ -584,6 +584,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceDispatchTimeoutCounter, -1, "Set timeout fo
DECLARE_DEBUG_VARIABLE(int32_t, OverrideNumThreadsPerEu, -1, "-1: default, >0: force number of threads per EU")
DECLARE_DEBUG_VARIABLE(int32_t, Enable64bAddressingForRayTracing, -1, "-1: default, 0: disabled, 1: enabled. Enable support for 64 bit addressing for RayTracing HSD-14016042915")
DECLARE_DEBUG_VARIABLE(int32_t, EnableXe3VariableRegisterSizeAllocation, -1, "When enabled, use new Xe3 Variable Register per Thread (VRT) feature, -1: default, 0: disabled, 1: enabled")
DECLARE_DEBUG_VARIABLE(int32_t, EnableL1FlushUavCoherencyMode, -1, "When enabled, state compute mode is configured with L1 flush for UAV coherency mode, -1: default, 0: disabled, 1: enabled")
DECLARE_DEBUG_VARIABLE(int32_t, ResourceBarrierL1FlushMode, -1, "Invalidate or flush L1 cache in RESOURCE_BARRIER instruction. -1: default, 0: disabled, 1: invalidate L1, 2: flush L1, 3: both")
/* IMPLICIT SCALING */

View File

@@ -78,6 +78,11 @@ void EncodeComputeMode<Family>::programComputeModeCommand(LinearStream &csr, Sta
maskBits |= Family::stateComputeModeLargeGrfModeMask;
}
// When the L1-flush UAV coherency property is dirty, select the FLUSH_DATAPORT_L1 mode and
// add the matching bit to maskBits2, which is handed to setMask2() below.
// NOTE(review): only the dirty flag is consulted here, not the property's value - confirm
// that the property can never become dirty while carrying a "disabled" value.
if (properties.enableL1FlushUavCoherencyMode.isDirty) {
stateComputeMode.setUavCoherencyMode(STATE_COMPUTE_MODE::UAV_COHERENCY_MODE::UAV_COHERENCY_MODE_FLUSH_DATAPORT_L1);
maskBits2 |= Family::stateComputeModeUavCoherencyModeMask;
}
stateComputeMode.setMask1(maskBits);
stateComputeMode.setMask2(maskBits2);

View File

@@ -24,6 +24,7 @@ struct Xe2HpgCore {
static constexpr uint32_t stateComputeModeEuThreadSchedulingModeOverrideMask = (0b11u << 13);
static constexpr uint32_t stateComputeModeLargeGrfModeMask = (1u << 15);
// DW2
static constexpr uint32_t stateComputeModeUavCoherencyModeMask = (1u << 6);
static constexpr uint32_t stateComputeModeMemoryAllocationForScratchAndMidthreadPreemptionBuffersMask = (1u << 11);
static constexpr bool isUsingL3Control = false;
@@ -53,6 +54,7 @@ struct Xe2HpgCore {
static constexpr bool devicePreemptionMode = false;
static constexpr bool allocationForScratchAndMidthreadPreemption = true;
static constexpr bool enableL1FlushUavCoherencyMode = false;
};
struct StateBaseAddressStateSupport {

View File

@@ -51,6 +51,10 @@ void ProductHelperHw<gfxProduct>::fillScmPropertiesSupportStructure(StateCompute
fillScmPropertiesSupportStructureBase(propertiesSupport);
propertiesSupport.allocationForScratchAndMidthreadPreemption = GfxProduct::StateComputeModeStateSupport::allocationForScratchAndMidthreadPreemption;
// Start from the product's compile-time default, then let the EnableL1FlushUavCoherencyMode
// debug flag override it whenever the flag differs from -1 (0 disables, non-zero enables).
propertiesSupport.enableL1FlushUavCoherencyMode = GfxProduct::StateComputeModeStateSupport::enableL1FlushUavCoherencyMode;
if (debugManager.flags.EnableL1FlushUavCoherencyMode.get() != -1) {
propertiesSupport.enableL1FlushUavCoherencyMode = !!debugManager.flags.EnableL1FlushUavCoherencyMode.get();
}
}
template <>

View File

@@ -66,6 +66,11 @@ void EncodeComputeMode<Family>::programComputeModeCommand(LinearStream &csr, Sta
maskBits |= Family::stateComputeModeEnableVariableRegisterSizeAllocationMask;
}
// When the L1-flush UAV coherency property is dirty, select the FLUSH_DATAPORT_L1 mode and
// add the matching bit to maskBits2, which is handed to setMask2() below.
// NOTE(review): only the dirty flag is consulted here, not the property's value - confirm
// that the property can never become dirty while carrying a "disabled" value.
if (properties.enableL1FlushUavCoherencyMode.isDirty) {
stateComputeMode.setUavCoherencyMode(STATE_COMPUTE_MODE::UAV_COHERENCY_MODE::UAV_COHERENCY_MODE_FLUSH_DATAPORT_L1);
maskBits2 |= Family::stateComputeModeUavCoherencyModeMask;
}
stateComputeMode.setMask1(maskBits);
stateComputeMode.setMask2(maskBits2);

View File

@@ -26,6 +26,7 @@ struct Xe3Core {
static constexpr uint32_t stateComputeModeEuThreadSchedulingModeOverrideMask = (0b11u << 13);
static constexpr uint32_t stateComputeModeLargeGrfModeMask = (1u << 15);
// DW2
static constexpr uint32_t stateComputeModeUavCoherencyModeMask = (1u << 6);
static constexpr uint32_t bcsEngineCount = 1u;
static constexpr uint32_t timestampPacketCount = 16u;
@@ -55,6 +56,7 @@ struct Xe3Core {
static constexpr bool allocationForScratchAndMidthreadPreemption = true;
static constexpr bool enableVariableRegisterSizeAllocation = true;
static constexpr bool enableL1FlushUavCoherencyMode = false;
};
struct StateBaseAddressStateSupport {

View File

@@ -52,6 +52,11 @@ void ProductHelperHw<gfxProduct>::fillScmPropertiesSupportStructure(StateCompute
if (pipelinedEuThreadArbitration) {
propertiesSupport.pipelinedEuThreadArbitration = true;
}
// Start from the product's compile-time default, then let the EnableL1FlushUavCoherencyMode
// debug flag override it whenever the flag differs from -1 (0 disables, non-zero enables).
propertiesSupport.enableL1FlushUavCoherencyMode = GfxProduct::StateComputeModeStateSupport::enableL1FlushUavCoherencyMode;
if (debugManager.flags.EnableL1FlushUavCoherencyMode.get() != -1) {
propertiesSupport.enableL1FlushUavCoherencyMode = !!debugManager.flags.EnableL1FlushUavCoherencyMode.get();
}
}
template <>

View File

@@ -30,6 +30,7 @@ struct Xe3pCore {
static constexpr uint32_t stateComputeModeEuThreadSchedulingModeOverrideMask = (0b11u << 13);
static constexpr uint32_t stateComputeModeLargeGrfModeMask = (1u << 15);
// DW2
static constexpr uint32_t stateComputeModeUavCoherencyModeMask = (1u << 6);
static constexpr uint32_t stateComputeModeEnableOutOfBoundariesInTranslationExceptionMask = (1u << 7);
static constexpr uint32_t stateComputeModePageFaultExceptionEnableMask = (1u << 9);
static constexpr uint32_t stateComputeModeSystemMemoryReadFenceEnableMask = (1u << 11);
@@ -66,6 +67,7 @@ struct Xe3pCore {
static constexpr bool allocationForScratchAndMidthreadPreemption = true;
static constexpr bool enableVariableRegisterSizeAllocation = true;
static constexpr bool enableL1FlushUavCoherencyMode = false;
static constexpr bool enablePageFaultException = false;
static constexpr bool enableSystemMemoryReadFence = false;
static constexpr bool enableMemoryException = false;

View File

@@ -685,6 +685,7 @@ Enable512NumGrfs = 1
EnableUsmPoolResidencyTracking = -1
EnableUsmPoolLazyInit = -1
ForcePrintsRedirection = -1
EnableL1FlushUavCoherencyMode = -1
ResourceBarrierL1FlushMode = -1
InitialCounterBasedEventValue = -1
DirectSubmissionInitialSemaphoreValue = -1

View File

@@ -1,11 +1,12 @@
/*
* Copyright (C) 2024 Intel Corporation
* Copyright (C) 2024-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/command_container/encode_surface_state.h"
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/gmm_helper/gmm_lib.h"
#include "shared/source/os_interface/product_helper.h"
@@ -63,3 +64,54 @@ HWTEST2_F(CommandEncodeStatesTestXe2AndLater, whenDebugFlagIsDisabledForAdjustPi
HWTEST2_F(ImplicitScalingTests, GivenXeAtLeastHpg2WhenCheckingPipeControlStallRequiredThenExpectTrue, IsAtLeastXe2HpgCore) {
EXPECT_FALSE(ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired());
}
// Verifies how the EnableL1FlushUavCoherencyMode debug flag affects the UAV coherency mode
// read back from a freshly programmed STATE_COMPUTE_MODE command:
//   flag left at default -> DRAIN_DATAPORT_MODE expected
//   flag = 1             -> FLUSH_DATAPORT_L1 expected
//   flag = 0             -> DRAIN_DATAPORT_MODE expected
HWTEST2_F(CommandEncodeStatesTestXe2AndLater, givenDebugFlagWhenProgrammingStateComputeModeThenEnableL1FlushUavCoherencyMode, IsAtLeastXe2HpgCore) {
    using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;

    DebugManagerStateRestore restore;
    uint8_t buffer[sizeof(STATE_COMPUTE_MODE)]{};
    const auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();

    // Builds new stream properties under the currently active debug flags, programs the
    // command through a stream that starts at the beginning of `buffer`, and returns the
    // UAV coherency mode read from the stream's base address.
    auto programAndReadUavCoherencyMode = [&]() {
        LinearStream cmdStream(buffer, sizeof(buffer));
        StreamProperties props{};
        props.initSupport(rootDeviceEnvironment);
        props.stateComputeMode.setPropertiesAll(false, 0, 0, PreemptionMode::Disabled, false);
        EncodeComputeMode<FamilyType>::programComputeModeCommand(cmdStream, props.stateComputeMode, rootDeviceEnvironment);
        auto *programmedCmd = reinterpret_cast<STATE_COMPUTE_MODE *>(cmdStream.getCpuBase());
        return programmedCmd->getUavCoherencyMode();
    };

    // default
    EXPECT_EQ(STATE_COMPUTE_MODE::UAV_COHERENCY_MODE::UAV_COHERENCY_MODE_DRAIN_DATAPORT_MODE, programAndReadUavCoherencyMode());

    // enabled
    debugManager.flags.EnableL1FlushUavCoherencyMode.set(1);
    EXPECT_EQ(STATE_COMPUTE_MODE::UAV_COHERENCY_MODE::UAV_COHERENCY_MODE_FLUSH_DATAPORT_L1, programAndReadUavCoherencyMode());

    // disabled
    debugManager.flags.EnableL1FlushUavCoherencyMode.set(0);
    EXPECT_EQ(STATE_COMPUTE_MODE::UAV_COHERENCY_MODE::UAV_COHERENCY_MODE_DRAIN_DATAPORT_MODE, programAndReadUavCoherencyMode());
}