refactor: add infrastructure for setting L1 flush mode

Related-To: NEO-15936

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2025-12-11 15:34:11 +00:00
committed by Compute-Runtime-Automation
parent c7b31ad0da
commit c265bc692f
12 changed files with 90 additions and 1 deletions

View File

@@ -23,6 +23,7 @@ struct StateComputeModePropertiesSupport {
bool allocationForScratchAndMidthreadPreemption = false; bool allocationForScratchAndMidthreadPreemption = false;
bool enableVariableRegisterSizeAllocation = false; bool enableVariableRegisterSizeAllocation = false;
bool pipelinedEuThreadArbitration = false; bool pipelinedEuThreadArbitration = false;
bool enableL1FlushUavCoherencyMode = false;
bool lscSamplerBackingThreshold = false; bool lscSamplerBackingThreshold = false;
bool enableOutOfBoundariesInTranslationException = false; bool enableOutOfBoundariesInTranslationException = false;
bool enablePageFaultException = false; bool enablePageFaultException = false;
@@ -42,6 +43,7 @@ struct StateComputeModeProperties {
StreamProperty memoryAllocationForScratchAndMidthreadPreemptionBuffers{}; StreamProperty memoryAllocationForScratchAndMidthreadPreemptionBuffers{};
StreamProperty enableVariableRegisterSizeAllocation{}; StreamProperty enableVariableRegisterSizeAllocation{};
StreamProperty pipelinedEuThreadArbitration{}; StreamProperty pipelinedEuThreadArbitration{};
StreamProperty enableL1FlushUavCoherencyMode{};
StreamProperty lscSamplerBackingThreshold{}; StreamProperty lscSamplerBackingThreshold{};
StreamProperty enableOutOfBoundariesInTranslationException{}; StreamProperty enableOutOfBoundariesInTranslationException{};
StreamProperty enablePageFaultException{}; StreamProperty enablePageFaultException{};

View File

@@ -62,6 +62,7 @@ void StateComputeModeProperties::copyPropertiesAll(const StateComputeModePropert
memoryAllocationForScratchAndMidthreadPreemptionBuffers.set(properties.memoryAllocationForScratchAndMidthreadPreemptionBuffers.value); memoryAllocationForScratchAndMidthreadPreemptionBuffers.set(properties.memoryAllocationForScratchAndMidthreadPreemptionBuffers.value);
enableVariableRegisterSizeAllocation.set(properties.enableVariableRegisterSizeAllocation.value); enableVariableRegisterSizeAllocation.set(properties.enableVariableRegisterSizeAllocation.value);
pipelinedEuThreadArbitration.set(properties.pipelinedEuThreadArbitration.value); pipelinedEuThreadArbitration.set(properties.pipelinedEuThreadArbitration.value);
enableL1FlushUavCoherencyMode.set(properties.enableL1FlushUavCoherencyMode.value);
enablePageFaultException.set(properties.enablePageFaultException.value); enablePageFaultException.set(properties.enablePageFaultException.value);
enableSystemMemoryReadFence.set(properties.enableSystemMemoryReadFence.value); enableSystemMemoryReadFence.set(properties.enableSystemMemoryReadFence.value);
enableMemoryException.set(properties.enableMemoryException.value); enableMemoryException.set(properties.enableMemoryException.value);
@@ -93,6 +94,7 @@ bool StateComputeModeProperties::isDirty() const {
memoryAllocationForScratchAndMidthreadPreemptionBuffers.isDirty || memoryAllocationForScratchAndMidthreadPreemptionBuffers.isDirty ||
enableVariableRegisterSizeAllocation.isDirty || enableVariableRegisterSizeAllocation.isDirty ||
pipelinedEuThreadArbitration.isDirty || pipelinedEuThreadArbitration.isDirty ||
enableL1FlushUavCoherencyMode.isDirty ||
enablePageFaultException.isDirty || enablePageFaultException.isDirty ||
enableSystemMemoryReadFence.isDirty || enableSystemMemoryReadFence.isDirty ||
enableMemoryException.isDirty || enableMemoryException.isDirty ||
@@ -118,6 +120,7 @@ void StateComputeModeProperties::clearIsDirtyPerContext() {
devicePreemptionMode.isDirty = false; devicePreemptionMode.isDirty = false;
enableVariableRegisterSizeAllocation.isDirty = false; enableVariableRegisterSizeAllocation.isDirty = false;
pipelinedEuThreadArbitration.isDirty = false; pipelinedEuThreadArbitration.isDirty = false;
enableL1FlushUavCoherencyMode.isDirty = false;
enablePageFaultException.isDirty = false; enablePageFaultException.isDirty = false;
enableSystemMemoryReadFence.isDirty = false; enableSystemMemoryReadFence.isDirty = false;
enableMemoryException.isDirty = false; enableMemoryException.isDirty = false;
@@ -188,6 +191,7 @@ void StateComputeModeProperties::resetState() {
this->memoryAllocationForScratchAndMidthreadPreemptionBuffers.value = StreamProperty::initValue; this->memoryAllocationForScratchAndMidthreadPreemptionBuffers.value = StreamProperty::initValue;
this->enableVariableRegisterSizeAllocation.value = StreamProperty::initValue; this->enableVariableRegisterSizeAllocation.value = StreamProperty::initValue;
this->pipelinedEuThreadArbitration.value = StreamProperty::initValue; this->pipelinedEuThreadArbitration.value = StreamProperty::initValue;
this->enableL1FlushUavCoherencyMode.value = StreamProperty::initValue;
this->enablePageFaultException.value = StreamProperty::initValue; this->enablePageFaultException.value = StreamProperty::initValue;
this->enableSystemMemoryReadFence.value = StreamProperty::initValue; this->enableSystemMemoryReadFence.value = StreamProperty::initValue;
this->enableMemoryException.value = StreamProperty::initValue; this->enableMemoryException.value = StreamProperty::initValue;
@@ -216,6 +220,10 @@ void StateComputeModeProperties::setPropertiesPerContext(bool requiresCoherency,
this->pipelinedEuThreadArbitration.set(true); this->pipelinedEuThreadArbitration.set(true);
} }
if (this->scmPropertiesSupport.enableL1FlushUavCoherencyMode) {
this->enableL1FlushUavCoherencyMode.set(this->scmPropertiesSupport.enableL1FlushUavCoherencyMode);
}
if (this->scmPropertiesSupport.enablePageFaultException) { if (this->scmPropertiesSupport.enablePageFaultException) {
this->enablePageFaultException.set(this->scmPropertiesSupport.enablePageFaultException); this->enablePageFaultException.set(this->scmPropertiesSupport.enablePageFaultException);
} }

View File

@@ -584,6 +584,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceDispatchTimeoutCounter, -1, "Set timeout fo
DECLARE_DEBUG_VARIABLE(int32_t, OverrideNumThreadsPerEu, -1, "-1: default, >0: force number of threads per EU") DECLARE_DEBUG_VARIABLE(int32_t, OverrideNumThreadsPerEu, -1, "-1: default, >0: force number of threads per EU")
DECLARE_DEBUG_VARIABLE(int32_t, Enable64bAddressingForRayTracing, -1, "-1: default, 0: disabled, 1: enabled. Enable support for 64 bit addressing for RayTracing HSD-14016042915") DECLARE_DEBUG_VARIABLE(int32_t, Enable64bAddressingForRayTracing, -1, "-1: default, 0: disabled, 1: enabled. Enable support for 64 bit addressing for RayTracing HSD-14016042915")
DECLARE_DEBUG_VARIABLE(int32_t, EnableXe3VariableRegisterSizeAllocation, -1, "When enabled, use new Xe3 Variable Register per Thread (VRT) feature, -1: default, 0: disabled, 1: enabled") DECLARE_DEBUG_VARIABLE(int32_t, EnableXe3VariableRegisterSizeAllocation, -1, "When enabled, use new Xe3 Variable Register per Thread (VRT) feature, -1: default, 0: disabled, 1: enabled")
DECLARE_DEBUG_VARIABLE(int32_t, EnableL1FlushUavCoherencyMode, -1, "When enabled, state compute mode is configured with L1 flush for UAV coherency mode, -1: default, 0: disabled, 1: enabled")
DECLARE_DEBUG_VARIABLE(int32_t, ResourceBarrierL1FlushMode, -1, "Invalidate or flush L1 cache in RESOURCE_BARRIER instruction. -1: default, 0: disabled, 1: invalidate L1, 2: flush L1, 3: both") DECLARE_DEBUG_VARIABLE(int32_t, ResourceBarrierL1FlushMode, -1, "Invalidate or flush L1 cache in RESOURCE_BARRIER instruction. -1: default, 0: disabled, 1: invalidate L1, 2: flush L1, 3: both")
/* IMPLICIT SCALING */ /* IMPLICIT SCALING */

View File

@@ -78,6 +78,11 @@ void EncodeComputeMode<Family>::programComputeModeCommand(LinearStream &csr, Sta
maskBits |= Family::stateComputeModeLargeGrfModeMask; maskBits |= Family::stateComputeModeLargeGrfModeMask;
} }
if (properties.enableL1FlushUavCoherencyMode.isDirty) {
stateComputeMode.setUavCoherencyMode(STATE_COMPUTE_MODE::UAV_COHERENCY_MODE::UAV_COHERENCY_MODE_FLUSH_DATAPORT_L1);
maskBits2 |= Family::stateComputeModeUavCoherencyModeMask;
}
stateComputeMode.setMask1(maskBits); stateComputeMode.setMask1(maskBits);
stateComputeMode.setMask2(maskBits2); stateComputeMode.setMask2(maskBits2);

View File

@@ -24,6 +24,7 @@ struct Xe2HpgCore {
static constexpr uint32_t stateComputeModeEuThreadSchedulingModeOverrideMask = (0b11u << 13); static constexpr uint32_t stateComputeModeEuThreadSchedulingModeOverrideMask = (0b11u << 13);
static constexpr uint32_t stateComputeModeLargeGrfModeMask = (1u << 15); static constexpr uint32_t stateComputeModeLargeGrfModeMask = (1u << 15);
// DW2 // DW2
static constexpr uint32_t stateComputeModeUavCoherencyModeMask = (1u << 6);
static constexpr uint32_t stateComputeModeMemoryAllocationForScratchAndMidthreadPreemptionBuffersMask = (1u << 11); static constexpr uint32_t stateComputeModeMemoryAllocationForScratchAndMidthreadPreemptionBuffersMask = (1u << 11);
static constexpr bool isUsingL3Control = false; static constexpr bool isUsingL3Control = false;
@@ -53,6 +54,7 @@ struct Xe2HpgCore {
static constexpr bool devicePreemptionMode = false; static constexpr bool devicePreemptionMode = false;
static constexpr bool allocationForScratchAndMidthreadPreemption = true; static constexpr bool allocationForScratchAndMidthreadPreemption = true;
static constexpr bool enableL1FlushUavCoherencyMode = false;
}; };
struct StateBaseAddressStateSupport { struct StateBaseAddressStateSupport {

View File

@@ -51,6 +51,10 @@ void ProductHelperHw<gfxProduct>::fillScmPropertiesSupportStructure(StateCompute
fillScmPropertiesSupportStructureBase(propertiesSupport); fillScmPropertiesSupportStructureBase(propertiesSupport);
propertiesSupport.allocationForScratchAndMidthreadPreemption = GfxProduct::StateComputeModeStateSupport::allocationForScratchAndMidthreadPreemption; propertiesSupport.allocationForScratchAndMidthreadPreemption = GfxProduct::StateComputeModeStateSupport::allocationForScratchAndMidthreadPreemption;
propertiesSupport.enableL1FlushUavCoherencyMode = GfxProduct::StateComputeModeStateSupport::enableL1FlushUavCoherencyMode;
if (debugManager.flags.EnableL1FlushUavCoherencyMode.get() != -1) {
propertiesSupport.enableL1FlushUavCoherencyMode = !!debugManager.flags.EnableL1FlushUavCoherencyMode.get();
}
} }
template <> template <>

View File

@@ -66,6 +66,11 @@ void EncodeComputeMode<Family>::programComputeModeCommand(LinearStream &csr, Sta
maskBits |= Family::stateComputeModeEnableVariableRegisterSizeAllocationMask; maskBits |= Family::stateComputeModeEnableVariableRegisterSizeAllocationMask;
} }
if (properties.enableL1FlushUavCoherencyMode.isDirty) {
stateComputeMode.setUavCoherencyMode(STATE_COMPUTE_MODE::UAV_COHERENCY_MODE::UAV_COHERENCY_MODE_FLUSH_DATAPORT_L1);
maskBits2 |= Family::stateComputeModeUavCoherencyModeMask;
}
stateComputeMode.setMask1(maskBits); stateComputeMode.setMask1(maskBits);
stateComputeMode.setMask2(maskBits2); stateComputeMode.setMask2(maskBits2);

View File

@@ -26,6 +26,7 @@ struct Xe3Core {
static constexpr uint32_t stateComputeModeEuThreadSchedulingModeOverrideMask = (0b11u << 13); static constexpr uint32_t stateComputeModeEuThreadSchedulingModeOverrideMask = (0b11u << 13);
static constexpr uint32_t stateComputeModeLargeGrfModeMask = (1u << 15); static constexpr uint32_t stateComputeModeLargeGrfModeMask = (1u << 15);
// DW2 // DW2
static constexpr uint32_t stateComputeModeUavCoherencyModeMask = (1u << 6);
static constexpr uint32_t bcsEngineCount = 1u; static constexpr uint32_t bcsEngineCount = 1u;
static constexpr uint32_t timestampPacketCount = 16u; static constexpr uint32_t timestampPacketCount = 16u;
@@ -55,6 +56,7 @@ struct Xe3Core {
static constexpr bool allocationForScratchAndMidthreadPreemption = true; static constexpr bool allocationForScratchAndMidthreadPreemption = true;
static constexpr bool enableVariableRegisterSizeAllocation = true; static constexpr bool enableVariableRegisterSizeAllocation = true;
static constexpr bool enableL1FlushUavCoherencyMode = false;
}; };
struct StateBaseAddressStateSupport { struct StateBaseAddressStateSupport {

View File

@@ -52,6 +52,11 @@ void ProductHelperHw<gfxProduct>::fillScmPropertiesSupportStructure(StateCompute
if (pipelinedEuThreadArbitration) { if (pipelinedEuThreadArbitration) {
propertiesSupport.pipelinedEuThreadArbitration = true; propertiesSupport.pipelinedEuThreadArbitration = true;
} }
propertiesSupport.enableL1FlushUavCoherencyMode = GfxProduct::StateComputeModeStateSupport::enableL1FlushUavCoherencyMode;
if (debugManager.flags.EnableL1FlushUavCoherencyMode.get() != -1) {
propertiesSupport.enableL1FlushUavCoherencyMode = !!debugManager.flags.EnableL1FlushUavCoherencyMode.get();
}
} }
template <> template <>

View File

@@ -30,6 +30,7 @@ struct Xe3pCore {
static constexpr uint32_t stateComputeModeEuThreadSchedulingModeOverrideMask = (0b11u << 13); static constexpr uint32_t stateComputeModeEuThreadSchedulingModeOverrideMask = (0b11u << 13);
static constexpr uint32_t stateComputeModeLargeGrfModeMask = (1u << 15); static constexpr uint32_t stateComputeModeLargeGrfModeMask = (1u << 15);
// DW2 // DW2
static constexpr uint32_t stateComputeModeUavCoherencyModeMask = (1u << 6);
static constexpr uint32_t stateComputeModeEnableOutOfBoundariesInTranslationExceptionMask = (1u << 7); static constexpr uint32_t stateComputeModeEnableOutOfBoundariesInTranslationExceptionMask = (1u << 7);
static constexpr uint32_t stateComputeModePageFaultExceptionEnableMask = (1u << 9); static constexpr uint32_t stateComputeModePageFaultExceptionEnableMask = (1u << 9);
static constexpr uint32_t stateComputeModeSystemMemoryReadFenceEnableMask = (1u << 11); static constexpr uint32_t stateComputeModeSystemMemoryReadFenceEnableMask = (1u << 11);
@@ -66,6 +67,7 @@ struct Xe3pCore {
static constexpr bool allocationForScratchAndMidthreadPreemption = true; static constexpr bool allocationForScratchAndMidthreadPreemption = true;
static constexpr bool enableVariableRegisterSizeAllocation = true; static constexpr bool enableVariableRegisterSizeAllocation = true;
static constexpr bool enableL1FlushUavCoherencyMode = false;
static constexpr bool enablePageFaultException = false; static constexpr bool enablePageFaultException = false;
static constexpr bool enableSystemMemoryReadFence = false; static constexpr bool enableSystemMemoryReadFence = false;
static constexpr bool enableMemoryException = false; static constexpr bool enableMemoryException = false;

View File

@@ -685,6 +685,7 @@ Enable512NumGrfs = 1
EnableUsmPoolResidencyTracking = -1 EnableUsmPoolResidencyTracking = -1
EnableUsmPoolLazyInit = -1 EnableUsmPoolLazyInit = -1
ForcePrintsRedirection = -1 ForcePrintsRedirection = -1
EnableL1FlushUavCoherencyMode = -1
ResourceBarrierL1FlushMode = -1 ResourceBarrierL1FlushMode = -1
InitialCounterBasedEventValue = -1 InitialCounterBasedEventValue = -1
DirectSubmissionInitialSemaphoreValue = -1 DirectSubmissionInitialSemaphoreValue = -1

View File

@@ -1,11 +1,12 @@
/* /*
* Copyright (C) 2024 Intel Corporation * Copyright (C) 2024-2025 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
*/ */
#include "shared/source/command_container/encode_surface_state.h" #include "shared/source/command_container/encode_surface_state.h"
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/gmm_helper/gmm_lib.h" #include "shared/source/gmm_helper/gmm_lib.h"
#include "shared/source/os_interface/product_helper.h" #include "shared/source/os_interface/product_helper.h"
@@ -63,3 +64,54 @@ HWTEST2_F(CommandEncodeStatesTestXe2AndLater, whenDebugFlagIsDisabledForAdjustPi
HWTEST2_F(ImplicitScalingTests, GivenXeAtLeastHpg2WhenCheckingPipeControlStallRequiredThenExpectTrue, IsAtLeastXe2HpgCore) { HWTEST2_F(ImplicitScalingTests, GivenXeAtLeastHpg2WhenCheckingPipeControlStallRequiredThenExpectTrue, IsAtLeastXe2HpgCore) {
EXPECT_FALSE(ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired()); EXPECT_FALSE(ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired());
} }
// Checks the UAV coherency mode written into STATE_COMPUTE_MODE for three settings of the
// EnableL1FlushUavCoherencyMode debug flag: untouched, set to 1 and set to 0.
HWTEST2_F(CommandEncodeStatesTestXe2AndLater, givenDebugFlagWhenProgrammingStateComputeModeThenEnableL1FlushUavCoherencyMode, IsAtLeastXe2HpgCore) {
    using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;

    DebugManagerStateRestore restore;
    uint8_t buffer[sizeof(STATE_COMPUTE_MODE)]{};
    const auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();

    // Builds fresh stream properties, programs one STATE_COMPUTE_MODE command at the start of
    // `buffer` and returns the UAV coherency mode read back from that command.
    auto encodeAndReadUavCoherencyMode = [&]() {
        LinearStream cmdStream(buffer, sizeof(buffer));
        StreamProperties properties{};
        properties.initSupport(rootDeviceEnvironment);
        properties.stateComputeMode.setPropertiesAll(false, 0, 0, PreemptionMode::Disabled, false);

        EncodeComputeMode<FamilyType>::programComputeModeCommand(cmdStream, properties.stateComputeMode, rootDeviceEnvironment);

        auto *programmedCmd = reinterpret_cast<STATE_COMPUTE_MODE *>(cmdStream.getCpuBase());
        return programmedCmd->getUavCoherencyMode();
    };

    // default: debug flag not modified by this test
    EXPECT_EQ(STATE_COMPUTE_MODE::UAV_COHERENCY_MODE::UAV_COHERENCY_MODE_DRAIN_DATAPORT_MODE, encodeAndReadUavCoherencyMode());

    // enabled
    debugManager.flags.EnableL1FlushUavCoherencyMode.set(1);
    EXPECT_EQ(STATE_COMPUTE_MODE::UAV_COHERENCY_MODE::UAV_COHERENCY_MODE_FLUSH_DATAPORT_L1, encodeAndReadUavCoherencyMode());

    // disabled
    debugManager.flags.EnableL1FlushUavCoherencyMode.set(0);
    EXPECT_EQ(STATE_COMPUTE_MODE::UAV_COHERENCY_MODE::UAV_COHERENCY_MODE_DRAIN_DATAPORT_MODE, encodeAndReadUavCoherencyMode());
}