mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-17 04:54:48 +08:00
refactor: add infrastructure for setting L1 flush mode
Related-To: NEO-15936 Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
c7b31ad0da
commit
c265bc692f
@@ -23,6 +23,7 @@ struct StateComputeModePropertiesSupport {
|
||||
bool allocationForScratchAndMidthreadPreemption = false;
|
||||
bool enableVariableRegisterSizeAllocation = false;
|
||||
bool pipelinedEuThreadArbitration = false;
|
||||
bool enableL1FlushUavCoherencyMode = false;
|
||||
bool lscSamplerBackingThreshold = false;
|
||||
bool enableOutOfBoundariesInTranslationException = false;
|
||||
bool enablePageFaultException = false;
|
||||
@@ -42,6 +43,7 @@ struct StateComputeModeProperties {
|
||||
StreamProperty memoryAllocationForScratchAndMidthreadPreemptionBuffers{};
|
||||
StreamProperty enableVariableRegisterSizeAllocation{};
|
||||
StreamProperty pipelinedEuThreadArbitration{};
|
||||
StreamProperty enableL1FlushUavCoherencyMode{};
|
||||
StreamProperty lscSamplerBackingThreshold{};
|
||||
StreamProperty enableOutOfBoundariesInTranslationException{};
|
||||
StreamProperty enablePageFaultException{};
|
||||
|
||||
@@ -62,6 +62,7 @@ void StateComputeModeProperties::copyPropertiesAll(const StateComputeModePropert
|
||||
memoryAllocationForScratchAndMidthreadPreemptionBuffers.set(properties.memoryAllocationForScratchAndMidthreadPreemptionBuffers.value);
|
||||
enableVariableRegisterSizeAllocation.set(properties.enableVariableRegisterSizeAllocation.value);
|
||||
pipelinedEuThreadArbitration.set(properties.pipelinedEuThreadArbitration.value);
|
||||
enableL1FlushUavCoherencyMode.set(properties.enableL1FlushUavCoherencyMode.value);
|
||||
enablePageFaultException.set(properties.enablePageFaultException.value);
|
||||
enableSystemMemoryReadFence.set(properties.enableSystemMemoryReadFence.value);
|
||||
enableMemoryException.set(properties.enableMemoryException.value);
|
||||
@@ -93,6 +94,7 @@ bool StateComputeModeProperties::isDirty() const {
|
||||
memoryAllocationForScratchAndMidthreadPreemptionBuffers.isDirty ||
|
||||
enableVariableRegisterSizeAllocation.isDirty ||
|
||||
pipelinedEuThreadArbitration.isDirty ||
|
||||
enableL1FlushUavCoherencyMode.isDirty ||
|
||||
enablePageFaultException.isDirty ||
|
||||
enableSystemMemoryReadFence.isDirty ||
|
||||
enableMemoryException.isDirty ||
|
||||
@@ -118,6 +120,7 @@ void StateComputeModeProperties::clearIsDirtyPerContext() {
|
||||
devicePreemptionMode.isDirty = false;
|
||||
enableVariableRegisterSizeAllocation.isDirty = false;
|
||||
pipelinedEuThreadArbitration.isDirty = false;
|
||||
enableL1FlushUavCoherencyMode.isDirty = false;
|
||||
enablePageFaultException.isDirty = false;
|
||||
enableSystemMemoryReadFence.isDirty = false;
|
||||
enableMemoryException.isDirty = false;
|
||||
@@ -188,6 +191,7 @@ void StateComputeModeProperties::resetState() {
|
||||
this->memoryAllocationForScratchAndMidthreadPreemptionBuffers.value = StreamProperty::initValue;
|
||||
this->enableVariableRegisterSizeAllocation.value = StreamProperty::initValue;
|
||||
this->pipelinedEuThreadArbitration.value = StreamProperty::initValue;
|
||||
this->enableL1FlushUavCoherencyMode.value = StreamProperty::initValue;
|
||||
this->enablePageFaultException.value = StreamProperty::initValue;
|
||||
this->enableSystemMemoryReadFence.value = StreamProperty::initValue;
|
||||
this->enableMemoryException.value = StreamProperty::initValue;
|
||||
@@ -216,6 +220,10 @@ void StateComputeModeProperties::setPropertiesPerContext(bool requiresCoherency,
|
||||
this->pipelinedEuThreadArbitration.set(true);
|
||||
}
|
||||
|
||||
if (this->scmPropertiesSupport.enableL1FlushUavCoherencyMode) {
|
||||
this->enableL1FlushUavCoherencyMode.set(this->scmPropertiesSupport.enableL1FlushUavCoherencyMode);
|
||||
}
|
||||
|
||||
if (this->scmPropertiesSupport.enablePageFaultException) {
|
||||
this->enablePageFaultException.set(this->scmPropertiesSupport.enablePageFaultException);
|
||||
}
|
||||
|
||||
@@ -584,6 +584,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceDispatchTimeoutCounter, -1, "Set timeout fo
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideNumThreadsPerEu, -1, "-1: default, >0: force number of threads per EU")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, Enable64bAddressingForRayTracing, -1, "-1: default, 0: disabled, 1: enabled. Enable support for 64 bit addressing for RayTracing HSD-14016042915")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableXe3VariableRegisterSizeAllocation, -1, "When enabled, use new Xe3 Variable Register per Thread (VRT) feature, -1: default, 0: disabled, 1: enabled")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableL1FlushUavCoherencyMode, -1, "When enabled, state compute mode is configured with L1 flush for UAV coherency mode, -1: default, 0: disabled, 1: enabled")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ResourceBarrierL1FlushMode, -1, "Invalidate or flush L1 cache in RESOURCE_BARRIER instruction. -1: default, 0: disabled, 1: invalidate L1, 2: flush L1, 3: both")
|
||||
|
||||
/* IMPLICIT SCALING */
|
||||
|
||||
@@ -78,6 +78,11 @@ void EncodeComputeMode<Family>::programComputeModeCommand(LinearStream &csr, Sta
|
||||
maskBits |= Family::stateComputeModeLargeGrfModeMask;
|
||||
}
|
||||
|
||||
if (properties.enableL1FlushUavCoherencyMode.isDirty) {
|
||||
stateComputeMode.setUavCoherencyMode(STATE_COMPUTE_MODE::UAV_COHERENCY_MODE::UAV_COHERENCY_MODE_FLUSH_DATAPORT_L1);
|
||||
maskBits2 |= Family::stateComputeModeUavCoherencyModeMask;
|
||||
}
|
||||
|
||||
stateComputeMode.setMask1(maskBits);
|
||||
stateComputeMode.setMask2(maskBits2);
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ struct Xe2HpgCore {
|
||||
static constexpr uint32_t stateComputeModeEuThreadSchedulingModeOverrideMask = (0b11u << 13);
|
||||
static constexpr uint32_t stateComputeModeLargeGrfModeMask = (1u << 15);
|
||||
// DW2
|
||||
static constexpr uint32_t stateComputeModeUavCoherencyModeMask = (1u << 6);
|
||||
static constexpr uint32_t stateComputeModeMemoryAllocationForScratchAndMidthreadPreemptionBuffersMask = (1u << 11);
|
||||
|
||||
static constexpr bool isUsingL3Control = false;
|
||||
@@ -53,6 +54,7 @@ struct Xe2HpgCore {
|
||||
static constexpr bool devicePreemptionMode = false;
|
||||
|
||||
static constexpr bool allocationForScratchAndMidthreadPreemption = true;
|
||||
static constexpr bool enableL1FlushUavCoherencyMode = false;
|
||||
};
|
||||
|
||||
struct StateBaseAddressStateSupport {
|
||||
|
||||
@@ -51,6 +51,10 @@ void ProductHelperHw<gfxProduct>::fillScmPropertiesSupportStructure(StateCompute
|
||||
|
||||
fillScmPropertiesSupportStructureBase(propertiesSupport);
|
||||
propertiesSupport.allocationForScratchAndMidthreadPreemption = GfxProduct::StateComputeModeStateSupport::allocationForScratchAndMidthreadPreemption;
|
||||
propertiesSupport.enableL1FlushUavCoherencyMode = GfxProduct::StateComputeModeStateSupport::enableL1FlushUavCoherencyMode;
|
||||
if (debugManager.flags.EnableL1FlushUavCoherencyMode.get() != -1) {
|
||||
propertiesSupport.enableL1FlushUavCoherencyMode = !!debugManager.flags.EnableL1FlushUavCoherencyMode.get();
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
|
||||
@@ -66,6 +66,11 @@ void EncodeComputeMode<Family>::programComputeModeCommand(LinearStream &csr, Sta
|
||||
maskBits |= Family::stateComputeModeEnableVariableRegisterSizeAllocationMask;
|
||||
}
|
||||
|
||||
if (properties.enableL1FlushUavCoherencyMode.isDirty) {
|
||||
stateComputeMode.setUavCoherencyMode(STATE_COMPUTE_MODE::UAV_COHERENCY_MODE::UAV_COHERENCY_MODE_FLUSH_DATAPORT_L1);
|
||||
maskBits2 |= Family::stateComputeModeUavCoherencyModeMask;
|
||||
}
|
||||
|
||||
stateComputeMode.setMask1(maskBits);
|
||||
stateComputeMode.setMask2(maskBits2);
|
||||
|
||||
|
||||
@@ -26,6 +26,7 @@ struct Xe3Core {
|
||||
static constexpr uint32_t stateComputeModeEuThreadSchedulingModeOverrideMask = (0b11u << 13);
|
||||
static constexpr uint32_t stateComputeModeLargeGrfModeMask = (1u << 15);
|
||||
// DW2
|
||||
static constexpr uint32_t stateComputeModeUavCoherencyModeMask = (1u << 6);
|
||||
static constexpr uint32_t bcsEngineCount = 1u;
|
||||
static constexpr uint32_t timestampPacketCount = 16u;
|
||||
|
||||
@@ -55,6 +56,7 @@ struct Xe3Core {
|
||||
|
||||
static constexpr bool allocationForScratchAndMidthreadPreemption = true;
|
||||
static constexpr bool enableVariableRegisterSizeAllocation = true;
|
||||
static constexpr bool enableL1FlushUavCoherencyMode = false;
|
||||
};
|
||||
|
||||
struct StateBaseAddressStateSupport {
|
||||
|
||||
@@ -52,6 +52,11 @@ void ProductHelperHw<gfxProduct>::fillScmPropertiesSupportStructure(StateCompute
|
||||
if (pipelinedEuThreadArbitration) {
|
||||
propertiesSupport.pipelinedEuThreadArbitration = true;
|
||||
}
|
||||
|
||||
propertiesSupport.enableL1FlushUavCoherencyMode = GfxProduct::StateComputeModeStateSupport::enableL1FlushUavCoherencyMode;
|
||||
if (debugManager.flags.EnableL1FlushUavCoherencyMode.get() != -1) {
|
||||
propertiesSupport.enableL1FlushUavCoherencyMode = !!debugManager.flags.EnableL1FlushUavCoherencyMode.get();
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
|
||||
@@ -30,6 +30,7 @@ struct Xe3pCore {
|
||||
static constexpr uint32_t stateComputeModeEuThreadSchedulingModeOverrideMask = (0b11u << 13);
|
||||
static constexpr uint32_t stateComputeModeLargeGrfModeMask = (1u << 15);
|
||||
// DW2
|
||||
static constexpr uint32_t stateComputeModeUavCoherencyModeMask = (1u << 6);
|
||||
static constexpr uint32_t stateComputeModeEnableOutOfBoundariesInTranslationExceptionMask = (1u << 7);
|
||||
static constexpr uint32_t stateComputeModePageFaultExceptionEnableMask = (1u << 9);
|
||||
static constexpr uint32_t stateComputeModeSystemMemoryReadFenceEnableMask = (1u << 11);
|
||||
@@ -66,6 +67,7 @@ struct Xe3pCore {
|
||||
|
||||
static constexpr bool allocationForScratchAndMidthreadPreemption = true;
|
||||
static constexpr bool enableVariableRegisterSizeAllocation = true;
|
||||
static constexpr bool enableL1FlushUavCoherencyMode = false;
|
||||
static constexpr bool enablePageFaultException = false;
|
||||
static constexpr bool enableSystemMemoryReadFence = false;
|
||||
static constexpr bool enableMemoryException = false;
|
||||
|
||||
@@ -685,6 +685,7 @@ Enable512NumGrfs = 1
|
||||
EnableUsmPoolResidencyTracking = -1
|
||||
EnableUsmPoolLazyInit = -1
|
||||
ForcePrintsRedirection = -1
|
||||
EnableL1FlushUavCoherencyMode = -1
|
||||
ResourceBarrierL1FlushMode = -1
|
||||
InitialCounterBasedEventValue = -1
|
||||
DirectSubmissionInitialSemaphoreValue = -1
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
/*
|
||||
* Copyright (C) 2024 Intel Corporation
|
||||
* Copyright (C) 2024-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/encode_surface_state.h"
|
||||
#include "shared/source/command_stream/stream_properties.h"
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/gmm_helper/gmm_lib.h"
|
||||
#include "shared/source/os_interface/product_helper.h"
|
||||
@@ -63,3 +64,54 @@ HWTEST2_F(CommandEncodeStatesTestXe2AndLater, whenDebugFlagIsDisabledForAdjustPi
|
||||
// Asserts that ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired() returns false
// for the families selected by the IsAtLeastXe2HpgCore matcher.
// NOTE(review): the test name ends with "ThenExpectTrue" while the body uses EXPECT_FALSE -
// confirm which one is intended and align the name with the assertion.
HWTEST2_F(ImplicitScalingTests, GivenXeAtLeastHpg2WhenCheckingPipeControlStallRequiredThenExpectTrue, IsAtLeastXe2HpgCore) {
|
||||
EXPECT_FALSE(ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired());
|
||||
}
|
||||
|
||||
// Checks the UAV coherency mode found in the encoded STATE_COMPUTE_MODE command for three
// states of the EnableL1FlushUavCoherencyMode debug flag: untouched, set to 1 and set to 0.
HWTEST2_F(CommandEncodeStatesTestXe2AndLater, givenDebugFlagWhenProgrammingStateComputeModeThenEnableL1FlushUavCoherencyMode, IsAtLeastXe2HpgCore) {
    using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;

    DebugManagerStateRestore restore;

    uint8_t buffer[sizeof(STATE_COMPUTE_MODE)]{};
    const auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();

    // Runs the same setup/encode sequence for every flag state: a fresh stream over `buffer`,
    // fresh stream properties, one programComputeModeCommand call, then the UAV coherency mode
    // is read back from the command at the start of the stream.
    auto encodeAndReadUavCoherencyMode = [&]() {
        LinearStream cmdStream(buffer, sizeof(buffer));

        StreamProperties props{};
        props.initSupport(rootDeviceEnvironment);
        props.stateComputeMode.setPropertiesAll(false, 0, 0, PreemptionMode::Disabled, false);
        EncodeComputeMode<FamilyType>::programComputeModeCommand(cmdStream, props.stateComputeMode, rootDeviceEnvironment);

        auto &programmedCmd = *reinterpret_cast<STATE_COMPUTE_MODE *>(cmdStream.getCpuBase());
        return programmedCmd.getUavCoherencyMode();
    };

    // default
    EXPECT_EQ(STATE_COMPUTE_MODE::UAV_COHERENCY_MODE::UAV_COHERENCY_MODE_DRAIN_DATAPORT_MODE, encodeAndReadUavCoherencyMode());

    // enabled
    debugManager.flags.EnableL1FlushUavCoherencyMode.set(1);
    EXPECT_EQ(STATE_COMPUTE_MODE::UAV_COHERENCY_MODE::UAV_COHERENCY_MODE_FLUSH_DATAPORT_L1, encodeAndReadUavCoherencyMode());

    // disabled
    debugManager.flags.EnableL1FlushUavCoherencyMode.set(0);
    EXPECT_EQ(STATE_COMPUTE_MODE::UAV_COHERENCY_MODE::UAV_COHERENCY_MODE_DRAIN_DATAPORT_MODE, encodeAndReadUavCoherencyMode());
}
|
||||
|
||||
Reference in New Issue
Block a user