feature: Add missing pipelined EU thread arbitration on Xe3

Related-To: NEO-13682

Signed-off-by: Vysochyn, Illia <illia.vysochyn@intel.com>
This commit is contained in:
Vysochyn, Illia
2025-01-14 19:34:35 +00:00
committed by Compute-Runtime-Automation
parent f25193c44d
commit ca72dff1ab
10 changed files with 68 additions and 5 deletions

View File

@ -21,6 +21,7 @@ struct StateComputeModePropertiesSupport {
bool devicePreemptionMode = false;
bool allocationForScratchAndMidthreadPreemption = false;
bool enableVariableRegisterSizeAllocation = false;
bool pipelinedEuThreadArbitration = false;
};
struct StateComputeModeProperties {
@ -42,6 +43,8 @@ struct StateComputeModeProperties {
void copyPropertiesAll(const StateComputeModeProperties &properties);
void copyPropertiesGrfNumberThreadArbitration(const StateComputeModeProperties &properties);
void setPipelinedEuThreadArbitration();
bool isPipelinedEuThreadArbitrationEnabled() const;
bool isDirty() const;
void clearIsDirty();
@ -64,6 +67,7 @@ struct StateComputeModeProperties {
StateComputeModePropertiesSupport scmPropertiesSupport = {};
int32_t defaultThreadArbitrationPolicy = 0;
bool propertiesSupportLoaded = false;
bool pipelinedEuThreadArbitration = false;
};
struct FrontEndPropertiesSupport {

View File

@ -179,6 +179,10 @@ void StateComputeModeProperties::setPropertiesPerContext(bool requiresCoherency,
this->enableVariableRegisterSizeAllocation.set(this->scmPropertiesSupport.enableVariableRegisterSizeAllocation);
}
if (this->scmPropertiesSupport.pipelinedEuThreadArbitration) {
setPipelinedEuThreadArbitration();
}
setPropertiesExtraPerContext();
if (clearDirtyState) {
clearIsDirtyPerContext();
@ -534,3 +538,11 @@ void StateBaseAddressProperties::clearIsDirty() {
dynamicStateBaseAddress.isDirty = false;
indirectObjectBaseAddress.isDirty = false;
}
void StateComputeModeProperties::setPipelinedEuThreadArbitration() {
this->pipelinedEuThreadArbitration = true;
}
bool StateComputeModeProperties::isPipelinedEuThreadArbitrationEnabled() const {
return pipelinedEuThreadArbitration;
}

View File

@ -306,6 +306,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdFifoPollInterval, -1, "-1: default , > 0
DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdInterruptTimeout, -1, "-1: default , > 0: interruptTimeout based on input in milliseconds. Default is 2000 milliseconds")
DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdMaxReadWriteRetry, -1, "-1: default , > 0: max pread/pwrite retry attempts in read/writeGpuMemory calls based on input in milliseconds. Default is 3")
DECLARE_DEBUG_VARIABLE(int32_t, ForceIndirectDetectionForCMKernels, -1, "-1: default , 0 : disable indirect detection for CM kernels, 1 : enable indirect detection for CM kernels")
DECLARE_DEBUG_VARIABLE(int32_t, PipelinedEuThreadArbitration, -1, "-1: default. 1: Use Walker field, 0: Use StateComputeMode command to program pipelinedEuThreadArbitration")
DECLARE_DEBUG_VARIABLE(bool, ForceUseOnlyGlobalTimestamps, 0, "0- default disabled, 1: enable use only global timestamp")
/*LOGGING FLAGS*/

View File

@ -6950,7 +6950,8 @@ typedef struct tagSTATE_COMPUTE_MODE {
uint32_t Reserved_37 : BITFIELD_RANGE(5, 6);
uint32_t AsyncComputeThreadLimit : BITFIELD_RANGE(7, 9);
uint32_t EnableVariableRegisterSizeAllocation_Vrt : BITFIELD_RANGE(10, 10);
uint32_t Reserved_43 : BITFIELD_RANGE(11, 12);
uint32_t Reserved_43 : BITFIELD_RANGE(11, 11);
uint32_t EnablePipelinedEuThreadArbitration : BITFIELD_RANGE(12, 12);
uint32_t EuThreadSchedulingMode : BITFIELD_RANGE(13, 14);
uint32_t LargeGrfMode : BITFIELD_RANGE(15, 15);
uint32_t Mask1 : BITFIELD_RANGE(16, 31);
@ -6959,7 +6960,7 @@ typedef struct tagSTATE_COMPUTE_MODE {
uint32_t MidthreadPreemptionOverdispatchThreadGroupCount : BITFIELD_RANGE(3, 4);
uint32_t MidthreadPreemptionOverdispatchTestMode : BITFIELD_RANGE(5, 5);
uint32_t UavCoherencyMode : BITFIELD_RANGE(6, 6);
uint32_t Reserved_76 : BITFIELD_RANGE(7, 15);
uint32_t Reserved_71 : BITFIELD_RANGE(7, 15);
uint32_t Mask2 : BITFIELD_RANGE(16, 31);
} Common;
uint32_t RawData[3];
@ -7076,6 +7077,12 @@ typedef struct tagSTATE_COMPUTE_MODE {
inline bool getEnableVariableRegisterSizeAllocationVrt() const {
return TheStructure.Common.EnableVariableRegisterSizeAllocation_Vrt;
}
// Stores the requested state in the EnablePipelinedEuThreadArbitration
// bit-field of the command structure (true -> 1, false -> 0).
inline void setEnablePipelinedEuThreadArbitration(const bool value) {
    TheStructure.Common.EnablePipelinedEuThreadArbitration = value ? 1u : 0u;
}
// Reads back the EnablePipelinedEuThreadArbitration bit-field of the command
// structure as a boolean.
inline bool getEnablePipelinedEuThreadArbitration() const {
    return TheStructure.Common.EnablePipelinedEuThreadArbitration != 0u;
}
inline void setEuThreadSchedulingMode(const EU_THREAD_SCHEDULING_MODE value) {
TheStructure.Common.EuThreadSchedulingMode = value;
}

View File

@ -32,7 +32,10 @@ void EncodeComputeMode<Family>::programComputeModeCommand(LinearStream &csr, Sta
auto maskBits = stateComputeMode.getMask1();
auto maskBits2 = stateComputeMode.getMask2();
if (properties.threadArbitrationPolicy.isDirty) {
if (properties.isPipelinedEuThreadArbitrationEnabled()) {
stateComputeMode.setEnablePipelinedEuThreadArbitration(true);
maskBits |= Family::stateComputeModePipelinedEuThreadArbitrationMask;
} else if (properties.threadArbitrationPolicy.isDirty) {
switch (properties.threadArbitrationPolicy.value) {
case ThreadArbitrationPolicy::RoundRobin:
stateComputeMode.setEuThreadSchedulingMode(STATE_COMPUTE_MODE::EU_THREAD_SCHEDULING_MODE::EU_THREAD_SCHEDULING_MODE_ROUND_ROBIN);

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2024 Intel Corporation
* Copyright (C) 2024-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -24,6 +24,7 @@ struct Xe3Core {
#include "shared/source/generated/xe3_core/hw_cmds_generated_xe3_core.inl"
static constexpr uint32_t stateComputeModeEnableVariableRegisterSizeAllocationMask = (1u << 10);
static constexpr uint32_t stateComputeModePipelinedEuThreadArbitrationMask = (1u << 12);
static constexpr uint32_t stateComputeModeEuThreadSchedulingModeOverrideMask = (0b11u << 13);
static constexpr uint32_t stateComputeModeLargeGrfModeMask = (1u << 15);
// DW2

View File

@ -31,6 +31,15 @@ void ProductHelperHw<gfxProduct>::fillScmPropertiesSupportStructure(StateCompute
propertiesSupport.enableVariableRegisterSizeAllocation = !!debugManager.flags.EnableXe3VariableRegisterSizeAllocation.get();
}
propertiesSupport.largeGrfMode = !propertiesSupport.enableVariableRegisterSizeAllocation;
bool pipelinedEuThreadArbitration = true;
if (debugManager.flags.PipelinedEuThreadArbitration.get() != -1) {
pipelinedEuThreadArbitration = !!debugManager.flags.PipelinedEuThreadArbitration.get();
}
if (pipelinedEuThreadArbitration) {
propertiesSupport.pipelinedEuThreadArbitration = true;
}
}
template <>

View File

@ -662,4 +662,5 @@ LogIndirectDetectionKernelDetails = 0
DirectSubmissionRelaxedOrderingCounterHeuristic = -1
DirectSubmissionRelaxedOrderingCounterHeuristicTreshold = -1
ClearStandaloneInOrderTimestampAllocation = -1
PipelinedEuThreadArbitration = -1
# Please don't edit below this line

View File

@ -153,7 +153,7 @@ XE3_CORETEST_F(ComputeModeRequirementsXe3Core, giventhreadArbitrationPolicyWitho
hwParser.parseCommands<FamilyType>(getCsrHw<FamilyType>()->commandStream, startOffset);
bool foundOne = false;
uint32_t expectedMask = FamilyType::stateComputeModeEuThreadSchedulingModeOverrideMask;
uint32_t expectedMask = FamilyType::stateComputeModePipelinedEuThreadArbitrationMask;
for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) {
auto cmd = genCmdCast<STATE_COMPUTE_MODE *>(*it);

View File

@ -382,6 +382,31 @@ XE3_CORETEST_F(EncodeKernelXe3CoreTest, givenDefaultSettingForFenceWhenKernelUse
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());
}
// Checks that stream properties report pipelined EU thread arbitration as
// enabled while this test has not touched the PipelinedEuThreadArbitration
// debug flag, and as disabled once the flag is set to 0.
XE3_CORETEST_F(EncodeKernelXe3CoreTest, givenDebugFlagSetWhenSetPropertiesAllCalledThenDisablePipelinedThreadArbitrationPolicy) {
    DebugManagerStateRestore restore;
    MockExecutionEnvironment executionEnvironment{};
    auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];

    // Builds a fresh StreamProperties each time so that support is re-read
    // from the product helper after the debug flag changes.
    const auto queryPipelinedArbitration = [&rootDeviceEnvironment]() {
        StreamProperties freshProperties{};
        freshProperties.initSupport(rootDeviceEnvironment);
        freshProperties.stateComputeMode.setPropertiesAll(false, 0, 0, PreemptionMode::Disabled);
        return freshProperties.stateComputeMode.isPipelinedEuThreadArbitrationEnabled();
    };

    EXPECT_TRUE(queryPipelinedArbitration());

    debugManager.flags.PipelinedEuThreadArbitration.set(0);
    EXPECT_FALSE(queryPipelinedArbitration());
}
XE3_CORETEST_F(EncodeKernelXe3CoreTest, givenDebugFlagWhenProgrammingStateComputeModeThenEnableVrtFieldIsCorrectlySet) {
using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;