mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
feature: Add missing pipelined EU thread arbitration on Xe3
Related-To: NEO-13682
Signed-off-by: Vysochyn, Illia <illia.vysochyn@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
f25193c44d
commit
ca72dff1ab
@ -21,6 +21,7 @@ struct StateComputeModePropertiesSupport {
|
||||
bool devicePreemptionMode = false;
|
||||
bool allocationForScratchAndMidthreadPreemption = false;
|
||||
bool enableVariableRegisterSizeAllocation = false;
|
||||
bool pipelinedEuThreadArbitration = false;
|
||||
};
|
||||
|
||||
struct StateComputeModeProperties {
|
||||
@ -42,6 +43,8 @@ struct StateComputeModeProperties {
|
||||
|
||||
void copyPropertiesAll(const StateComputeModeProperties &properties);
|
||||
void copyPropertiesGrfNumberThreadArbitration(const StateComputeModeProperties &properties);
|
||||
void setPipelinedEuThreadArbitration();
|
||||
bool isPipelinedEuThreadArbitrationEnabled() const;
|
||||
|
||||
bool isDirty() const;
|
||||
void clearIsDirty();
|
||||
@ -64,6 +67,7 @@ struct StateComputeModeProperties {
|
||||
StateComputeModePropertiesSupport scmPropertiesSupport = {};
|
||||
int32_t defaultThreadArbitrationPolicy = 0;
|
||||
bool propertiesSupportLoaded = false;
|
||||
bool pipelinedEuThreadArbitration = false;
|
||||
};
|
||||
|
||||
struct FrontEndPropertiesSupport {
|
||||
|
@ -179,6 +179,10 @@ void StateComputeModeProperties::setPropertiesPerContext(bool requiresCoherency,
|
||||
this->enableVariableRegisterSizeAllocation.set(this->scmPropertiesSupport.enableVariableRegisterSizeAllocation);
|
||||
}
|
||||
|
||||
if (this->scmPropertiesSupport.pipelinedEuThreadArbitration) {
|
||||
setPipelinedEuThreadArbitration();
|
||||
}
|
||||
|
||||
setPropertiesExtraPerContext();
|
||||
if (clearDirtyState) {
|
||||
clearIsDirtyPerContext();
|
||||
@ -534,3 +538,11 @@ void StateBaseAddressProperties::clearIsDirty() {
|
||||
dynamicStateBaseAddress.isDirty = false;
|
||||
indirectObjectBaseAddress.isDirty = false;
|
||||
}
|
||||
|
||||
void StateComputeModeProperties::setPipelinedEuThreadArbitration() {
|
||||
this->pipelinedEuThreadArbitration = true;
|
||||
}
|
||||
|
||||
bool StateComputeModeProperties::isPipelinedEuThreadArbitrationEnabled() const {
|
||||
return pipelinedEuThreadArbitration;
|
||||
}
|
||||
|
@ -306,6 +306,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdFifoPollInterval, -1, "-1: default , > 0
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdInterruptTimeout, -1, "-1: default , > 0: interruptTimeout based on input in milliseconds. Default is 2000 milliseconds")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DebugUmdMaxReadWriteRetry, -1, "-1: default , > 0: max pread/pwrite retry attempts in read/writeGpuMemory calls based on input in milliseconds. Default is 3")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceIndirectDetectionForCMKernels, -1, "-1: default , 0 : disable indirect detection for CM kernels, 1 : enable indirect detection for CM kernels")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, PipelinedEuThreadArbitration, -1, "-1: default. 1: Use Walker field, 0: Use StateComputeMode command to program pipelinedEuThreadArbitration")
|
||||
DECLARE_DEBUG_VARIABLE(bool, ForceUseOnlyGlobalTimestamps, 0, "0- default disabled, 1: enable use only global timestamp")
|
||||
|
||||
/*LOGGING FLAGS*/
|
||||
|
@ -6950,7 +6950,8 @@ typedef struct tagSTATE_COMPUTE_MODE {
|
||||
uint32_t Reserved_37 : BITFIELD_RANGE(5, 6);
|
||||
uint32_t AsyncComputeThreadLimit : BITFIELD_RANGE(7, 9);
|
||||
uint32_t EnableVariableRegisterSizeAllocation_Vrt : BITFIELD_RANGE(10, 10);
|
||||
uint32_t Reserved_43 : BITFIELD_RANGE(11, 12);
|
||||
uint32_t Reserved_43 : BITFIELD_RANGE(11, 11);
|
||||
uint32_t EnablePipelinedEuThreadArbitration : BITFIELD_RANGE(12, 12);
|
||||
uint32_t EuThreadSchedulingMode : BITFIELD_RANGE(13, 14);
|
||||
uint32_t LargeGrfMode : BITFIELD_RANGE(15, 15);
|
||||
uint32_t Mask1 : BITFIELD_RANGE(16, 31);
|
||||
@ -6959,7 +6960,7 @@ typedef struct tagSTATE_COMPUTE_MODE {
|
||||
uint32_t MidthreadPreemptionOverdispatchThreadGroupCount : BITFIELD_RANGE(3, 4);
|
||||
uint32_t MidthreadPreemptionOverdispatchTestMode : BITFIELD_RANGE(5, 5);
|
||||
uint32_t UavCoherencyMode : BITFIELD_RANGE(6, 6);
|
||||
uint32_t Reserved_76 : BITFIELD_RANGE(7, 15);
|
||||
uint32_t Reserved_71 : BITFIELD_RANGE(7, 15);
|
||||
uint32_t Mask2 : BITFIELD_RANGE(16, 31);
|
||||
} Common;
|
||||
uint32_t RawData[3];
|
||||
@ -7076,6 +7077,12 @@ typedef struct tagSTATE_COMPUTE_MODE {
|
||||
inline bool getEnableVariableRegisterSizeAllocationVrt() const {
|
||||
return TheStructure.Common.EnableVariableRegisterSizeAllocation_Vrt;
|
||||
}
|
||||
// Writes `value` into the EnablePipelinedEuThreadArbitration bitfield of the command.
inline void setEnablePipelinedEuThreadArbitration(const bool value) {
    auto &common = TheStructure.Common;
    common.EnablePipelinedEuThreadArbitration = value;
}
|
||||
inline bool getEnablePipelinedEuThreadArbitration() const {
|
||||
return TheStructure.Common.EnablePipelinedEuThreadArbitration;
|
||||
}
|
||||
// Writes the requested scheduling mode into the EuThreadSchedulingMode bitfield of the command.
inline void setEuThreadSchedulingMode(const EU_THREAD_SCHEDULING_MODE value) {
    auto &common = TheStructure.Common;
    common.EuThreadSchedulingMode = value;
}
|
||||
|
@ -32,7 +32,10 @@ void EncodeComputeMode<Family>::programComputeModeCommand(LinearStream &csr, Sta
|
||||
auto maskBits = stateComputeMode.getMask1();
|
||||
auto maskBits2 = stateComputeMode.getMask2();
|
||||
|
||||
if (properties.threadArbitrationPolicy.isDirty) {
|
||||
if (properties.isPipelinedEuThreadArbitrationEnabled()) {
|
||||
stateComputeMode.setEnablePipelinedEuThreadArbitration(true);
|
||||
maskBits |= Family::stateComputeModePipelinedEuThreadArbitrationMask;
|
||||
} else if (properties.threadArbitrationPolicy.isDirty) {
|
||||
switch (properties.threadArbitrationPolicy.value) {
|
||||
case ThreadArbitrationPolicy::RoundRobin:
|
||||
stateComputeMode.setEuThreadSchedulingMode(STATE_COMPUTE_MODE::EU_THREAD_SCHEDULING_MODE::EU_THREAD_SCHEDULING_MODE_ROUND_ROBIN);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2024 Intel Corporation
|
||||
* Copyright (C) 2024-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -24,6 +24,7 @@ struct Xe3Core {
|
||||
#include "shared/source/generated/xe3_core/hw_cmds_generated_xe3_core.inl"
|
||||
|
||||
static constexpr uint32_t stateComputeModeEnableVariableRegisterSizeAllocationMask = (1u << 10);
|
||||
static constexpr uint32_t stateComputeModePipelinedEuThreadArbitrationMask = (1u << 12);
|
||||
static constexpr uint32_t stateComputeModeEuThreadSchedulingModeOverrideMask = (0b11u << 13);
|
||||
static constexpr uint32_t stateComputeModeLargeGrfModeMask = (1u << 15);
|
||||
// DW2
|
||||
|
@ -31,6 +31,15 @@ void ProductHelperHw<gfxProduct>::fillScmPropertiesSupportStructure(StateCompute
|
||||
propertiesSupport.enableVariableRegisterSizeAllocation = !!debugManager.flags.EnableXe3VariableRegisterSizeAllocation.get();
|
||||
}
|
||||
propertiesSupport.largeGrfMode = !propertiesSupport.enableVariableRegisterSizeAllocation;
|
||||
|
||||
bool pipelinedEuThreadArbitration = true;
|
||||
if (debugManager.flags.PipelinedEuThreadArbitration.get() != -1) {
|
||||
pipelinedEuThreadArbitration = !!debugManager.flags.PipelinedEuThreadArbitration.get();
|
||||
}
|
||||
|
||||
if (pipelinedEuThreadArbitration) {
|
||||
propertiesSupport.pipelinedEuThreadArbitration = true;
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
|
@ -662,4 +662,5 @@ LogIndirectDetectionKernelDetails = 0
|
||||
DirectSubmissionRelaxedOrderingCounterHeuristic = -1
|
||||
DirectSubmissionRelaxedOrderingCounterHeuristicTreshold = -1
|
||||
ClearStandaloneInOrderTimestampAllocation = -1
|
||||
PipelinedEuThreadArbitration = -1
|
||||
# Please don't edit below this line
|
||||
|
@ -153,7 +153,7 @@ XE3_CORETEST_F(ComputeModeRequirementsXe3Core, giventhreadArbitrationPolicyWitho
|
||||
hwParser.parseCommands<FamilyType>(getCsrHw<FamilyType>()->commandStream, startOffset);
|
||||
bool foundOne = false;
|
||||
|
||||
uint32_t expectedMask = FamilyType::stateComputeModeEuThreadSchedulingModeOverrideMask;
|
||||
uint32_t expectedMask = FamilyType::stateComputeModePipelinedEuThreadArbitrationMask;
|
||||
|
||||
for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) {
|
||||
auto cmd = genCmdCast<STATE_COMPUTE_MODE *>(*it);
|
||||
|
@ -382,6 +382,31 @@ XE3_CORETEST_F(EncodeKernelXe3CoreTest, givenDefaultSettingForFenceWhenKernelUse
|
||||
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());
|
||||
}
|
||||
|
||||
// Checks that, after setPropertiesAll(), pipelined EU thread arbitration is reported
// as enabled when the PipelinedEuThreadArbitration debug flag is left untouched, and
// as disabled once that flag is set to 0.
XE3_CORETEST_F(EncodeKernelXe3CoreTest, givenDebugFlagSetWhenSetPropertiesAllCalledThenDisablePipelinedThreadArbitrationPolicy) {
    DebugManagerStateRestore restore;

    MockExecutionEnvironment executionEnvironment{};
    auto &rootDeviceEnvironment = *executionEnvironment.rootDeviceEnvironments[0];

    // Builds a fresh StreamProperties under the debug-flag state in effect at call time
    // and returns the resulting pipelined-arbitration setting.
    auto queryPipelinedArbitration = [&rootDeviceEnvironment]() {
        StreamProperties streamProperties{};
        streamProperties.initSupport(rootDeviceEnvironment);
        streamProperties.stateComputeMode.setPropertiesAll(false, 0, 0, PreemptionMode::Disabled);
        return streamProperties.stateComputeMode.isPipelinedEuThreadArbitrationEnabled();
    };

    EXPECT_TRUE(queryPipelinedArbitration());

    // The flag must be changed before the second query so that initSupport() observes it.
    debugManager.flags.PipelinedEuThreadArbitration.set(0);
    EXPECT_FALSE(queryPipelinedArbitration());
}
|
||||
|
||||
XE3_CORETEST_F(EncodeKernelXe3CoreTest, givenDebugFlagWhenProgrammingStateComputeModeThenEnableVrtFieldIsCorrectlySet) {
|
||||
using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
|
||||
|
||||
|
Reference in New Issue
Block a user