Add debug variable to control whether large grf should be programmed with SCM

Add debug variable ForceGrfNumProgrammingWithScm.
Do not update large grf value in StreamProperties when unnecessary.

Related-To: NEO-6659

Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
This commit is contained in:
Filip Hazubski 2022-04-29 14:33:41 +00:00 committed by Compute-Runtime-Automation
parent 6e8cabdce5
commit 3413039a69
15 changed files with 58 additions and 14 deletions

View File

@ -731,6 +731,7 @@ struct ProgramChangedFieldsInComputeMode {
};
HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModePropertiesWhenUpdateStreamPropertiesIsCalledTwiceThenChangedFieldsAreDirty, ProgramChangedFieldsInComputeMode) {
DebugManagerStateRestore restorer;
auto &hwInfoConfig = *NEO::HwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
Mock<::L0::Kernel> kernel;
auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
@ -743,11 +744,11 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModePropertiesWhenUpdateStr
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100;
pCommandList->updateStreamProperties(kernel, false, false);
EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
EXPECT_EQ(hwInfoConfig.isGrfNumReportedWithScm(), pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x80;
pCommandList->updateStreamProperties(kernel, false, false);
EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
EXPECT_EQ(hwInfoConfig.isGrfNumReportedWithScm(), pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
EXPECT_FALSE(pCommandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
}
struct ProgramAllFieldsInComputeMode {
@ -758,6 +759,7 @@ struct ProgramAllFieldsInComputeMode {
};
HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModeTraitsSetToFalsePropertiesWhenUpdateStreamPropertiesIsCalledTwiceThenAllFieldsAreDirty, ProgramAllFieldsInComputeMode) {
DebugManagerStateRestore restorer;
auto &hwInfoConfig = *NEO::HwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
Mock<::L0::Kernel> kernel;
auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
@ -770,16 +772,17 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModeTraitsSetToFalsePropert
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100;
pCommandList->updateStreamProperties(kernel, false, false);
EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
EXPECT_EQ(hwInfoConfig.isGrfNumReportedWithScm(), pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x80;
pCommandList->updateStreamProperties(kernel, false, false);
EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
EXPECT_EQ(hwInfoConfig.isGrfNumReportedWithScm(), pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
EXPECT_FALSE(pCommandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
}
HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModePropertiesWhenPropertesNotChangedThenAllFieldsAreNotDirty, IsAtLeastSkl) {
DebugManagerStateRestore restorer;
auto &hwInfoConfig = *NEO::HwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
Mock<::L0::Kernel> kernel;
auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
@ -792,11 +795,10 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModePropertiesWhenPropertes
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100;
pCommandList->updateStreamProperties(kernel, false, false);
EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
EXPECT_TRUE(pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
EXPECT_EQ(hwInfoConfig.isGrfNumReportedWithScm(), pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
pCommandList->updateStreamProperties(kernel, false, false);
EXPECT_FALSE(pCommandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
EXPECT_FALSE(pCommandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
}

View File

@ -69,11 +69,12 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
int expectedDisableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(*defaultHwInfo);
bool expectedIsCoherencyRequired = hwHelper.forceNonGpuCoherencyWA(true);
int expectedLargeGrfMode = hwInfoConfig.isGrfNumReportedWithScm() ? 1 : -1;
EXPECT_EQ(1, currentCsrStreamProperties.frontEndState.computeDispatchAllWalkerEnable.value);
EXPECT_EQ(1, currentCsrStreamProperties.frontEndState.disableEUFusion.value);
EXPECT_EQ(expectedDisableOverdispatch, currentCsrStreamProperties.frontEndState.disableOverdispatch.value);
EXPECT_EQ(expectedIsCoherencyRequired, currentCsrStreamProperties.stateComputeMode.isCoherencyRequired.value);
EXPECT_EQ(1, currentCsrStreamProperties.stateComputeMode.largeGrfMode.value);
EXPECT_EQ(expectedLargeGrfMode, currentCsrStreamProperties.stateComputeMode.largeGrfMode.value);
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobin, currentCsrStreamProperties.stateComputeMode.threadArbitrationPolicy.value);
commandListImmediate.requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value = 0;
@ -84,11 +85,12 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
commandListImmediate.requiredStreamState.stateComputeMode.threadArbitrationPolicy.value = NEO::ThreadArbitrationPolicy::AgeBased;
commandListImmediate.executeCommandListImmediateWithFlushTask(false);
expectedLargeGrfMode = hwInfoConfig.isGrfNumReportedWithScm() ? 0 : -1;
EXPECT_EQ(0, currentCsrStreamProperties.frontEndState.computeDispatchAllWalkerEnable.value);
EXPECT_EQ(0, currentCsrStreamProperties.frontEndState.disableEUFusion.value);
EXPECT_EQ(0, currentCsrStreamProperties.frontEndState.disableOverdispatch.value);
EXPECT_EQ(0, currentCsrStreamProperties.stateComputeMode.isCoherencyRequired.value);
EXPECT_EQ(0, currentCsrStreamProperties.stateComputeMode.largeGrfMode.value);
EXPECT_EQ(expectedLargeGrfMode, currentCsrStreamProperties.stateComputeMode.largeGrfMode.value);
EXPECT_EQ(NEO::ThreadArbitrationPolicy::AgeBased, currentCsrStreamProperties.stateComputeMode.threadArbitrationPolicy.value);
}

View File

@ -350,6 +350,7 @@ BinaryCacheTrace = false
OverrideL1CacheControlInSurfaceState = -1
OverrideL1CacheControlInSurfaceStateForScratchSpace = -1
OverridePreferredSlmAllocationSizePerDss = -1
ForceGrfNumProgrammingWithScm = -1
ForceL3PrefetchForComputeWalker = -1
ForceZPassAsyncComputeThreadLimit = -1
ForcePixelAsyncComputeThreadLimit = -1

View File

@ -22,7 +22,8 @@ void StateComputeModeProperties::setProperties(bool requiresCoherency, uint32_t
int32_t isCoherencyRequired = (requiresCoherency ? 1 : 0);
this->isCoherencyRequired.set(isCoherencyRequired);
if (this->largeGrfMode.value == -1 || numGrfRequired != GrfConfig::NotApplicable) {
bool reportNumGrf = hwInfoConfig.isGrfNumReportedWithScm();
if (reportNumGrf && (this->largeGrfMode.value == -1 || numGrfRequired != GrfConfig::NotApplicable)) {
int32_t largeGrfMode = (numGrfRequired == GrfConfig::LargeGrfNumber ? 1 : 0);
this->largeGrfMode.set(largeGrfMode);
}
@ -51,7 +52,7 @@ void StateComputeModeProperties::setProperties(bool requiresCoherency, uint32_t
}
this->threadArbitrationPolicy.set(threadArbitrationPolicy);
setPropertiesExtra(hwInfoConfig.isGrfNumReportedWithScm());
setPropertiesExtra(reportNumGrf);
}
void StateComputeModeProperties::setProperties(const StateComputeModeProperties &properties) {

View File

@ -160,6 +160,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, OverrideBufferSuitableForRenderCompression, -1,
DECLARE_DEBUG_VARIABLE(int32_t, OverrideL1CacheControlInSurfaceState, -1, "-1: feature inactive, >=0 : following L1 cache control value will be programmed in render surface state (for regular buffers)")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideL1CacheControlInSurfaceStateForScratchSpace, -1, "-1: feature inactive, >=0 : following L1 cache control value will be programmed in render surface state for scratch space")
DECLARE_DEBUG_VARIABLE(int32_t, OverridePreferredSlmAllocationSizePerDss, -1, "-1: default, >=0: program value for preferred SLM allocation size per dss")
DECLARE_DEBUG_VARIABLE(int32_t, ForceGrfNumProgrammingWithScm, -1, "-1: default, 0: do not program grf num with SCM, 1: program grf num with SCM")
DECLARE_DEBUG_VARIABLE(int32_t, ForceL3PrefetchForComputeWalker, -1, "-1: default, 0: disable, 1: enable")
DECLARE_DEBUG_VARIABLE(int32_t, ForceZPassAsyncComputeThreadLimit, -1, "-1: default, >0: Limit value in STATE_COMPUTE_MODE")
DECLARE_DEBUG_VARIABLE(int32_t, ForcePixelAsyncComputeThreadLimit, -1, "-1: default, >0: Limit value in STATE_COMPUTE_MODE")

View File

@ -6855,7 +6855,8 @@ typedef struct tagSTATE_COMPUTE_MODE {
uint32_t ForceNonCoherent : BITFIELD_RANGE(3, 4);
uint32_t FastClearDisabledOnCompressedSurface : BITFIELD_RANGE(5, 5);
uint32_t DisableSlmReadMergeOptimization : BITFIELD_RANGE(6, 6);
uint32_t Reserved_39 : BITFIELD_RANGE(7, 10);
uint32_t Reserved_39 : BITFIELD_RANGE(7, 9);
uint32_t Reserved_42 : BITFIELD_RANGE(10, 10);
uint32_t DisableAtomicOnClearData : BITFIELD_RANGE(11, 11);
uint32_t Reserved_44 : BITFIELD_RANGE(12, 12);
uint32_t EuThreadSchedulingModeOverride : BITFIELD_RANGE(13, 14);

View File

@ -351,6 +351,9 @@ bool HwInfoConfigHw<gfxProduct>::isIpSamplingSupported(const HardwareInfo &hwInf
template <PRODUCT_FAMILY gfxProduct>
bool HwInfoConfigHw<gfxProduct>::isGrfNumReportedWithScm() const {
if (DebugManager.flags.ForceGrfNumProgrammingWithScm.get() != -1) {
return DebugManager.flags.ForceGrfNumProgrammingWithScm.get();
}
return true;
}

View File

@ -39,6 +39,7 @@ macro(macro_for_each_core_type)
set(SRC_FILE ${NEO_SHARED_TEST_DIRECTORY}/common/${BRANCH}${CORE_TYPE_LOWER}/unit_test_helper_${CORE_TYPE_LOWER}.cpp)
if(EXISTS ${SRC_FILE})
list(APPEND neo_libult_common_SRCS_ENABLE_TESTED_HW ${SRC_FILE})
include_directories(${NEO_SHARED_TEST_DIRECTORY}/common/${BRANCH}${CORE_TYPE_LOWER}/definitions${BRANCH_DIR_SUFFIX})
endif()
foreach(SRC_IT "enable_family_full_core_" "implicit_scaling_")
set(SRC_FILE ${NEO_SHARED_DIRECTORY}${BRANCH}${CORE_TYPE_LOWER}${BRANCH_DIR}${SRC_IT}${CORE_TYPE_LOWER}.cpp)

View File

@ -9,13 +9,15 @@ if(TESTS_XE_HPC_CORE)
set(NEO_CORE_TESTS_XE_HPC_CORE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/test_traits_xe_hpc_core.h
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}unit_test_helper_xe_hpc_core_extra.inl
)
set_property(GLOBAL PROPERTY NEO_CORE_TESTS_XE_HPC_CORE ${NEO_CORE_TESTS_XE_HPC_CORE})
add_subdirectories()
target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${NEO_CORE_TESTS_XE_HPC_CORE}
${CMAKE_CURRENT_SOURCE_DIR}/test_preemption_xe_hpc_core.cpp
)

View File

@ -0,0 +1,6 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/

View File

@ -13,6 +13,8 @@
using Family = NEO::XE_HPC_COREFamily;
#include "unit_test_helper_xe_hpc_core_extra.inl"
namespace NEO {
template <>

View File

@ -61,6 +61,7 @@ TEST(StreamPropertiesTests, whenSettingCooperativeKernelPropertiesThenCorrectVal
TEST(StreamPropertiesTests, whenSettingStateComputeModePropertiesThenCorrectValuesAreSet) {
DebugManagerStateRestore restorer;
DebugManager.flags.ForceGrfNumProgrammingWithScm.set(1);
int32_t threadArbitrationPolicyValues[] = {
ThreadArbitrationPolicy::AgeBased, ThreadArbitrationPolicy::RoundRobin,

View File

@ -81,6 +81,7 @@ HWTEST2_F(CommandEncodeStatesTestPvcAndLater, givenVariousValuesWhenCallingSetBa
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTestPvcAndLater, givenCommandContainerWhenNumGrfRequiredIsGreaterThanDefaultThenLargeGrfModeEnabled) {
using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
StreamProperties streamProperties{};
streamProperties.stateComputeMode.setProperties(false, GrfConfig::LargeGrfNumber, 0u, *defaultHwInfo);
EncodeComputeMode<FamilyType>::programComputeModeCommand(*cmdContainer->getCommandStream(), streamProperties.stateComputeMode, *defaultHwInfo);
@ -91,7 +92,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTestPvcAndLater, givenCommandCon
ASSERT_NE(itorCmd, commands.end());
auto cmd = genCmdCast<STATE_COMPUTE_MODE *>(*itorCmd);
EXPECT_TRUE(cmd->getLargeGrfMode());
EXPECT_EQ(hwInfoConfig.isGrfNumReportedWithScm(), cmd->getLargeGrfMode());
}
using CommandEncodeStatesTestHpc = Test<CommandEncodeStatesFixture>;

View File

@ -438,6 +438,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeD
}
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenDispatchInterfaceWhenNumRequiredGrfIsNotDefaultThenStateComputeModeCommandAdded) {
DebugManagerStateRestore restorer;
DebugManager.flags.ForceGrfNumProgrammingWithScm.set(1);
StreamProperties streamProperties{};
streamProperties.stateComputeMode.setProperties(false, 128, 0u, *defaultHwInfo);
streamProperties.stateComputeMode.setProperties(false, 128, 0u, *defaultHwInfo);

View File

@ -7,6 +7,7 @@
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/test/common/fixtures/device_fixture.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/test_macros/test.h"
using namespace NEO;
@ -52,3 +53,19 @@ HWTEST_F(HwInfoConfigTest, givenHwInfoConfigWhenGetProductConfigThenCorrectMatch
const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
EXPECT_NE(hwInfoConfig.getProductConfigFromHwInfo(*defaultHwInfo), UNKNOWN_ISA);
}
HWTEST_F(HwInfoConfigTest, whenIsGrfNumReportedWithScmIsQueriedThenTrueIsReturned) {
const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
EXPECT_TRUE(hwInfoConfig.isGrfNumReportedWithScm());
}
HWTEST_F(HwInfoConfigTest, givenForceGrfNumProgrammingWithScmFlagSetWhenIsGrfNumReportedWithScmIsQueriedThenCorrectValueIsReturned) {
DebugManagerStateRestore restorer;
const auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily);
DebugManager.flags.ForceGrfNumProgrammingWithScm.set(0);
EXPECT_FALSE(hwInfoConfig.isGrfNumReportedWithScm());
DebugManager.flags.ForceGrfNumProgrammingWithScm.set(1);
EXPECT_TRUE(hwInfoConfig.isGrfNumReportedWithScm());
}