Fix CFEFusedEUDispatch debug flag

Signed-off-by: Konstanty Misiak <konstanty.misiak@intel.com>
This commit is contained in:
Konstanty Misiak
2022-03-17 15:53:21 +00:00
committed by Compute-Runtime-Automation
parent 2fb59fd71a
commit 174c27eb31
20 changed files with 120 additions and 22 deletions

View File

@ -738,7 +738,7 @@ ze_result_t KernelImp::getProperties(ze_kernel_properties_t *pKernelProperties)
preferredGroupSizeProperties->preferredMultiple = this->kernelImmData->getKernelInfo()->getMaxSimdSize();
auto &hwHelper = NEO::HwHelper::get(this->module->getDevice()->getHwInfo().platform.eRenderCoreFamily);
if (hwHelper.isFusedEuDispatchEnabled(this->module->getDevice()->getHwInfo())) {
if (hwHelper.isFusedEuDispatchEnabled(this->module->getDevice()->getHwInfo(), kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion)) {
preferredGroupSizeProperties->preferredMultiple *= 2;
}
}

View File

@ -1093,7 +1093,7 @@ TEST_F(KernelPropertiesTests, whenPassingPreferredGroupSizeStructToGetProperties
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
auto &hwHelper = NEO::HwHelper::get(module->getDevice()->getHwInfo().platform.eRenderCoreFamily);
if (hwHelper.isFusedEuDispatchEnabled(module->getDevice()->getHwInfo())) {
if (hwHelper.isFusedEuDispatchEnabled(module->getDevice()->getHwInfo(), false)) {
EXPECT_EQ(preferredGroupProperties.preferredMultiple, static_cast<uint32_t>(kernel->getImmutableData()->getKernelInfo()->getMaxSimdSize()) * 2);
} else {
EXPECT_EQ(preferredGroupProperties.preferredMultiple, static_cast<uint32_t>(kernel->getImmutableData()->getKernelInfo()->getMaxSimdSize()));

View File

@ -396,7 +396,7 @@ void ClDevice::initializeCaps() {
deviceInfo.preferredLocalAtomicAlignment = MemoryConstants::cacheLineSize;
deviceInfo.preferredPlatformAtomicAlignment = MemoryConstants::cacheLineSize;
deviceInfo.preferredWorkGroupSizeMultiple = hwHelper.isFusedEuDispatchEnabled(hwInfo)
deviceInfo.preferredWorkGroupSizeMultiple = hwHelper.isFusedEuDispatchEnabled(hwInfo, false)
? CommonConstants::maximalSimdSize * 2
: CommonConstants::maximalSimdSize;

View File

@ -565,7 +565,7 @@ cl_int Kernel::getWorkGroupInfo(cl_kernel_work_group_info paramName,
case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
preferredWorkGroupSizeMultiple = kernelInfo.getMaxSimdSize();
if (hwHelper.isFusedEuDispatchEnabled(hwInfo) && !kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion) {
if (hwHelper.isFusedEuDispatchEnabled(hwInfo, kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion)) {
preferredWorkGroupSizeMultiple *= 2;
}
srcSize = sizeof(preferredWorkGroupSizeMultiple);

View File

@ -32,8 +32,12 @@ TEST(localWorkSizeTest, givenDisableEUFusionWhenCreatingWorkSizeInfoThenCorrectM
true // disableEUFusion
);
auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
bool fusedDispatchEnabled = hwHelper.isFusedEuDispatchEnabled(*defaultHwInfo, true);
auto WGSMultiple = fusedDispatchEnabled ? 2 : 1;
uint32_t maxBarriersPerHSlice = (defaultHwInfo.get()->platform.eRenderCoreFamily >= IGFX_GEN9_CORE) ? 32 : 16;
uint32_t expectedMinWGS = simdSize * numThreadsPerSubS / maxBarriersPerHSlice;
uint32_t expectedMinWGS = WGSMultiple * simdSize * numThreadsPerSubS / maxBarriersPerHSlice;
EXPECT_EQ(expectedMinWGS, wsInfo.minWorkGroupSize);
}

View File

@ -242,7 +242,7 @@ TEST_F(DeviceGetCapsTest, WhenCreatingDeviceThenCapsArePopulatedCorrectly) {
EXPECT_EQ(64u, caps.preferredPlatformAtomicAlignment);
EXPECT_TRUE(caps.nonUniformWorkGroupSupport);
auto expectedPreferredWorkGroupSizeMultiple = hwHelper.isFusedEuDispatchEnabled(*defaultHwInfo)
auto expectedPreferredWorkGroupSizeMultiple = hwHelper.isFusedEuDispatchEnabled(*defaultHwInfo, false)
? CommonConstants::maximalSimdSize * 2
: CommonConstants::maximalSimdSize;
EXPECT_EQ(expectedPreferredWorkGroupSizeMultiple, caps.preferredWorkGroupSizeMultiple);

View File

@ -195,7 +195,7 @@ GEN12LPTEST_F(HwHelperTestGen12Lp, givenTgllpWhenIsFusedEuDispatchEnabledIsCalle
for (auto &[expectedResult, wa, debugKey] : testParams) {
waTable.flags.waDisableFusedThreadScheduling = wa;
DebugManager.flags.CFEFusedEUDispatch.set(debugKey);
EXPECT_EQ(expectedResult, helper.isFusedEuDispatchEnabled(hardwareInfo));
EXPECT_EQ(expectedResult, helper.isFusedEuDispatchEnabled(hardwareInfo, false));
}
}

View File

@ -1061,7 +1061,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, WhenIsFusedEuDispatchEnabledIsCalledTh
GTEST_SKIP();
}
auto &helper = HwHelper::get(renderCoreFamily);
EXPECT_FALSE(helper.isFusedEuDispatchEnabled(hardwareInfo));
EXPECT_FALSE(helper.isFusedEuDispatchEnabled(hardwareInfo, false));
}
HWTEST_F(PipeControlHelperTests, WhenGettingPipeControSizeForCacheFlushThenReturnCorrectValue) {

View File

@ -315,6 +315,10 @@ TEST_F(KernelTests, GivenRequiredDisabledEUFusionFlagWhenGettingPrefferedWorkGro
kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion = true;
MockKernel kernel(pProgram, kernelInfo, *pClDevice);
auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
bool fusedDispatchEnabled = hwHelper.isFusedEuDispatchEnabled(*defaultHwInfo, true);
auto expectedValue = kernelInfo.getMaxSimdSize() * (fusedDispatchEnabled ? 2 : 1);
cl_kernel_info paramName = CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE;
size_t paramValue;
size_t paramValueSize = sizeof(paramValue);
@ -328,9 +332,35 @@ TEST_F(KernelTests, GivenRequiredDisabledEUFusionFlagWhenGettingPrefferedWorkGro
EXPECT_EQ(CL_SUCCESS, retVal);
EXPECT_EQ(paramValueSize, paramValueSizeRet);
EXPECT_EQ(kernelInfo.getMaxSimdSize(), paramValue);
EXPECT_EQ(expectedValue, paramValue);
}
kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion = false;
// Forces the CFEFusedEUDispatch debug flag to 0, builds a kernel whose descriptor requests
// disabled EU fusion, and checks that CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE matches
// max SIMD size scaled by what the HW helper reports for the same inputs.
TEST_F(KernelTests, GivenCFEFusedEUDispatchEnabledAndRequiredDisabledUEFusionWhenGettingPrefferedWorkGroupSizeMultipleThenCorectValueIsReturned) {
    // Restores debug flags when the test scope ends.
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.CFEFusedEUDispatch.set(0);

    KernelInfo kernelInfo = {};
    kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion = true;
    MockKernel kernel(pProgram, kernelInfo, *pClDevice);

    // Expected value is derived from the helper, queried with the kernel-level disable request set.
    const auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
    const bool fusedDispatchEnabled = hwHelper.isFusedEuDispatchEnabled(*defaultHwInfo, true);
    const auto simdMultiplier = fusedDispatchEnabled ? 2 : 1;
    auto expectedValue = kernelInfo.getMaxSimdSize() * simdMultiplier;

    size_t queriedMultiple;
    size_t queriedMultipleSize = sizeof(queriedMultiple);
    size_t reportedSize = 0;

    retVal = kernel.getWorkGroupInfo(
        CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
        queriedMultipleSize,
        &queriedMultiple,
        &reportedSize);

    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(queriedMultipleSize, reportedSize);
    EXPECT_EQ(expectedValue, queriedMultiple);
}
TEST_F(KernelTests, GivenInvalidParamNameWhenGettingWorkGroupInfoThenInvalidValueErrorIsReturned) {

View File

@ -374,6 +374,27 @@ HWTEST2_F(PreambleCfeState, givenXehpAndDisabledFusedEuWhenCfeStateProgrammedThe
EXPECT_TRUE(cfeState->getFusedEuDispatch());
}
// Programs CFE_STATE with the CFEFusedEUDispatch debug flag forced to 0, fusedEuEnabled set in
// the capability table, and front-end stream properties configured via setProperties, then
// verifies that the FusedEuDispatch field of the emitted command reads back as false.
HWTEST2_F(PreambleCfeState, givenXehpEnabledFusedEuAndDisableFusedDispatchFromKernelWhenCfeStateProgrammedThenFusedEuDispatchSetToFalse, IsXeHpgCore) {
    using CFE_STATE = typename FamilyType::CFE_STATE;

    // Restores debug flags when the test scope ends.
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.CFEFusedEUDispatch.set(0);

    // Work on a private copy so the shared default hardware info is left untouched.
    auto localHwInfo = *defaultHwInfo;
    localHwInfo.capabilityTable.fusedEuEnabled = true;

    auto vfeCmdSpace = PreambleHelper<FamilyType>::getSpaceForVfeState(&linearStream, localHwInfo, EngineGroupType::RenderCompute);

    StreamProperties properties{};
    // NOTE(review): the second argument is presumably the kernel's disable-EU-fusion request
    // (per the test name) - confirm against the setProperties declaration.
    properties.frontEndState.setProperties(false, true, false, false, localHwInfo);

    PreambleHelper<FamilyType>::programVfeState(vfeCmdSpace, localHwInfo, 0u, 0, 0, properties);

    parseCommands<FamilyType>(linearStream);
    auto cfeStateIterator = find<CFE_STATE *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), cfeStateIterator);

    auto programmedCfeState = reinterpret_cast<CFE_STATE *>(*cfeStateIterator);
    EXPECT_FALSE(programmedCfeState->getFusedEuDispatch());
}
HWTEST2_F(PreambleCfeState, givenXehpAndEnabledFusedEuWhenCfeStateProgrammedThenFusedEuDispatchSetToFalse, IsXeHpgCore) {
using CFE_STATE = typename FamilyType::CFE_STATE;
@ -390,4 +411,4 @@ HWTEST2_F(PreambleCfeState, givenXehpAndEnabledFusedEuWhenCfeStateProgrammedThen
auto cfeState = reinterpret_cast<CFE_STATE *>(*cfeStateIt);
EXPECT_FALSE(cfeState->getFusedEuDispatch());
}
}

View File

@ -20,6 +20,20 @@ using Family = NEO::TGLLPFamily;
namespace NEO {
// Reports whether fused EU dispatch is enabled for this family.
// Precedence, highest first:
//   1. CFEFusedEUDispatch debug flag: any value other than -1 decides the result on its own
//      (enabled only when the flag equals 0).
//   2. disableEUFusionForKernel: a kernel-level request switches fusion off.
//   3. Hardware info: enabled when waDisableFusedThreadScheduling is not set and the
//      capability table reports fusedEuEnabled.
template <>
inline bool HwHelperHw<Family>::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const {
    const auto debugOverride = DebugManager.flags.CFEFusedEUDispatch.get();
    if (debugOverride != -1) {
        return debugOverride == 0;
    }

    if (disableEUFusionForKernel) {
        return false;
    }

    bool enabledByHardware = !hwInfo.workaroundTable.flags.waDisableFusedThreadScheduling;
    enabledByHardware &= hwInfo.capabilityTable.fusedEuEnabled;
    return enabledByHardware;
}
template <>
size_t HwHelperHw<Family>::getMax3dImageWidthOrHeight() const {
return 2048;

View File

@ -83,7 +83,7 @@ uint32_t PreambleHelper<TGLLPFamily>::getUrbEntryAllocationSize() {
template <>
void PreambleHelper<TGLLPFamily>::programAdditionalFieldsInVfeState(VFE_STATE_TYPE *mediaVfeState, const HardwareInfo &hwInfo, bool disableEUFusion) {
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
if (!hwHelper.isFusedEuDispatchEnabled(hwInfo) || disableEUFusion) {
if (!hwHelper.isFusedEuDispatchEnabled(hwInfo, disableEUFusion)) {
mediaVfeState->setDisableSlice0Subslice2(true);
}
if (DebugManager.flags.MediaVfeStateMaxSubSlices.get() != -1) {

View File

@ -98,7 +98,7 @@ class HwHelper {
virtual uint32_t getMinimalSIMDSize() = 0;
virtual bool isWorkaroundRequired(uint32_t lowestSteppingWithBug, uint32_t steppingWithFix, const HardwareInfo &hwInfo) const = 0;
virtual bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const = 0;
virtual bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const = 0;
virtual bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const = 0;
virtual uint64_t getGpuTimeStampInNS(uint64_t timeStamp, double frequency) const = 0;
virtual uint32_t getBindlessSurfaceExtendedMessageDescriptorValue(uint32_t surfStateOffset) const = 0;
virtual void setExtraAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const HardwareInfo &hwInfo) const = 0;
@ -301,7 +301,7 @@ class HwHelperHw : public HwHelper {
bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const override;
bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const override;
bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const override;
static bool isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo);

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -11,7 +11,7 @@
namespace NEO {
template <typename GfxFamily>
inline bool HwHelperHw<GfxFamily>::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const {
inline bool HwHelperHw<GfxFamily>::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const {
return false;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -11,7 +11,7 @@
namespace NEO {
template <typename GfxFamily>
inline bool HwHelperHw<GfxFamily>::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const {
inline bool HwHelperHw<GfxFamily>::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const {
auto fusedEuDispatchEnabled = !hwInfo.workaroundTable.flags.waDisableFusedThreadScheduling;
fusedEuDispatchEnabled &= hwInfo.capabilityTable.fusedEuEnabled;

View File

@ -53,6 +53,7 @@ void WorkSizeInfo::setIfUseImg(const KernelInfo &kernelInfo) {
}
}
}
void WorkSizeInfo::setMinWorkGroupSize(const HardwareInfo *hwInfo, bool disableEUFusion) {
minWorkGroupSize = 0;
if (hasBarriers) {
@ -65,10 +66,11 @@ void WorkSizeInfo::setMinWorkGroupSize(const HardwareInfo *hwInfo, bool disableE
}
const auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily);
if (hwHelper.isFusedEuDispatchEnabled(*hwInfo) && !disableEUFusion) {
if (hwHelper.isFusedEuDispatchEnabled(*hwInfo, disableEUFusion)) {
minWorkGroupSize *= 2;
}
}
void WorkSizeInfo::checkRatio(const size_t workItems[3]) {
if (slmTotalSize > 0) {
useRatio = true;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021 Intel Corporation
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -36,7 +36,7 @@ void PreambleHelper<Family>::appendProgramVFEState(const HardwareInfo &hwInfo, c
}
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
if (!hwHelper.isFusedEuDispatchEnabled(hwInfo)) {
if (!hwHelper.isFusedEuDispatchEnabled(hwInfo, false)) {
command->setFusedEuDispatch(true);
}

View File

@ -22,6 +22,20 @@ namespace NEO {
template <>
const AuxTranslationMode HwHelperHw<Family>::defaultAuxTranslationMode = AuxTranslationMode::Blit;
// Tells callers whether fused EU dispatch should be treated as enabled.
// The CFEFusedEUDispatch debug flag wins whenever it is not -1 (a value of 0 means enabled,
// anything else means disabled). Otherwise a kernel-level disable request forces false, and
// failing that the answer comes from hardware info: the waDisableFusedThreadScheduling
// workaround must be off and capabilityTable.fusedEuEnabled must be set.
template <>
inline bool HwHelperHw<Family>::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const {
    const auto forcedByDebugFlag = DebugManager.flags.CFEFusedEUDispatch.get();
    const bool debugFlagIsSet = (forcedByDebugFlag != -1);
    if (debugFlagIsSet) {
        return forcedByDebugFlag == 0;
    }

    if (disableEUFusionForKernel) {
        return false;
    }

    bool hardwareAllowsFusion = !hwInfo.workaroundTable.flags.waDisableFusedThreadScheduling;
    hardwareAllowsFusion &= hwInfo.capabilityTable.fusedEuEnabled;
    return hardwareAllowsFusion;
}
template <>
uint32_t HwHelperHw<Family>::getMetricsLibraryGenId() const {
return static_cast<uint32_t>(MetricsLibraryApi::ClientGen::XeHPG);

View File

@ -29,7 +29,7 @@ void PreambleHelper<Family>::appendProgramVFEState(const HardwareInfo &hwInfo, c
}
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
if (!hwHelper.isFusedEuDispatchEnabled(hwInfo) || streamProperties.frontEndState.disableEUFusion.value == 1) {
if (!hwHelper.isFusedEuDispatchEnabled(hwInfo, streamProperties.frontEndState.disableEUFusion.value == 1)) {
command->setFusedEuDispatch(true);
}

View File

@ -178,6 +178,19 @@ HWTEST2_F(Gen12LpPreambleVfeState, givenDisableEUFusionWhenProgramAdditionalFiel
EXPECT_TRUE(pMediaVfeState->getDisableSlice0Subslice2());
}
// Calls programAdditionalFieldsInVfeState with disableEUFusion = true while the
// CFEFusedEUDispatch debug flag is forced to 0, and checks that DisableSlice0Subslice2
// is not set in the resulting MEDIA_VFE_STATE.
HWTEST2_F(Gen12LpPreambleVfeState, givenDisableEUFusionAndCFEFusedEUDispatchWhenProgramAdditionalFieldsInVfeStateThenCorrectFieldIsSet, IsTGLLP) {
    using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;

    // Restores debug flags when the test scope ends.
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.CFEFusedEUDispatch.set(0);

    auto hardwareInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo();

    // Start from the family's default-initialized command placed in the linear stream.
    auto vfeState = reinterpret_cast<MEDIA_VFE_STATE *>(linearStream.getSpace(sizeof(MEDIA_VFE_STATE)));
    *vfeState = FamilyType::cmdInitMediaVfeState;

    const bool disableEUFusion = true;
    PreambleHelper<FamilyType>::programAdditionalFieldsInVfeState(vfeState, *hardwareInfo, disableEUFusion);

    EXPECT_FALSE(vfeState->getDisableSlice0Subslice2());
}
typedef PreambleFixture ThreadArbitrationGen12Lp;
GEN12LPTEST_F(ThreadArbitrationGen12Lp, whenGetDefaultThreadArbitrationPolicyIsCalledThenCorrectPolicyIsReturned) {
EXPECT_EQ(ThreadArbitrationPolicy::AgeBased, HwHelperHw<FamilyType>::get().getDefaultThreadArbitrationPolicy());