mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-10 12:53:42 +08:00
Fix CFEFusedEUDispatch debug flag
Signed-off-by: Konstanty Misiak <konstanty.misiak@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
2fb59fd71a
commit
174c27eb31
@ -738,7 +738,7 @@ ze_result_t KernelImp::getProperties(ze_kernel_properties_t *pKernelProperties)
|
||||
|
||||
preferredGroupSizeProperties->preferredMultiple = this->kernelImmData->getKernelInfo()->getMaxSimdSize();
|
||||
auto &hwHelper = NEO::HwHelper::get(this->module->getDevice()->getHwInfo().platform.eRenderCoreFamily);
|
||||
if (hwHelper.isFusedEuDispatchEnabled(this->module->getDevice()->getHwInfo())) {
|
||||
if (hwHelper.isFusedEuDispatchEnabled(this->module->getDevice()->getHwInfo(), kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion)) {
|
||||
preferredGroupSizeProperties->preferredMultiple *= 2;
|
||||
}
|
||||
}
|
||||
|
@ -1093,7 +1093,7 @@ TEST_F(KernelPropertiesTests, whenPassingPreferredGroupSizeStructToGetProperties
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
|
||||
|
||||
auto &hwHelper = NEO::HwHelper::get(module->getDevice()->getHwInfo().platform.eRenderCoreFamily);
|
||||
if (hwHelper.isFusedEuDispatchEnabled(module->getDevice()->getHwInfo())) {
|
||||
if (hwHelper.isFusedEuDispatchEnabled(module->getDevice()->getHwInfo(), false)) {
|
||||
EXPECT_EQ(preferredGroupProperties.preferredMultiple, static_cast<uint32_t>(kernel->getImmutableData()->getKernelInfo()->getMaxSimdSize()) * 2);
|
||||
} else {
|
||||
EXPECT_EQ(preferredGroupProperties.preferredMultiple, static_cast<uint32_t>(kernel->getImmutableData()->getKernelInfo()->getMaxSimdSize()));
|
||||
|
@ -396,7 +396,7 @@ void ClDevice::initializeCaps() {
|
||||
deviceInfo.preferredLocalAtomicAlignment = MemoryConstants::cacheLineSize;
|
||||
deviceInfo.preferredPlatformAtomicAlignment = MemoryConstants::cacheLineSize;
|
||||
|
||||
deviceInfo.preferredWorkGroupSizeMultiple = hwHelper.isFusedEuDispatchEnabled(hwInfo)
|
||||
deviceInfo.preferredWorkGroupSizeMultiple = hwHelper.isFusedEuDispatchEnabled(hwInfo, false)
|
||||
? CommonConstants::maximalSimdSize * 2
|
||||
: CommonConstants::maximalSimdSize;
|
||||
|
||||
|
@ -565,7 +565,7 @@ cl_int Kernel::getWorkGroupInfo(cl_kernel_work_group_info paramName,
|
||||
|
||||
case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
|
||||
preferredWorkGroupSizeMultiple = kernelInfo.getMaxSimdSize();
|
||||
if (hwHelper.isFusedEuDispatchEnabled(hwInfo) && !kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion) {
|
||||
if (hwHelper.isFusedEuDispatchEnabled(hwInfo, kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion)) {
|
||||
preferredWorkGroupSizeMultiple *= 2;
|
||||
}
|
||||
srcSize = sizeof(preferredWorkGroupSizeMultiple);
|
||||
|
@ -32,8 +32,12 @@ TEST(localWorkSizeTest, givenDisableEUFusionWhenCreatingWorkSizeInfoThenCorrectM
|
||||
true // disableEUFusion
|
||||
);
|
||||
|
||||
auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
|
||||
bool fusedDispatchEnabled = hwHelper.isFusedEuDispatchEnabled(*defaultHwInfo, true);
|
||||
auto WGSMultiple = fusedDispatchEnabled ? 2 : 1;
|
||||
|
||||
uint32_t maxBarriersPerHSlice = (defaultHwInfo.get()->platform.eRenderCoreFamily >= IGFX_GEN9_CORE) ? 32 : 16;
|
||||
uint32_t expectedMinWGS = simdSize * numThreadsPerSubS / maxBarriersPerHSlice;
|
||||
uint32_t expectedMinWGS = WGSMultiple * simdSize * numThreadsPerSubS / maxBarriersPerHSlice;
|
||||
EXPECT_EQ(expectedMinWGS, wsInfo.minWorkGroupSize);
|
||||
}
|
||||
|
||||
|
@ -242,7 +242,7 @@ TEST_F(DeviceGetCapsTest, WhenCreatingDeviceThenCapsArePopulatedCorrectly) {
|
||||
EXPECT_EQ(64u, caps.preferredPlatformAtomicAlignment);
|
||||
EXPECT_TRUE(caps.nonUniformWorkGroupSupport);
|
||||
|
||||
auto expectedPreferredWorkGroupSizeMultiple = hwHelper.isFusedEuDispatchEnabled(*defaultHwInfo)
|
||||
auto expectedPreferredWorkGroupSizeMultiple = hwHelper.isFusedEuDispatchEnabled(*defaultHwInfo, false)
|
||||
? CommonConstants::maximalSimdSize * 2
|
||||
: CommonConstants::maximalSimdSize;
|
||||
EXPECT_EQ(expectedPreferredWorkGroupSizeMultiple, caps.preferredWorkGroupSizeMultiple);
|
||||
|
@ -195,7 +195,7 @@ GEN12LPTEST_F(HwHelperTestGen12Lp, givenTgllpWhenIsFusedEuDispatchEnabledIsCalle
|
||||
for (auto &[expectedResult, wa, debugKey] : testParams) {
|
||||
waTable.flags.waDisableFusedThreadScheduling = wa;
|
||||
DebugManager.flags.CFEFusedEUDispatch.set(debugKey);
|
||||
EXPECT_EQ(expectedResult, helper.isFusedEuDispatchEnabled(hardwareInfo));
|
||||
EXPECT_EQ(expectedResult, helper.isFusedEuDispatchEnabled(hardwareInfo, false));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1061,7 +1061,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, WhenIsFusedEuDispatchEnabledIsCalledTh
|
||||
GTEST_SKIP();
|
||||
}
|
||||
auto &helper = HwHelper::get(renderCoreFamily);
|
||||
EXPECT_FALSE(helper.isFusedEuDispatchEnabled(hardwareInfo));
|
||||
EXPECT_FALSE(helper.isFusedEuDispatchEnabled(hardwareInfo, false));
|
||||
}
|
||||
|
||||
HWTEST_F(PipeControlHelperTests, WhenGettingPipeControSizeForCacheFlushThenReturnCorrectValue) {
|
||||
|
@ -315,6 +315,10 @@ TEST_F(KernelTests, GivenRequiredDisabledEUFusionFlagWhenGettingPrefferedWorkGro
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion = true;
|
||||
MockKernel kernel(pProgram, kernelInfo, *pClDevice);
|
||||
|
||||
auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
|
||||
bool fusedDispatchEnabled = hwHelper.isFusedEuDispatchEnabled(*defaultHwInfo, true);
|
||||
auto expectedValue = kernelInfo.getMaxSimdSize() * (fusedDispatchEnabled ? 2 : 1);
|
||||
|
||||
cl_kernel_info paramName = CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE;
|
||||
size_t paramValue;
|
||||
size_t paramValueSize = sizeof(paramValue);
|
||||
@ -328,9 +332,35 @@ TEST_F(KernelTests, GivenRequiredDisabledEUFusionFlagWhenGettingPrefferedWorkGro
|
||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||
EXPECT_EQ(paramValueSize, paramValueSizeRet);
|
||||
|
||||
EXPECT_EQ(kernelInfo.getMaxSimdSize(), paramValue);
|
||||
EXPECT_EQ(expectedValue, paramValue);
|
||||
}
|
||||
|
||||
kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion = false;
|
||||
// Checks CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE for a kernel that requests
// disabled EU fusion while the CFEFusedEUDispatch debug flag is set to 0.
// The expected value is derived from the same HwHelper query, so the test follows
// whatever isFusedEuDispatchEnabled() reports for the default hardware info.
TEST_F(KernelTests, GivenCFEFusedEUDispatchEnabledAndRequiredDisabledUEFusionWhenGettingPrefferedWorkGroupSizeMultipleThenCorectValueIsReturned) {
    // NOTE(review): presumably restores the debug flags when it leaves scope - confirm.
    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.CFEFusedEUDispatch.set(0);

    KernelInfo kernelInfo = {};
    kernelInfo.kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion = true;
    MockKernel kernel(pProgram, kernelInfo, *pClDevice);

    // Query the helper with the kernel-level disable request set, mirroring the kernel above.
    auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily);
    bool fusedDispatchEnabled = hwHelper.isFusedEuDispatchEnabled(*defaultHwInfo, true);
    auto expectedValue = kernelInfo.getMaxSimdSize() * (fusedDispatchEnabled ? 2 : 1);

    cl_kernel_info paramName = CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE;
    // Zero-initialised so a failed query never compares indeterminate memory.
    size_t paramValue = 0;
    size_t paramValueSize = sizeof(paramValue);
    size_t paramValueSizeRet = 0;

    retVal = kernel.getWorkGroupInfo(
        paramName,
        paramValueSize,
        &paramValue,
        &paramValueSizeRet);
    EXPECT_EQ(CL_SUCCESS, retVal);
    EXPECT_EQ(paramValueSize, paramValueSizeRet);

    EXPECT_EQ(expectedValue, paramValue);
}
|
||||
|
||||
TEST_F(KernelTests, GivenInvalidParamNameWhenGettingWorkGroupInfoThenInvalidValueErrorIsReturned) {
|
||||
|
@ -374,6 +374,27 @@ HWTEST2_F(PreambleCfeState, givenXehpAndDisabledFusedEuWhenCfeStateProgrammedThe
|
||||
EXPECT_TRUE(cfeState->getFusedEuDispatch());
|
||||
}
|
||||
|
||||
// Programs the VFE/CFE state with the CFEFusedEUDispatch debug flag set to 0 and
// verifies that the parsed CFE_STATE reports getFusedEuDispatch() == false.
HWTEST2_F(PreambleCfeState, givenXehpEnabledFusedEuAndDisableFusedDispatchFromKernelWhenCfeStateProgrammedThenFusedEuDispatchSetToFalse, IsXeHpgCore) {
    using CFE_STATE = typename FamilyType::CFE_STATE;

    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.CFEFusedEUDispatch.set(0);

    // Work on a local copy so the shared default hardware info stays untouched.
    auto hardwareInfo = *defaultHwInfo;
    hardwareInfo.capabilityTable.fusedEuEnabled = true;

    auto vfeCmdSpace = PreambleHelper<FamilyType>::getSpaceForVfeState(&linearStream, hardwareInfo, EngineGroupType::RenderCompute);

    // NOTE(review): the second argument is presumably the kernel's disable-EU-fusion
    // request (per the test name) - confirm against setProperties().
    StreamProperties properties{};
    properties.frontEndState.setProperties(false, true, false, false, hardwareInfo);

    PreambleHelper<FamilyType>::programVfeState(vfeCmdSpace, hardwareInfo, 0u, 0, 0, properties);
    parseCommands<FamilyType>(linearStream);

    auto cfeIterator = find<CFE_STATE *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), cfeIterator);

    auto cfeCmd = reinterpret_cast<CFE_STATE *>(*cfeIterator);
    EXPECT_FALSE(cfeCmd->getFusedEuDispatch());
}
|
||||
|
||||
HWTEST2_F(PreambleCfeState, givenXehpAndEnabledFusedEuWhenCfeStateProgrammedThenFusedEuDispatchSetToFalse, IsXeHpgCore) {
|
||||
using CFE_STATE = typename FamilyType::CFE_STATE;
|
||||
|
||||
@ -390,4 +411,4 @@ HWTEST2_F(PreambleCfeState, givenXehpAndEnabledFusedEuWhenCfeStateProgrammedThen
|
||||
auto cfeState = reinterpret_cast<CFE_STATE *>(*cfeStateIt);
|
||||
|
||||
EXPECT_FALSE(cfeState->getFusedEuDispatch());
|
||||
}
|
||||
}
|
||||
|
@ -20,6 +20,20 @@ using Family = NEO::TGLLPFamily;
|
||||
|
||||
namespace NEO {
|
||||
|
||||
template <>
|
||||
inline bool HwHelperHw<Family>::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const {
|
||||
auto fusedEuDispatchEnabled = !hwInfo.workaroundTable.flags.waDisableFusedThreadScheduling;
|
||||
fusedEuDispatchEnabled &= hwInfo.capabilityTable.fusedEuEnabled;
|
||||
|
||||
if (disableEUFusionForKernel)
|
||||
fusedEuDispatchEnabled = false;
|
||||
|
||||
if (DebugManager.flags.CFEFusedEUDispatch.get() != -1) {
|
||||
fusedEuDispatchEnabled = (DebugManager.flags.CFEFusedEUDispatch.get() == 0);
|
||||
}
|
||||
return fusedEuDispatchEnabled;
|
||||
}
|
||||
|
||||
template <>
|
||||
size_t HwHelperHw<Family>::getMax3dImageWidthOrHeight() const {
|
||||
return 2048;
|
||||
|
@ -83,7 +83,7 @@ uint32_t PreambleHelper<TGLLPFamily>::getUrbEntryAllocationSize() {
|
||||
template <>
|
||||
void PreambleHelper<TGLLPFamily>::programAdditionalFieldsInVfeState(VFE_STATE_TYPE *mediaVfeState, const HardwareInfo &hwInfo, bool disableEUFusion) {
|
||||
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
if (!hwHelper.isFusedEuDispatchEnabled(hwInfo) || disableEUFusion) {
|
||||
if (!hwHelper.isFusedEuDispatchEnabled(hwInfo, disableEUFusion)) {
|
||||
mediaVfeState->setDisableSlice0Subslice2(true);
|
||||
}
|
||||
if (DebugManager.flags.MediaVfeStateMaxSubSlices.get() != -1) {
|
||||
|
@ -98,7 +98,7 @@ class HwHelper {
|
||||
virtual uint32_t getMinimalSIMDSize() = 0;
|
||||
virtual bool isWorkaroundRequired(uint32_t lowestSteppingWithBug, uint32_t steppingWithFix, const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const = 0;
|
||||
virtual bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const = 0;
|
||||
virtual uint64_t getGpuTimeStampInNS(uint64_t timeStamp, double frequency) const = 0;
|
||||
virtual uint32_t getBindlessSurfaceExtendedMessageDescriptorValue(uint32_t surfStateOffset) const = 0;
|
||||
virtual void setExtraAllocationData(AllocationData &allocationData, const AllocationProperties &properties, const HardwareInfo &hwInfo) const = 0;
|
||||
@ -301,7 +301,7 @@ class HwHelperHw : public HwHelper {
|
||||
|
||||
bool isOffsetToSkipSetFFIDGPWARequired(const HardwareInfo &hwInfo) const override;
|
||||
|
||||
bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const override;
|
||||
bool isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const override;
|
||||
|
||||
static bool isForceDefaultRCSEngineWARequired(const HardwareInfo &hwInfo);
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -11,7 +11,7 @@
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline bool HwHelperHw<GfxFamily>::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const {
|
||||
inline bool HwHelperHw<GfxFamily>::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2021 Intel Corporation
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -11,7 +11,7 @@
|
||||
namespace NEO {
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline bool HwHelperHw<GfxFamily>::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo) const {
|
||||
inline bool HwHelperHw<GfxFamily>::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const {
|
||||
auto fusedEuDispatchEnabled = !hwInfo.workaroundTable.flags.waDisableFusedThreadScheduling;
|
||||
fusedEuDispatchEnabled &= hwInfo.capabilityTable.fusedEuEnabled;
|
||||
|
||||
|
@ -53,6 +53,7 @@ void WorkSizeInfo::setIfUseImg(const KernelInfo &kernelInfo) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void WorkSizeInfo::setMinWorkGroupSize(const HardwareInfo *hwInfo, bool disableEUFusion) {
|
||||
minWorkGroupSize = 0;
|
||||
if (hasBarriers) {
|
||||
@ -65,10 +66,11 @@ void WorkSizeInfo::setMinWorkGroupSize(const HardwareInfo *hwInfo, bool disableE
|
||||
}
|
||||
|
||||
const auto &hwHelper = HwHelper::get(hwInfo->platform.eRenderCoreFamily);
|
||||
if (hwHelper.isFusedEuDispatchEnabled(*hwInfo) && !disableEUFusion) {
|
||||
if (hwHelper.isFusedEuDispatchEnabled(*hwInfo, disableEUFusion)) {
|
||||
minWorkGroupSize *= 2;
|
||||
}
|
||||
}
|
||||
|
||||
void WorkSizeInfo::checkRatio(const size_t workItems[3]) {
|
||||
if (slmTotalSize > 0) {
|
||||
useRatio = true;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@ -36,7 +36,7 @@ void PreambleHelper<Family>::appendProgramVFEState(const HardwareInfo &hwInfo, c
|
||||
}
|
||||
|
||||
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
if (!hwHelper.isFusedEuDispatchEnabled(hwInfo)) {
|
||||
if (!hwHelper.isFusedEuDispatchEnabled(hwInfo, false)) {
|
||||
command->setFusedEuDispatch(true);
|
||||
}
|
||||
|
||||
|
@ -22,6 +22,20 @@ namespace NEO {
|
||||
template <>
|
||||
const AuxTranslationMode HwHelperHw<Family>::defaultAuxTranslationMode = AuxTranslationMode::Blit;
|
||||
|
||||
template <>
|
||||
inline bool HwHelperHw<Family>::isFusedEuDispatchEnabled(const HardwareInfo &hwInfo, bool disableEUFusionForKernel) const {
|
||||
auto fusedEuDispatchEnabled = !hwInfo.workaroundTable.flags.waDisableFusedThreadScheduling;
|
||||
fusedEuDispatchEnabled &= hwInfo.capabilityTable.fusedEuEnabled;
|
||||
|
||||
if (disableEUFusionForKernel)
|
||||
fusedEuDispatchEnabled = false;
|
||||
|
||||
if (DebugManager.flags.CFEFusedEUDispatch.get() != -1) {
|
||||
fusedEuDispatchEnabled = (DebugManager.flags.CFEFusedEUDispatch.get() == 0);
|
||||
}
|
||||
return fusedEuDispatchEnabled;
|
||||
}
|
||||
|
||||
template <>
|
||||
uint32_t HwHelperHw<Family>::getMetricsLibraryGenId() const {
|
||||
return static_cast<uint32_t>(MetricsLibraryApi::ClientGen::XeHPG);
|
||||
|
@ -29,7 +29,7 @@ void PreambleHelper<Family>::appendProgramVFEState(const HardwareInfo &hwInfo, c
|
||||
}
|
||||
|
||||
auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
if (!hwHelper.isFusedEuDispatchEnabled(hwInfo) || streamProperties.frontEndState.disableEUFusion.value == 1) {
|
||||
if (!hwHelper.isFusedEuDispatchEnabled(hwInfo, streamProperties.frontEndState.disableEUFusion.value == 1)) {
|
||||
command->setFusedEuDispatch(true);
|
||||
}
|
||||
|
||||
|
@ -178,6 +178,19 @@ HWTEST2_F(Gen12LpPreambleVfeState, givenDisableEUFusionWhenProgramAdditionalFiel
|
||||
EXPECT_TRUE(pMediaVfeState->getDisableSlice0Subslice2());
|
||||
}
|
||||
|
||||
// With the CFEFusedEUDispatch debug flag set to 0, programming the additional VFE
// fields with the disable-EU-fusion argument set must leave DisableSlice0Subslice2 cleared.
HWTEST2_F(Gen12LpPreambleVfeState, givenDisableEUFusionAndCFEFusedEUDispatchWhenProgramAdditionalFieldsInVfeStateThenCorrectFieldIsSet, IsTGLLP) {
    using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE;

    DebugManagerStateRestore dbgRestorer;
    DebugManager.flags.CFEFusedEUDispatch.set(0);

    auto hardwareInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo();

    // Reserve room for one MEDIA_VFE_STATE in the stream and start from the family's init command.
    auto rawSpace = linearStream.getSpace(sizeof(MEDIA_VFE_STATE));
    auto vfeState = reinterpret_cast<MEDIA_VFE_STATE *>(rawSpace);
    *vfeState = FamilyType::cmdInitMediaVfeState;

    const bool disableEUFusion = true;
    PreambleHelper<FamilyType>::programAdditionalFieldsInVfeState(vfeState, *hardwareInfo, disableEUFusion);

    EXPECT_FALSE(vfeState->getDisableSlice0Subslice2());
}
|
||||
|
||||
typedef PreambleFixture ThreadArbitrationGen12Lp;
|
||||
GEN12LPTEST_F(ThreadArbitrationGen12Lp, whenGetDefaultThreadArbitrationPolicyIsCalledThenCorrectPolicyIsReturned) {
|
||||
EXPECT_EQ(ThreadArbitrationPolicy::AgeBased, HwHelperHw<FamilyType>::get().getDefaultThreadArbitrationPolicy());
|
||||
|
Reference in New Issue
Block a user