Fix fusedEuDispatch programming and minimum wg size

Related-To: NEO-6455

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2021-11-23 12:22:28 +00:00
committed by Compute-Runtime-Automation
parent 5a2792a74f
commit de7195d174
7 changed files with 115 additions and 17 deletions

View File

@ -719,7 +719,27 @@ TEST(localWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenItHasCorre
// Fixture alias: the local-work-size tests in this file run on the plain ::testing::Test fixture.
using LocalWorkSizeTest = ::testing::Test;
HWTEST_F(LocalWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenTestEuFusionFtr) {
// Verifies that the WorkSizeInfo created from a DispatchInfo (kernel with one barrier)
// carries the non-fused minimum work-group size. Gated by the IsAtMostGen11 matcher.
HWTEST2_F(LocalWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenWorkgroupSizeIsCorrect, IsAtMostGen11) {
    MockClDevice clDevice{new MockDevice};
    MockKernelWithInternals mockKernelData(clDevice);
    mockKernelData.kernelInfo.kernelDescriptor.kernelAttributes.barrierCount = 1;

    DispatchInfo info;
    info.setClDevice(&clDevice);
    info.setKernel(mockKernelData.mockKernel);

    // Divisor used by the expectation: 32 when the default core family is Gen9 or newer, 16 otherwise.
    uint32_t barriersPerHSlice = 16;
    if (defaultHwInfo->platform.eRenderCoreFamily >= IGFX_GEN9_CORE) {
        barriersPerHSlice = 32;
    }

    // Expected value = maxNumEUsPerSubSlice * numThreadsPerEU * max SIMD size / divisor,
    // accumulated left to right so the integer arithmetic matches a single chained expression.
    const auto euCount = static_cast<uint32_t>(clDevice.getSharedDeviceInfo().maxNumEUsPerSubSlice);
    const auto hwThreads = euCount * clDevice.getSharedDeviceInfo().numThreadsPerEU;
    const auto simdLanes = hwThreads * static_cast<uint32_t>(mockKernelData.mockKernel->getKernelInfo().getMaxSimdSize());
    const uint32_t expectedMinWorkGroupSize = simdLanes / barriersPerHSlice;

    WorkSizeInfo createdInfo = createWorkSizeInfoFromDispatchInfo(info);
    EXPECT_EQ(expectedMinWorkGroupSize, createdInfo.minWorkGroupSize);
}
// Test matcher built from IsWithinGfxCore with bounds IGFX_GEN12LP_CORE and IGFX_XE_HP_CORE
// (presumably an inclusive core-family range — confirm against the matcher's definition).
using IsCoreWithFusedEu = IsWithinGfxCore<IGFX_GEN12LP_CORE, IGFX_XE_HP_CORE>;
HWTEST2_F(LocalWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenTestEuFusionFtr, IsCoreWithFusedEu) {
MockClDevice device{new MockDevice};
MockKernelWithInternals kernel(device);
kernel.kernelInfo.kernelDescriptor.kernelAttributes.barrierCount = 1;
@ -735,11 +755,8 @@ HWTEST_F(LocalWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenTestEu
const uint32_t fusedMinWorkGroupSize = 2 * nonFusedMinWorkGroupSize;
WorkSizeInfo workSizeInfo = createWorkSizeInfoFromDispatchInfo(dispatchInfo);
if (defaultHwInfo->platform.eRenderCoreFamily < IGFX_GEN12_CORE) {
EXPECT_EQ(nonFusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize);
} else {
EXPECT_EQ(fusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize);
}
EXPECT_NE(nonFusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize);
EXPECT_EQ(fusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize);
}
HWTEST2_F(LocalWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenTestEuFusionFtrForcedByDebugManager, IsAtLeastGen12lp) {

View File

@ -208,7 +208,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, PreambleCfeStateXeHPAndLater, givenSetDebugFlagWhen
auto cfeState = reinterpret_cast<CFE_STATE *>(*cfeStateIt);
EXPECT_EQ(expectedValue1, cfeState->getFusedEuDispatch());
EXPECT_EQ(expectedValue1, static_cast<uint32_t>(cfeState->getOverDispatchControl()));
EXPECT_EQ(expectedValue1, cfeState->getLargeGRFThreadAdjustDisable());
EXPECT_EQ(expectedValue2, cfeState->getNumberOfWalkers());

View File

@ -81,3 +81,39 @@ HWTEST2_F(PreambleCfeState, givenXehpAndFlagCFEComputeOverdispatchDisableSetTrue
EXPECT_TRUE(cfeState->getComputeOverdispatchDisable());
}
// With capabilityTable.fusedEuEnabled cleared, the CFE_STATE command written by
// programVfeState is expected to report getFusedEuDispatch() == true.
HWTEST2_F(PreambleCfeState, givenXehpAndDisabledFusedEuWhenCfeStateProgrammedThenFusedEuDispatchSetToTrue, IsXEHP) {
    using CFE_STATE = typename FamilyType::CFE_STATE;

    // Modify a local copy of the default hardware info rather than the shared default.
    auto testHwInfo = *defaultHwInfo;
    testHwInfo.capabilityTable.fusedEuEnabled = false;

    auto vfeSpace = PreambleHelper<FamilyType>::getSpaceForVfeState(&linearStream, testHwInfo, EngineGroupType::RenderCompute);

    StreamProperties properties{};
    properties.frontEndState.setProperties(false, false, false, testHwInfo);

    PreambleHelper<FamilyType>::programVfeState(vfeSpace, testHwInfo, 0u, 0, 0, properties);

    // Parse what was emitted into linearStream and locate the CFE_STATE command.
    parseCommands<FamilyType>(linearStream);
    auto cfeIt = find<CFE_STATE *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), cfeIt);

    auto programmedCfe = reinterpret_cast<CFE_STATE *>(*cfeIt);
    EXPECT_TRUE(programmedCfe->getFusedEuDispatch());
}
// With capabilityTable.fusedEuEnabled set, the CFE_STATE command written by
// programVfeState is expected to report getFusedEuDispatch() == false.
HWTEST2_F(PreambleCfeState, givenXehpAndEnabledFusedEuWhenCfeStateProgrammedThenFusedEuDispatchSetToFalse, IsXEHP) {
    using CFE_STATE = typename FamilyType::CFE_STATE;

    // Modify a local copy of the default hardware info rather than the shared default.
    auto testHwInfo = *defaultHwInfo;
    testHwInfo.capabilityTable.fusedEuEnabled = true;

    auto vfeSpace = PreambleHelper<FamilyType>::getSpaceForVfeState(&linearStream, testHwInfo, EngineGroupType::RenderCompute);

    StreamProperties properties{};
    properties.frontEndState.setProperties(false, false, false, testHwInfo);

    PreambleHelper<FamilyType>::programVfeState(vfeSpace, testHwInfo, 0u, 0, 0, properties);

    // Parse what was emitted into linearStream and locate the CFE_STATE command.
    parseCommands<FamilyType>(linearStream);
    auto cfeIt = find<CFE_STATE *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), cfeIt);

    auto programmedCfe = reinterpret_cast<CFE_STATE *>(*cfeIt);
    EXPECT_FALSE(programmedCfe->getFusedEuDispatch());
}

View File

@ -9,6 +9,7 @@
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/state_base_address.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/unit_test/preamble/preamble_fixture.h"
#include "opencl/source/helpers/cl_memory_properties_helpers.h"
#include "opencl/source/mem_obj/buffer.h"
@ -356,3 +357,41 @@ XE_HPG_CORETEST_F(CmdsProgrammingTestsXeHpgCore, givenDecompressInL3ForImage2dFr
clReleaseMemObject(imageDesc.mem_object);
}
// Fixture alias: the CFE_STATE tests below run on PreambleFixture under the PreambleCfeState name.
using PreambleCfeState = PreambleFixture;
// With capabilityTable.fusedEuEnabled cleared, the CFE_STATE command written by
// programVfeState is expected to report getFusedEuDispatch() == true.
// NOTE(review): the test name says "Xehp" but the test is gated on IsXeHpgCore; name kept as-is.
HWTEST2_F(PreambleCfeState, givenXehpAndDisabledFusedEuWhenCfeStateProgrammedThenFusedEuDispatchSetToTrue, IsXeHpgCore) {
    using CFE_STATE = typename FamilyType::CFE_STATE;

    // Modify a local copy of the default hardware info rather than the shared default.
    auto localHwInfo = *defaultHwInfo;
    localHwInfo.capabilityTable.fusedEuEnabled = false;

    auto cmdSpace = PreambleHelper<FamilyType>::getSpaceForVfeState(&linearStream, localHwInfo, EngineGroupType::RenderCompute);

    StreamProperties frontEndProps{};
    frontEndProps.frontEndState.setProperties(false, false, false, localHwInfo);

    PreambleHelper<FamilyType>::programVfeState(cmdSpace, localHwInfo, 0u, 0, 0, frontEndProps);

    // Parse what was emitted into linearStream and locate the CFE_STATE command.
    parseCommands<FamilyType>(linearStream);
    auto foundCmd = find<CFE_STATE *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), foundCmd);

    auto cfeCmd = reinterpret_cast<CFE_STATE *>(*foundCmd);
    EXPECT_TRUE(cfeCmd->getFusedEuDispatch());
}
// With capabilityTable.fusedEuEnabled set, the CFE_STATE command written by
// programVfeState is expected to report getFusedEuDispatch() == false.
// NOTE(review): the test name says "Xehp" but the test is gated on IsXeHpgCore; name kept as-is.
HWTEST2_F(PreambleCfeState, givenXehpAndEnabledFusedEuWhenCfeStateProgrammedThenFusedEuDispatchSetToFalse, IsXeHpgCore) {
    using CFE_STATE = typename FamilyType::CFE_STATE;

    // Modify a local copy of the default hardware info rather than the shared default.
    auto localHwInfo = *defaultHwInfo;
    localHwInfo.capabilityTable.fusedEuEnabled = true;

    auto cmdSpace = PreambleHelper<FamilyType>::getSpaceForVfeState(&linearStream, localHwInfo, EngineGroupType::RenderCompute);

    StreamProperties frontEndProps{};
    frontEndProps.frontEndState.setProperties(false, false, false, localHwInfo);

    PreambleHelper<FamilyType>::programVfeState(cmdSpace, localHwInfo, 0u, 0, 0, frontEndProps);

    // Parse what was emitted into linearStream and locate the CFE_STATE command.
    parseCommands<FamilyType>(linearStream);
    auto foundCmd = find<CFE_STATE *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), foundCmd);

    auto cfeCmd = reinterpret_cast<CFE_STATE *>(*foundCmd);
    EXPECT_FALSE(cfeCmd->getFusedEuDispatch());
}