mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Fix fusedEuDispatch programming and minimum wg size
Related-To: NEO-6455 Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
5a2792a74f
commit
de7195d174
@ -719,7 +719,27 @@ TEST(localWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenItHasCorre
|
||||
|
||||
using LocalWorkSizeTest = ::testing::Test;
|
||||
|
||||
HWTEST_F(LocalWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenTestEuFusionFtr) {
|
||||
HWTEST2_F(LocalWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenWorkgroupSizeIsCorrect, IsAtMostGen11) {
|
||||
MockClDevice device{new MockDevice};
|
||||
MockKernelWithInternals kernel(device);
|
||||
kernel.kernelInfo.kernelDescriptor.kernelAttributes.barrierCount = 1;
|
||||
DispatchInfo dispatchInfo;
|
||||
dispatchInfo.setClDevice(&device);
|
||||
dispatchInfo.setKernel(kernel.mockKernel);
|
||||
|
||||
const uint32_t maxBarriersPerHSlice = (defaultHwInfo->platform.eRenderCoreFamily >= IGFX_GEN9_CORE) ? 32 : 16;
|
||||
const uint32_t nonFusedMinWorkGroupSize = static_cast<uint32_t>(device.getSharedDeviceInfo().maxNumEUsPerSubSlice) *
|
||||
device.getSharedDeviceInfo().numThreadsPerEU *
|
||||
static_cast<uint32_t>(kernel.mockKernel->getKernelInfo().getMaxSimdSize()) /
|
||||
maxBarriersPerHSlice;
|
||||
WorkSizeInfo workSizeInfo = createWorkSizeInfoFromDispatchInfo(dispatchInfo);
|
||||
|
||||
EXPECT_EQ(nonFusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize);
|
||||
}
|
||||
|
||||
using IsCoreWithFusedEu = IsWithinGfxCore<IGFX_GEN12LP_CORE, IGFX_XE_HP_CORE>;
|
||||
|
||||
HWTEST2_F(LocalWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenTestEuFusionFtr, IsCoreWithFusedEu) {
|
||||
MockClDevice device{new MockDevice};
|
||||
MockKernelWithInternals kernel(device);
|
||||
kernel.kernelInfo.kernelDescriptor.kernelAttributes.barrierCount = 1;
|
||||
@ -735,11 +755,8 @@ HWTEST_F(LocalWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenTestEu
|
||||
const uint32_t fusedMinWorkGroupSize = 2 * nonFusedMinWorkGroupSize;
|
||||
WorkSizeInfo workSizeInfo = createWorkSizeInfoFromDispatchInfo(dispatchInfo);
|
||||
|
||||
if (defaultHwInfo->platform.eRenderCoreFamily < IGFX_GEN12_CORE) {
|
||||
EXPECT_EQ(nonFusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize);
|
||||
} else {
|
||||
EXPECT_EQ(fusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize);
|
||||
}
|
||||
EXPECT_NE(nonFusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize);
|
||||
EXPECT_EQ(fusedMinWorkGroupSize, workSizeInfo.minWorkGroupSize);
|
||||
}
|
||||
|
||||
HWTEST2_F(LocalWorkSizeTest, givenDispatchInfoWhenWorkSizeInfoIsCreatedThenTestEuFusionFtrForcedByDebugManager, IsAtLeastGen12lp) {
|
||||
|
@ -208,7 +208,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, PreambleCfeStateXeHPAndLater, givenSetDebugFlagWhen
|
||||
|
||||
auto cfeState = reinterpret_cast<CFE_STATE *>(*cfeStateIt);
|
||||
|
||||
EXPECT_EQ(expectedValue1, cfeState->getFusedEuDispatch());
|
||||
EXPECT_EQ(expectedValue1, static_cast<uint32_t>(cfeState->getOverDispatchControl()));
|
||||
EXPECT_EQ(expectedValue1, cfeState->getLargeGRFThreadAdjustDisable());
|
||||
EXPECT_EQ(expectedValue2, cfeState->getNumberOfWalkers());
|
||||
|
@ -81,3 +81,39 @@ HWTEST2_F(PreambleCfeState, givenXehpAndFlagCFEComputeOverdispatchDisableSetTrue
|
||||
|
||||
EXPECT_TRUE(cfeState->getComputeOverdispatchDisable());
|
||||
}
|
||||
|
||||
HWTEST2_F(PreambleCfeState, givenXehpAndDisabledFusedEuWhenCfeStateProgrammedThenFusedEuDispatchSetToTrue, IsXEHP) {
|
||||
using CFE_STATE = typename FamilyType::CFE_STATE;
|
||||
|
||||
auto hwInfo = *defaultHwInfo;
|
||||
hwInfo.capabilityTable.fusedEuEnabled = false;
|
||||
|
||||
auto pVfeCmd = PreambleHelper<FamilyType>::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::RenderCompute);
|
||||
StreamProperties streamProperties{};
|
||||
streamProperties.frontEndState.setProperties(false, false, false, hwInfo);
|
||||
PreambleHelper<FamilyType>::programVfeState(pVfeCmd, hwInfo, 0u, 0, 0, streamProperties);
|
||||
parseCommands<FamilyType>(linearStream);
|
||||
auto cfeStateIt = find<CFE_STATE *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), cfeStateIt);
|
||||
auto cfeState = reinterpret_cast<CFE_STATE *>(*cfeStateIt);
|
||||
|
||||
EXPECT_TRUE(cfeState->getFusedEuDispatch());
|
||||
}
|
||||
|
||||
HWTEST2_F(PreambleCfeState, givenXehpAndEnabledFusedEuWhenCfeStateProgrammedThenFusedEuDispatchSetToFalse, IsXEHP) {
|
||||
using CFE_STATE = typename FamilyType::CFE_STATE;
|
||||
|
||||
auto hwInfo = *defaultHwInfo;
|
||||
hwInfo.capabilityTable.fusedEuEnabled = true;
|
||||
|
||||
auto pVfeCmd = PreambleHelper<FamilyType>::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::RenderCompute);
|
||||
StreamProperties streamProperties{};
|
||||
streamProperties.frontEndState.setProperties(false, false, false, hwInfo);
|
||||
PreambleHelper<FamilyType>::programVfeState(pVfeCmd, hwInfo, 0u, 0, 0, streamProperties);
|
||||
parseCommands<FamilyType>(linearStream);
|
||||
auto cfeStateIt = find<CFE_STATE *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), cfeStateIt);
|
||||
auto cfeState = reinterpret_cast<CFE_STATE *>(*cfeStateIt);
|
||||
|
||||
EXPECT_FALSE(cfeState->getFusedEuDispatch());
|
||||
}
|
@ -9,6 +9,7 @@
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/helpers/state_base_address.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/unit_test/preamble/preamble_fixture.h"
|
||||
|
||||
#include "opencl/source/helpers/cl_memory_properties_helpers.h"
|
||||
#include "opencl/source/mem_obj/buffer.h"
|
||||
@ -356,3 +357,41 @@ XE_HPG_CORETEST_F(CmdsProgrammingTestsXeHpgCore, givenDecompressInL3ForImage2dFr
|
||||
|
||||
clReleaseMemObject(imageDesc.mem_object);
|
||||
}
|
||||
|
||||
using PreambleCfeState = PreambleFixture;
|
||||
|
||||
HWTEST2_F(PreambleCfeState, givenXehpAndDisabledFusedEuWhenCfeStateProgrammedThenFusedEuDispatchSetToTrue, IsXeHpgCore) {
|
||||
using CFE_STATE = typename FamilyType::CFE_STATE;
|
||||
|
||||
auto hwInfo = *defaultHwInfo;
|
||||
hwInfo.capabilityTable.fusedEuEnabled = false;
|
||||
|
||||
auto pVfeCmd = PreambleHelper<FamilyType>::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::RenderCompute);
|
||||
StreamProperties streamProperties{};
|
||||
streamProperties.frontEndState.setProperties(false, false, false, hwInfo);
|
||||
PreambleHelper<FamilyType>::programVfeState(pVfeCmd, hwInfo, 0u, 0, 0, streamProperties);
|
||||
parseCommands<FamilyType>(linearStream);
|
||||
auto cfeStateIt = find<CFE_STATE *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), cfeStateIt);
|
||||
auto cfeState = reinterpret_cast<CFE_STATE *>(*cfeStateIt);
|
||||
|
||||
EXPECT_TRUE(cfeState->getFusedEuDispatch());
|
||||
}
|
||||
|
||||
HWTEST2_F(PreambleCfeState, givenXehpAndEnabledFusedEuWhenCfeStateProgrammedThenFusedEuDispatchSetToFalse, IsXeHpgCore) {
|
||||
using CFE_STATE = typename FamilyType::CFE_STATE;
|
||||
|
||||
auto hwInfo = *defaultHwInfo;
|
||||
hwInfo.capabilityTable.fusedEuEnabled = true;
|
||||
|
||||
auto pVfeCmd = PreambleHelper<FamilyType>::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::RenderCompute);
|
||||
StreamProperties streamProperties{};
|
||||
streamProperties.frontEndState.setProperties(false, false, false, hwInfo);
|
||||
PreambleHelper<FamilyType>::programVfeState(pVfeCmd, hwInfo, 0u, 0, 0, streamProperties);
|
||||
parseCommands<FamilyType>(linearStream);
|
||||
auto cfeStateIt = find<CFE_STATE *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), cfeStateIt);
|
||||
auto cfeState = reinterpret_cast<CFE_STATE *>(*cfeStateIt);
|
||||
|
||||
EXPECT_FALSE(cfeState->getFusedEuDispatch());
|
||||
}
|
Reference in New Issue
Block a user