Disable EUFusion for odd work groups with DPAS on DG2

Related-To: NEO-7495, HSD-14017007475

Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2023-01-26 09:55:59 +00:00
committed by Compute-Runtime-Automation
parent 4c58eda90d
commit 429be6b4cb
26 changed files with 495 additions and 48 deletions

View File

@@ -259,9 +259,9 @@ struct CommandListCoreFamily : CommandListImp {
const CmdListKernelLaunchParams &launchParams);
ze_result_t prepareIndirectParams(const ze_group_count_t *threadGroupDimensions);
void updateStreamProperties(Kernel &kernel, bool isCooperative);
void updateStreamPropertiesForRegularCommandLists(Kernel &kernel, bool isCooperative);
void updateStreamPropertiesForFlushTaskDispatchFlags(Kernel &kernel, bool isCooperative);
void updateStreamPropertiesForRegularCommandLists(Kernel &kernel, bool isCooperative, const ze_group_count_t *threadGroupDimensions, bool isIndirect);
void updateStreamPropertiesForFlushTaskDispatchFlags(Kernel &kernel, bool isCooperative, const ze_group_count_t *threadGroupDimensions, bool isIndirect);
void updateStreamProperties(Kernel &kernel, bool isCooperative, const ze_group_count_t *threadGroupDimensions, bool isIndirect);
void clearCommandsToPatch();
size_t getTotalSizeForCopyRegion(const ze_copy_region_t *region, uint32_t pitch, uint32_t slicePitch);

View File

@@ -2344,32 +2344,48 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::prepareIndirectParams(const ze
}
// Updates the stream properties tracked by this command list for the given kernel launch.
// All arguments are forwarded unchanged to one of two implementations: the flush-task variant
// when isFlushTaskSubmissionEnabled is set, the regular-command-list variant otherwise.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel, bool isCooperative) {
void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel, bool isCooperative, const ze_group_count_t *threadGroupDimensions, bool isIndirect) {
if (this->isFlushTaskSubmissionEnabled) {
updateStreamPropertiesForFlushTaskDispatchFlags(kernel, isCooperative);
updateStreamPropertiesForFlushTaskDispatchFlags(kernel, isCooperative, threadGroupDimensions, isIndirect);
} else {
updateStreamPropertiesForRegularCommandLists(kernel, isCooperative);
updateStreamPropertiesForRegularCommandLists(kernel, isCooperative, threadGroupDimensions, isIndirect);
}
}
// Records the kernel's requirements in requiredStreamState: compute-mode (GRF number / thread
// arbitration), front-end (dispatch-all-walker / EU fusion) and pipeline-select (systolic mode).
// threadGroupDimensions may be nullptr; in that case the DPAS-specific EU fusion check is skipped.
// isIndirect: when true the group counts are not read from threadGroupDimensions and a null
// group-count pointer is handed to the product helper instead.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::updateStreamPropertiesForFlushTaskDispatchFlags(Kernel &kernel, bool isCooperative) {
void CommandListCoreFamily<gfxCoreFamily>::updateStreamPropertiesForFlushTaskDispatchFlags(Kernel &kernel, bool isCooperative, const ze_group_count_t *threadGroupDimensions, bool isIndirect) {
auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes;
// Start from the kernel's own request to disable EU fusion.
bool fusedEuDisabled = kernelAttributes.flags.requiresDisabledEUFusion;
auto &productHelper = device->getProductHelper();
// The extra check runs only when the product helper reports that the DPAS calculation is needed.
if (productHelper.isCalculationForDisablingEuFusionWithDpasNeeded()) {
if (threadGroupDimensions) {
uint32_t *groupCountPtr = nullptr;
uint32_t groupCount[3] = {};
// Only direct dispatches copy the group counts; for indirect ones groupCountPtr stays nullptr.
if (!isIndirect) {
groupCount[0] = threadGroupDimensions->groupCountX;
groupCount[1] = threadGroupDimensions->groupCountY;
groupCount[2] = threadGroupDimensions->groupCountZ;
groupCountPtr = groupCount;
}
// OR-ed in, so the helper's answer can add the requirement but never clear the kernel's own flag.
fusedEuDisabled |= productHelper.isFusedEuDisabledForDpas(kernelAttributes.flags.usesSystolicPipelineSelectMode, kernel.getGroupSize(), groupCountPtr);
}
}
requiredStreamState.stateComputeMode.setPropertiesGrfNumberThreadArbitration(kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, rootDeviceEnvironment);
requiredStreamState.frontEndState.setPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, rootDeviceEnvironment);
requiredStreamState.frontEndState.setPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(isCooperative, fusedEuDisabled, rootDeviceEnvironment);
requiredStreamState.pipelineSelect.setPropertySystolicMode(kernelAttributes.flags.usesSystolicPipelineSelectMode, rootDeviceEnvironment);
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::updateStreamPropertiesForRegularCommandLists(Kernel &kernel, bool isCooperative) {
void CommandListCoreFamily<gfxCoreFamily>::updateStreamPropertiesForRegularCommandLists(Kernel &kernel, bool isCooperative, const ze_group_count_t *threadGroupDimensions, bool isIndirect) {
using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE;
auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes;
KernelImp &kernelImp = static_cast<KernelImp &>(kernel);
currentMocsState = static_cast<int32_t>(device->getMOCS(!kernelImp.getKernelRequiresUncachedMocs(), false) >> 1);
@@ -2391,8 +2407,23 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamPropertiesForRegularComma
currentIndirectObjectBaseAddress = ioh->getHeapGpuBase();
currentIndirectObjectSize = ioh->getHeapSizeInPages();
bool fusedEuDisabled = kernelAttributes.flags.requiresDisabledEUFusion;
auto &productHelper = device->getProductHelper();
if (productHelper.isCalculationForDisablingEuFusionWithDpasNeeded()) {
if (threadGroupDimensions) {
uint32_t *groupCountPtr = nullptr;
uint32_t groupCount[3] = {};
if (!isIndirect) {
groupCount[0] = threadGroupDimensions->groupCountX;
groupCount[1] = threadGroupDimensions->groupCountY;
groupCount[2] = threadGroupDimensions->groupCountZ;
groupCountPtr = groupCount;
}
fusedEuDisabled |= productHelper.isFusedEuDisabledForDpas(kernelAttributes.flags.usesSystolicPipelineSelectMode, kernel.getGroupSize(), groupCountPtr);
}
}
if (!containsAnyKernel) {
requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, cmdListDefaultDisableOverdispatch, -1, rootDeviceEnvironment);
requiredStreamState.frontEndState.setProperties(isCooperative, fusedEuDisabled, cmdListDefaultDisableOverdispatch, -1, rootDeviceEnvironment);
requiredStreamState.pipelineSelect.setProperties(true, false, kernelAttributes.flags.usesSystolicPipelineSelectMode, rootDeviceEnvironment);
requiredStreamState.stateBaseAddress.setProperties(kernelImp.getKernelDescriptor().kernelAttributes.flags.useGlobalAtomics, currentMocsState,
@@ -2425,7 +2456,7 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamPropertiesForRegularComma
rootDeviceEnvironment);
}
finalStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, cmdListDefaultDisableOverdispatch, -1, rootDeviceEnvironment);
finalStreamState.frontEndState.setProperties(isCooperative, fusedEuDisabled, cmdListDefaultDisableOverdispatch, -1, rootDeviceEnvironment);
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
if (finalStreamState.frontEndState.isDirty() && logicalStateHelperBlock) {
if (isPatchingVfeStateAllowed) {

View File

@@ -152,7 +152,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
std::list<void *> additionalCommands;
updateStreamProperties(*kernel, launchParams.isCooperative);
updateStreamProperties(*kernel, launchParams.isCooperative, threadGroupDimensions, launchParams.isIndirect);
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
0, // eventAddress
neoDevice, // device

View File

@@ -265,7 +265,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs();
this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs();
updateStreamProperties(*kernel, launchParams.isCooperative);
updateStreamProperties(*kernel, launchParams.isCooperative, threadGroupDimensions, launchParams.isIndirect);
auto localMemSize = static_cast<uint32_t>(neoDevice->getDeviceInfo().localMemSize);
auto slmTotalSize = kernelImp->getSlmTotalSize();

View File

@@ -384,7 +384,6 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz
NEO::computeWorkgroupSize2D(maxWorkGroupSize, retGroupSize, workItems, simd);
}
}
*groupSizeX = static_cast<uint32_t>(retGroupSize[0]);
*groupSizeY = static_cast<uint32_t>(retGroupSize[1]);
*groupSizeZ = static_cast<uint32_t>(retGroupSize[2]);

View File

@@ -77,6 +77,8 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
using BaseClass::stateComputeModeTracking;
using BaseClass::unifiedMemoryControls;
using BaseClass::updateStreamProperties;
using BaseClass::updateStreamPropertiesForFlushTaskDispatchFlags;
using BaseClass::updateStreamPropertiesForRegularCommandLists;
WhiteBox() : ::L0::CommandListCoreFamily<gfxCoreFamily>(BaseClass::defaultNumIddsPerBlock) {}

View File

@@ -340,7 +340,8 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModePropertiesWhenUpdateStr
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100;
commandList->updateStreamProperties(kernel, false);
const ze_group_count_t launchKernelArgs = {};
commandList->updateStreamProperties(kernel, false, &launchKernelArgs, false);
if (commandList->stateComputeModeTracking) {
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
@@ -350,7 +351,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModePropertiesWhenUpdateStr
}
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x80;
commandList->updateStreamProperties(kernel, false);
commandList->updateStreamProperties(kernel, false, &launchKernelArgs, false);
EXPECT_EQ(productHelper.isGrfNumReportedWithScm(), commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
}
@@ -377,7 +378,8 @@ HWTEST2_F(CommandListAppendLaunchKernel,
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100;
commandList->updateStreamProperties(kernel, false);
const ze_group_count_t launchKernelArgs = {};
commandList->updateStreamProperties(kernel, false, &launchKernelArgs, false);
if (commandList->stateComputeModeTracking) {
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
if (productHelper.isGrfNumReportedWithScm()) {
@@ -391,7 +393,7 @@ HWTEST2_F(CommandListAppendLaunchKernel,
}
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x80;
commandList->updateStreamProperties(kernel, false);
commandList->updateStreamProperties(kernel, false, &launchKernelArgs, false);
EXPECT_EQ(productHelper.isGrfNumReportedWithScm(), commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
}
@@ -409,7 +411,8 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModePropertiesWhenPropertes
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100;
commandList->updateStreamProperties(kernel, false);
const ze_group_count_t launchKernelArgs = {};
commandList->updateStreamProperties(kernel, false, &launchKernelArgs, false);
if (commandList->stateComputeModeTracking) {
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
@@ -418,7 +421,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModePropertiesWhenPropertes
EXPECT_EQ(productHelper.isGrfNumReportedWithScm(), commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
}
commandList->updateStreamProperties(kernel, false);
commandList->updateStreamProperties(kernel, false, &launchKernelArgs, false);
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
}

View File

@@ -1374,18 +1374,19 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenDebugToggleSetWhenUpdateStreamProp
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
// initial kernel with no policy preference
pCommandList->updateStreamProperties(kernel, false);
const ze_group_count_t launchKernelArgs = {};
pCommandList->updateStreamProperties(kernel, false, &launchKernelArgs, false);
EXPECT_EQ(defaultThreadArbitrationPolicy, pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value);
// policy changed to non-default state
pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value = nonDefaultThreadArbitrationPolicy;
// another kernel with no policy preference - do not update policy
pCommandList->updateStreamProperties(kernel, false);
pCommandList->updateStreamProperties(kernel, false, &launchKernelArgs, false);
EXPECT_EQ(nonDefaultThreadArbitrationPolicy, pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value);
// another kernel with no policy preference, this time with debug toggle set - update policy back to default value
DebugManager.flags.ForceDefaultThreadArbitrationPolicyIfNotSpecified.set(true);
pCommandList->updateStreamProperties(kernel, false);
pCommandList->updateStreamProperties(kernel, false, &launchKernelArgs, false);
EXPECT_EQ(defaultThreadArbitrationPolicy, pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value);
}

View File

@@ -372,11 +372,12 @@ HWTEST2_F(CommandListAppendLaunchKernel, whenUpdateStreamPropertiesIsCalledThenR
const auto &productHelper = device->getProductHelper();
int32_t expectedDisableOverdispatch = productHelper.isDisableOverdispatchAvailable(*defaultHwInfo) ? 1 : -1;
pCommandList->updateStreamProperties(kernel, false);
const ze_group_count_t launchKernelArgs = {};
pCommandList->updateStreamProperties(kernel, false, &launchKernelArgs, false);
EXPECT_EQ(expectedDisableOverdispatch, pCommandList->requiredStreamState.frontEndState.disableOverdispatch.value);
EXPECT_EQ(expectedDisableOverdispatch, pCommandList->finalStreamState.frontEndState.disableOverdispatch.value);
pCommandList->updateStreamProperties(kernel, false);
pCommandList->updateStreamProperties(kernel, false, &launchKernelArgs, false);
EXPECT_EQ(expectedDisableOverdispatch, pCommandList->requiredStreamState.frontEndState.disableOverdispatch.value);
EXPECT_EQ(expectedDisableOverdispatch, pCommandList->finalStreamState.frontEndState.disableOverdispatch.value);
}

View File

@@ -217,23 +217,24 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenVariousKernelsWhenUpdateStreamProp
auto &productHelper = device->getProductHelper();
int32_t expectedDispatchAllWalkerEnable = productHelper.isComputeDispatchAllWalkerEnableInCfeStateRequired(device->getHwInfo()) ? 0 : -1;
pCommandList->updateStreamProperties(defaultKernel, false);
const ze_group_count_t launchKernelArgs = {};
pCommandList->updateStreamProperties(defaultKernel, false, &launchKernelArgs, false);
EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value);
EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->finalStreamState.frontEndState.computeDispatchAllWalkerEnable.value);
EXPECT_EQ(0u, pCommandList->commandsToPatch.size());
pCommandList->reset();
pCommandList->updateStreamProperties(cooperativeKernel, true);
pCommandList->updateStreamProperties(cooperativeKernel, true);
pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs, false);
pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs, false);
expectedDispatchAllWalkerEnable = expectedDispatchAllWalkerEnable != -1 ? 1 : expectedDispatchAllWalkerEnable;
EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value);
EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->finalStreamState.frontEndState.computeDispatchAllWalkerEnable.value);
EXPECT_EQ(0u, pCommandList->commandsToPatch.size());
pCommandList->reset();
pCommandList->updateStreamProperties(defaultKernel, false);
pCommandList->updateStreamProperties(cooperativeKernel, true);
pCommandList->updateStreamProperties(defaultKernel, false, &launchKernelArgs, false);
pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs, false);
expectedDispatchAllWalkerEnable = expectedDispatchAllWalkerEnable != -1 ? 0 : expectedDispatchAllWalkerEnable;
EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value);
expectedDispatchAllWalkerEnable = expectedDispatchAllWalkerEnable != -1 ? 1 : expectedDispatchAllWalkerEnable;
@@ -242,18 +243,18 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenVariousKernelsWhenUpdateStreamProp
EXPECT_EQ(expectedCommandsToPatch, pCommandList->commandsToPatch.size());
pCommandList->reset();
pCommandList->updateStreamProperties(cooperativeKernel, true);
pCommandList->updateStreamProperties(defaultKernel, false);
pCommandList->updateStreamProperties(cooperativeKernel, true);
pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs, false);
pCommandList->updateStreamProperties(defaultKernel, false, &launchKernelArgs, false);
pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs, false);
EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value);
EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->finalStreamState.frontEndState.computeDispatchAllWalkerEnable.value);
expectedCommandsToPatch = expectedCommandsToPatch != 0 ? 2 : 0;
EXPECT_EQ(expectedCommandsToPatch, pCommandList->commandsToPatch.size());
pCommandList->reset();
pCommandList->updateStreamProperties(defaultKernel, false);
pCommandList->updateStreamProperties(defaultKernel, false);
pCommandList->updateStreamProperties(cooperativeKernel, true);
pCommandList->updateStreamProperties(defaultKernel, false, &launchKernelArgs, false);
pCommandList->updateStreamProperties(defaultKernel, false, &launchKernelArgs, false);
pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs, false);
expectedDispatchAllWalkerEnable = expectedDispatchAllWalkerEnable != -1 ? 0 : expectedDispatchAllWalkerEnable;
EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value);
expectedDispatchAllWalkerEnable = expectedDispatchAllWalkerEnable != -1 ? 1 : expectedDispatchAllWalkerEnable;
@@ -283,14 +284,15 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenVariousKernelsAndPatchingDisallowe
auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
pCommandList->updateStreamProperties(defaultKernel, false);
pCommandList->updateStreamProperties(cooperativeKernel, true);
const ze_group_count_t launchKernelArgs = {};
pCommandList->updateStreamProperties(defaultKernel, false, &launchKernelArgs, false);
pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs, false);
EXPECT_EQ(0u, pCommandList->commandsToPatch.size());
pCommandList->reset();
DebugManager.flags.AllowPatchingVfeStateInCommandLists.set(1);
pCommandList->updateStreamProperties(defaultKernel, false);
pCommandList->updateStreamProperties(cooperativeKernel, true);
pCommandList->updateStreamProperties(defaultKernel, false, &launchKernelArgs, false);
pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs, false);
const auto &productHelper = device->getProductHelper();
size_t expectedCmdsToPatch = productHelper.isComputeDispatchAllWalkerEnableInCfeStateRequired(device->getHwInfo()) ? 1 : 0;

View File

@@ -13,6 +13,7 @@
#include "level_zero/core/source/xe_hpg_core/cmdlist_xe_hpg_core.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
namespace L0 {
@@ -61,5 +62,128 @@ HWTEST2_F(CommandListTests, givenDG2WithBSteppingWhenCreatingCommandListThenAddi
EXPECT_TRUE(cmdSba->getDynamicStateBaseAddressModifyEnable());
EXPECT_TRUE(cmdSba->getDynamicStateBufferSizeModifyEnable());
}
HWTEST2_F(CommandListTests, GivenKernelWithDpasWhenUpdateStreamPropertiesForRegularCommandListsCalledAndLwsIsOddThenFusedEuIsDisabled, IsDG2) {
    // Scenario: systolic kernel, odd local work size (7x1x1), direct dispatch of 3x1x1 groups.
    Mock<::L0::Kernel> kernel;
    std::unique_ptr<Module> mockModule{new Mock<Module>(device, nullptr)};
    kernel.module = mockModule.get();

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto result = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    // The descriptor is exposed as const; cast it away to mark the kernel as systolic.
    auto &descriptor = const_cast<NEO::KernelDescriptor &>(kernel.getKernelDescriptor());
    descriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode = true;

    const uint32_t localWorkSize[3] = {7u, 1u, 1u};
    for (auto dim = 0u; dim < 3u; dim++) {
        kernel.groupSize[dim] = localWorkSize[dim];
    }

    const ze_group_count_t groupCounts = {3, 1, 1};
    commandList->updateStreamPropertiesForRegularCommandLists(kernel, false, &groupCounts, false);

    EXPECT_TRUE(commandList->finalStreamState.frontEndState.disableEUFusion.value);
}
HWTEST2_F(CommandListTests, GivenKernelWithDpasWhenUpdateStreamPropertiesForRegularCommandListsCalledAndLwsIsNonOddThenFusedEuIsNotDisabled, IsDG2) {
    // Scenario: systolic kernel, even local work size (8x1x1), direct dispatch of 3x1x1 groups.
    Mock<::L0::Kernel> kernel;
    std::unique_ptr<Module> mockModule{new Mock<Module>(device, nullptr)};
    kernel.module = mockModule.get();

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto result = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    // The descriptor is exposed as const; cast it away to mark the kernel as systolic.
    auto &descriptor = const_cast<NEO::KernelDescriptor &>(kernel.getKernelDescriptor());
    descriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode = true;

    const uint32_t localWorkSize[3] = {8u, 1u, 1u};
    for (auto dim = 0u; dim < 3u; dim++) {
        kernel.groupSize[dim] = localWorkSize[dim];
    }

    const ze_group_count_t groupCounts = {3, 1, 1};
    commandList->updateStreamPropertiesForRegularCommandLists(kernel, false, &groupCounts, false);

    EXPECT_FALSE(commandList->finalStreamState.frontEndState.disableEUFusion.value);
}
HWTEST2_F(CommandListTests, GivenKernelWithDpasWhenUpdateStreamPropertiesForRegularCommandListsCalledAndLwsOneAndDispatchIsIndirectThenFusedEuIsDisabled, IsDG2) {
    // Scenario: systolic kernel, local work size 1x1x1, indirect dispatch (last argument is true).
    Mock<::L0::Kernel> kernel;
    std::unique_ptr<Module> mockModule{new Mock<Module>(device, nullptr)};
    kernel.module = mockModule.get();

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto result = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    // The descriptor is exposed as const; cast it away to mark the kernel as systolic.
    auto &descriptor = const_cast<NEO::KernelDescriptor &>(kernel.getKernelDescriptor());
    descriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode = true;

    const uint32_t localWorkSize[3] = {1u, 1u, 1u};
    for (auto dim = 0u; dim < 3u; dim++) {
        kernel.groupSize[dim] = localWorkSize[dim];
    }

    const ze_group_count_t groupCounts = {4, 1, 1};
    commandList->updateStreamPropertiesForRegularCommandLists(kernel, false, &groupCounts, true);

    EXPECT_TRUE(commandList->finalStreamState.frontEndState.disableEUFusion.value);
}
HWTEST2_F(CommandListTests, GivenKernelWithDpasWhenUpdateStreamPropertiesForFlushTaskDispatchFlagsCalledAndLwsIsOddThenFusedEuIsDisabled, IsDG2) {
    // Scenario: systolic kernel, odd local work size (7x1x1), direct dispatch of 3x1x1 groups.
    Mock<::L0::Kernel> kernel;
    std::unique_ptr<Module> mockModule{new Mock<Module>(device, nullptr)};
    kernel.module = mockModule.get();

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto result = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    // The descriptor is exposed as const; cast it away to mark the kernel as systolic.
    auto &descriptor = const_cast<NEO::KernelDescriptor &>(kernel.getKernelDescriptor());
    descriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode = true;

    const uint32_t localWorkSize[3] = {7u, 1u, 1u};
    for (auto dim = 0u; dim < 3u; dim++) {
        kernel.groupSize[dim] = localWorkSize[dim];
    }

    const ze_group_count_t groupCounts = {3, 1, 1};
    commandList->updateStreamPropertiesForFlushTaskDispatchFlags(kernel, false, &groupCounts, false);

    // The flush-task variant is checked through requiredStreamState.
    EXPECT_TRUE(commandList->requiredStreamState.frontEndState.disableEUFusion.value);
}
HWTEST2_F(CommandListTests, GivenKernelWithDpasWhenUpdateStreamPropertiesForFlushTaskDispatchFlagsCalledAndLwsIsNonOddThenFusedEuIsNotDisabled, IsDG2) {
    // Scenario: systolic kernel, even local work size (8x1x1), direct dispatch of 3x1x1 groups.
    Mock<::L0::Kernel> kernel;
    std::unique_ptr<Module> mockModule{new Mock<Module>(device, nullptr)};
    kernel.module = mockModule.get();

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto result = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    // The descriptor is exposed as const; cast it away to mark the kernel as systolic.
    auto &descriptor = const_cast<NEO::KernelDescriptor &>(kernel.getKernelDescriptor());
    descriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode = true;

    const uint32_t localWorkSize[3] = {8u, 1u, 1u};
    for (auto dim = 0u; dim < 3u; dim++) {
        kernel.groupSize[dim] = localWorkSize[dim];
    }

    const ze_group_count_t groupCounts = {3, 1, 1};
    commandList->updateStreamPropertiesForFlushTaskDispatchFlags(kernel, false, &groupCounts, false);

    // The flush-task variant is checked through requiredStreamState.
    EXPECT_FALSE(commandList->requiredStreamState.frontEndState.disableEUFusion.value);
}
HWTEST2_F(CommandListTests, GivenKernelWithDpasWhenUpdateStreamPropertiesForFlushTaskDispatchFlagsCalledAndLwsOneAndDispatchIsIndirectThenFusedEuIsDisabled, IsDG2) {
    // Scenario: systolic kernel, local work size 1x1x1, indirect dispatch (last argument is true).
    Mock<::L0::Kernel> kernel;
    std::unique_ptr<Module> mockModule{new Mock<Module>(device, nullptr)};
    kernel.module = mockModule.get();

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto result = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    // The descriptor is exposed as const; cast it away to mark the kernel as systolic.
    auto &descriptor = const_cast<NEO::KernelDescriptor &>(kernel.getKernelDescriptor());
    descriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode = true;

    const uint32_t localWorkSize[3] = {1u, 1u, 1u};
    for (auto dim = 0u; dim < 3u; dim++) {
        kernel.groupSize[dim] = localWorkSize[dim];
    }

    const ze_group_count_t groupCounts = {4, 1, 1};
    commandList->updateStreamPropertiesForFlushTaskDispatchFlags(kernel, false, &groupCounts, true);

    // The flush-task variant is checked through requiredStreamState.
    EXPECT_TRUE(commandList->requiredStreamState.frontEndState.disableEUFusion.value);
}
HWTEST2_F(CommandListTests, GivenKernelWithDpasWhenUpdateStreamPropertiesForFlushTaskDispatchFlagsCalledAndGroupCountIsNullptrThenFusedEuIsNotDisabled, IsDG2) {
    // Scenario: systolic kernel, local work size 1x1x1, but no thread group dimensions are supplied.
    Mock<::L0::Kernel> kernel;
    std::unique_ptr<Module> mockModule{new Mock<Module>(device, nullptr)};
    kernel.module = mockModule.get();

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    auto result = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    // The descriptor is exposed as const; cast it away to mark the kernel as systolic.
    auto &descriptor = const_cast<NEO::KernelDescriptor &>(kernel.getKernelDescriptor());
    descriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode = true;

    const uint32_t localWorkSize[3] = {1u, 1u, 1u};
    for (auto dim = 0u; dim < 3u; dim++) {
        kernel.groupSize[dim] = localWorkSize[dim];
    }

    // A null group-count pointer is passed on purpose.
    commandList->updateStreamPropertiesForFlushTaskDispatchFlags(kernel, false, nullptr, true);

    EXPECT_FALSE(commandList->requiredStreamState.frontEndState.disableEUFusion.value);
}
} // namespace ult
} // namespace L0

View File

@@ -348,7 +348,8 @@ HWTEST2_F(CommandListCreate, GivenComputeModePropertiesWhenUpdateStreamPropertie
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100;
commandList->updateStreamProperties(kernel, false);
const ze_group_count_t launchKernelArgs = {};
commandList->updateStreamProperties(kernel, false, &launchKernelArgs, false);
if (commandList->stateComputeModeTracking) {
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
if (productHelper.isGrfNumReportedWithScm()) {
@@ -361,7 +362,7 @@ HWTEST2_F(CommandListCreate, GivenComputeModePropertiesWhenUpdateStreamPropertie
EXPECT_TRUE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
}
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x80;
commandList->updateStreamProperties(kernel, false);
commandList->updateStreamProperties(kernel, false, &launchKernelArgs, false);
EXPECT_TRUE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
}
@@ -385,7 +386,8 @@ HWTEST2_F(CommandListCreate, GivenComputeModePropertiesWhenUpdateStreamPropertie
auto result = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100;
commandList->updateStreamProperties(kernel, false);
const ze_group_count_t launchKernelArgs = {};
commandList->updateStreamProperties(kernel, false, &launchKernelArgs, false);
if (commandList->stateComputeModeTracking) {
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
@@ -393,9 +395,8 @@ HWTEST2_F(CommandListCreate, GivenComputeModePropertiesWhenUpdateStreamPropertie
EXPECT_TRUE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
EXPECT_TRUE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
}
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x80;
commandList->updateStreamProperties(kernel, false);
commandList->updateStreamProperties(kernel, false, &launchKernelArgs, false);
EXPECT_TRUE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
}