mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
Disable EUFusion for odd work groups with DPAS on DG2
Related-To: NEO-7495, HSD-14017007475
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
ac63175a0f
commit
017d66a469
@@ -258,7 +258,7 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
const CmdListKernelLaunchParams &launchParams);
|
||||
|
||||
ze_result_t prepareIndirectParams(const ze_group_count_t *threadGroupDimensions);
|
||||
void updateStreamProperties(Kernel &kernel, bool isCooperative);
|
||||
void updateStreamProperties(Kernel &kernel, bool isCooperative, const ze_group_count_t *threadGroupDimensions);
|
||||
void updateStateBaseAddressStreamProperties(Kernel &kernel, bool updateRequiredState, bool captureBaseAddressState);
|
||||
void clearCommandsToPatch();
|
||||
|
||||
|
||||
@@ -2371,15 +2371,21 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStateBaseAddressStreamPropertie
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel, bool isCooperative) {
|
||||
void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel, bool isCooperative, const ze_group_count_t *threadGroupDimensions) {
|
||||
using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE;
|
||||
|
||||
auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment();
|
||||
|
||||
auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes;
|
||||
bool captureBaseAddressState = containsAnyKernel;
|
||||
bool fusedEuDisabled = kernelAttributes.flags.requiresDisabledEUFusion;
|
||||
auto &productHelper = device->getProductHelper();
|
||||
if (threadGroupDimensions) {
|
||||
uint32_t groupCount[3] = {threadGroupDimensions->groupCountX, threadGroupDimensions->groupCountY, threadGroupDimensions->groupCountZ};
|
||||
fusedEuDisabled |= productHelper.isFusedEuDisabledForDpas(kernelAttributes.flags.usesSystolicPipelineSelectMode, kernel.getGroupSize(), groupCount);
|
||||
}
|
||||
if (!containsAnyKernel) {
|
||||
requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, true, -1, rootDeviceEnvironment);
|
||||
requiredStreamState.frontEndState.setProperties(isCooperative, fusedEuDisabled, true, -1, rootDeviceEnvironment);
|
||||
requiredStreamState.pipelineSelect.setProperties(true, false, kernelAttributes.flags.usesSystolicPipelineSelectMode, rootDeviceEnvironment);
|
||||
|
||||
if (!this->isFlushTaskSubmissionEnabled) {
|
||||
@@ -2410,7 +2416,7 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
|
||||
rootDeviceEnvironment);
|
||||
}
|
||||
|
||||
finalStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, true, -1, rootDeviceEnvironment);
|
||||
finalStreamState.frontEndState.setProperties(isCooperative, fusedEuDisabled, true, -1, rootDeviceEnvironment);
|
||||
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
|
||||
if (finalStreamState.frontEndState.isDirty() && logicalStateHelperBlock) {
|
||||
if (isPatchingVfeStateAllowed) {
|
||||
|
||||
@@ -131,7 +131,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
|
||||
std::list<void *> additionalCommands;
|
||||
|
||||
updateStreamProperties(*kernel, launchParams.isCooperative);
|
||||
updateStreamProperties(*kernel, launchParams.isCooperative, threadGroupDimensions);
|
||||
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
|
||||
0, // eventAddress
|
||||
neoDevice, // device
|
||||
|
||||
@@ -247,7 +247,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
|
||||
this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs();
|
||||
this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs();
|
||||
|
||||
updateStreamProperties(*kernel, launchParams.isCooperative);
|
||||
updateStreamProperties(*kernel, launchParams.isCooperative, threadGroupDimensions);
|
||||
|
||||
auto localMemSize = static_cast<uint32_t>(neoDevice->getDeviceInfo().localMemSize);
|
||||
auto slmTotalSize = kernelImp->getSlmTotalSize();
|
||||
|
||||
@@ -368,9 +368,12 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz
|
||||
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
}
|
||||
|
||||
bool requiresEuFusionDisable = kernelImmData->getDescriptor().kernelAttributes.flags.requiresDisabledEUFusion ||
|
||||
neoDevice->getProductHelper().isFusedEuDisabledForDpas(kernelImmData->getDescriptor().kernelAttributes.flags.usesSystolicPipelineSelectMode, nullptr, nullptr);
|
||||
|
||||
NEO::WorkSizeInfo wsInfo(maxWorkGroupSize, kernelImmData->getDescriptor().kernelAttributes.usesBarriers(), simd, this->getSlmTotalSize(),
|
||||
neoDevice->getRootDeviceEnvironment(), numThreadsPerSubSlice, localMemSize,
|
||||
usesImages, false, kernelImmData->getDescriptor().kernelAttributes.flags.requiresDisabledEUFusion);
|
||||
usesImages, false, requiresEuFusionDisable);
|
||||
NEO::computeWorkgroupSizeND(wsInfo, retGroupSize, workItems, dim);
|
||||
} else {
|
||||
if (1U == dim) {
|
||||
@@ -381,7 +384,6 @@ ze_result_t KernelImp::suggestGroupSize(uint32_t globalSizeX, uint32_t globalSiz
|
||||
NEO::computeWorkgroupSize2D(maxWorkGroupSize, retGroupSize, workItems, simd);
|
||||
}
|
||||
}
|
||||
|
||||
*groupSizeX = static_cast<uint32_t>(retGroupSize[0]);
|
||||
*groupSizeY = static_cast<uint32_t>(retGroupSize[1]);
|
||||
*groupSizeZ = static_cast<uint32_t>(retGroupSize[2]);
|
||||
@@ -747,7 +749,12 @@ ze_result_t KernelImp::getProperties(ze_kernel_properties_t *pKernelProperties)
|
||||
|
||||
preferredGroupSizeProperties->preferredMultiple = this->kernelImmData->getKernelInfo()->getMaxSimdSize();
|
||||
auto &gfxCoreHelper = this->module->getDevice()->getGfxCoreHelper();
|
||||
if (gfxCoreHelper.isFusedEuDispatchEnabled(this->module->getDevice()->getHwInfo(), kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion)) {
|
||||
auto &productHelper = this->module->getDevice()->getProductHelper();
|
||||
|
||||
bool requiresEuFusionDisabled = kernelDescriptor.kernelAttributes.flags.requiresDisabledEUFusion ||
|
||||
productHelper.isFusedEuDisabledForDpas(kernelImmData->getDescriptor().kernelAttributes.flags.usesSystolicPipelineSelectMode, nullptr, nullptr);
|
||||
|
||||
if (gfxCoreHelper.isFusedEuDispatchEnabled(this->module->getDevice()->getHwInfo(), requiresEuFusionDisabled)) {
|
||||
preferredGroupSizeProperties->preferredMultiple *= 2;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -340,7 +340,8 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModePropertiesWhenUpdateStr
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100;
|
||||
commandList->updateStreamProperties(kernel, false);
|
||||
const ze_group_count_t launchKernelArgs = {};
|
||||
commandList->updateStreamProperties(kernel, false, &launchKernelArgs);
|
||||
if (commandList->stateComputeModeTracking) {
|
||||
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
|
||||
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
|
||||
@@ -350,7 +351,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModePropertiesWhenUpdateStr
|
||||
}
|
||||
|
||||
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x80;
|
||||
commandList->updateStreamProperties(kernel, false);
|
||||
commandList->updateStreamProperties(kernel, false, &launchKernelArgs);
|
||||
EXPECT_EQ(productHelper.isGrfNumReportedWithScm(), commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
|
||||
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
|
||||
}
|
||||
@@ -377,7 +378,8 @@ HWTEST2_F(CommandListAppendLaunchKernel,
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100;
|
||||
commandList->updateStreamProperties(kernel, false);
|
||||
const ze_group_count_t launchKernelArgs = {};
|
||||
commandList->updateStreamProperties(kernel, false, &launchKernelArgs);
|
||||
if (commandList->stateComputeModeTracking) {
|
||||
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
|
||||
if (productHelper.isGrfNumReportedWithScm()) {
|
||||
@@ -391,7 +393,7 @@ HWTEST2_F(CommandListAppendLaunchKernel,
|
||||
}
|
||||
|
||||
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x80;
|
||||
commandList->updateStreamProperties(kernel, false);
|
||||
commandList->updateStreamProperties(kernel, false, &launchKernelArgs);
|
||||
EXPECT_EQ(productHelper.isGrfNumReportedWithScm(), commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
|
||||
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
|
||||
}
|
||||
@@ -409,7 +411,8 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModePropertiesWhenPropertes
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100;
|
||||
commandList->updateStreamProperties(kernel, false);
|
||||
const ze_group_count_t launchKernelArgs = {};
|
||||
commandList->updateStreamProperties(kernel, false, &launchKernelArgs);
|
||||
if (commandList->stateComputeModeTracking) {
|
||||
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
|
||||
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
|
||||
@@ -418,7 +421,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenComputeModePropertiesWhenPropertes
|
||||
EXPECT_EQ(productHelper.isGrfNumReportedWithScm(), commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
|
||||
}
|
||||
|
||||
commandList->updateStreamProperties(kernel, false);
|
||||
commandList->updateStreamProperties(kernel, false, &launchKernelArgs);
|
||||
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
|
||||
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
|
||||
}
|
||||
|
||||
@@ -1374,18 +1374,19 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenDebugToggleSetWhenUpdateStreamProp
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
// initial kernel with no policy preference
|
||||
pCommandList->updateStreamProperties(kernel, false);
|
||||
const ze_group_count_t launchKernelArgs = {};
|
||||
pCommandList->updateStreamProperties(kernel, false, &launchKernelArgs);
|
||||
EXPECT_EQ(defaultThreadArbitrationPolicy, pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value);
|
||||
|
||||
// policy changed to non-default state
|
||||
pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value = nonDefaultThreadArbitrationPolicy;
|
||||
// another kernel with no policy preference - do not update policy
|
||||
pCommandList->updateStreamProperties(kernel, false);
|
||||
pCommandList->updateStreamProperties(kernel, false, &launchKernelArgs);
|
||||
EXPECT_EQ(nonDefaultThreadArbitrationPolicy, pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value);
|
||||
|
||||
// another kernel with no policy preference, this time with debug toggle set - update policy back to default value
|
||||
DebugManager.flags.ForceDefaultThreadArbitrationPolicyIfNotSpecified.set(true);
|
||||
pCommandList->updateStreamProperties(kernel, false);
|
||||
pCommandList->updateStreamProperties(kernel, false, &launchKernelArgs);
|
||||
EXPECT_EQ(defaultThreadArbitrationPolicy, pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value);
|
||||
}
|
||||
|
||||
|
||||
@@ -372,11 +372,12 @@ HWTEST2_F(CommandListAppendLaunchKernel, whenUpdateStreamPropertiesIsCalledThenR
|
||||
const auto &productHelper = device->getProductHelper();
|
||||
int32_t expectedDisableOverdispatch = productHelper.isDisableOverdispatchAvailable(*defaultHwInfo) ? 1 : -1;
|
||||
|
||||
pCommandList->updateStreamProperties(kernel, false);
|
||||
const ze_group_count_t launchKernelArgs = {};
|
||||
pCommandList->updateStreamProperties(kernel, false, &launchKernelArgs);
|
||||
EXPECT_EQ(expectedDisableOverdispatch, pCommandList->requiredStreamState.frontEndState.disableOverdispatch.value);
|
||||
EXPECT_EQ(expectedDisableOverdispatch, pCommandList->finalStreamState.frontEndState.disableOverdispatch.value);
|
||||
|
||||
pCommandList->updateStreamProperties(kernel, false);
|
||||
pCommandList->updateStreamProperties(kernel, false, &launchKernelArgs);
|
||||
EXPECT_EQ(expectedDisableOverdispatch, pCommandList->requiredStreamState.frontEndState.disableOverdispatch.value);
|
||||
EXPECT_EQ(expectedDisableOverdispatch, pCommandList->finalStreamState.frontEndState.disableOverdispatch.value);
|
||||
}
|
||||
|
||||
@@ -217,23 +217,24 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenVariousKernelsWhenUpdateStreamProp
|
||||
auto &productHelper = device->getProductHelper();
|
||||
int32_t expectedDispatchAllWalkerEnable = productHelper.isComputeDispatchAllWalkerEnableInCfeStateRequired(device->getHwInfo()) ? 0 : -1;
|
||||
|
||||
pCommandList->updateStreamProperties(defaultKernel, false);
|
||||
const ze_group_count_t launchKernelArgs = {};
|
||||
pCommandList->updateStreamProperties(defaultKernel, false, &launchKernelArgs);
|
||||
|
||||
EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value);
|
||||
EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->finalStreamState.frontEndState.computeDispatchAllWalkerEnable.value);
|
||||
EXPECT_EQ(0u, pCommandList->commandsToPatch.size());
|
||||
pCommandList->reset();
|
||||
|
||||
pCommandList->updateStreamProperties(cooperativeKernel, true);
|
||||
pCommandList->updateStreamProperties(cooperativeKernel, true);
|
||||
pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs);
|
||||
pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs);
|
||||
expectedDispatchAllWalkerEnable = expectedDispatchAllWalkerEnable != -1 ? 1 : expectedDispatchAllWalkerEnable;
|
||||
EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value);
|
||||
EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->finalStreamState.frontEndState.computeDispatchAllWalkerEnable.value);
|
||||
EXPECT_EQ(0u, pCommandList->commandsToPatch.size());
|
||||
pCommandList->reset();
|
||||
|
||||
pCommandList->updateStreamProperties(defaultKernel, false);
|
||||
pCommandList->updateStreamProperties(cooperativeKernel, true);
|
||||
pCommandList->updateStreamProperties(defaultKernel, false, &launchKernelArgs);
|
||||
pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs);
|
||||
expectedDispatchAllWalkerEnable = expectedDispatchAllWalkerEnable != -1 ? 0 : expectedDispatchAllWalkerEnable;
|
||||
EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value);
|
||||
expectedDispatchAllWalkerEnable = expectedDispatchAllWalkerEnable != -1 ? 1 : expectedDispatchAllWalkerEnable;
|
||||
@@ -242,18 +243,18 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenVariousKernelsWhenUpdateStreamProp
|
||||
EXPECT_EQ(expectedCommandsToPatch, pCommandList->commandsToPatch.size());
|
||||
pCommandList->reset();
|
||||
|
||||
pCommandList->updateStreamProperties(cooperativeKernel, true);
|
||||
pCommandList->updateStreamProperties(defaultKernel, false);
|
||||
pCommandList->updateStreamProperties(cooperativeKernel, true);
|
||||
pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs);
|
||||
pCommandList->updateStreamProperties(defaultKernel, false, &launchKernelArgs);
|
||||
pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs);
|
||||
EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value);
|
||||
EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->finalStreamState.frontEndState.computeDispatchAllWalkerEnable.value);
|
||||
expectedCommandsToPatch = expectedCommandsToPatch != 0 ? 2 : 0;
|
||||
EXPECT_EQ(expectedCommandsToPatch, pCommandList->commandsToPatch.size());
|
||||
pCommandList->reset();
|
||||
|
||||
pCommandList->updateStreamProperties(defaultKernel, false);
|
||||
pCommandList->updateStreamProperties(defaultKernel, false);
|
||||
pCommandList->updateStreamProperties(cooperativeKernel, true);
|
||||
pCommandList->updateStreamProperties(defaultKernel, false, &launchKernelArgs);
|
||||
pCommandList->updateStreamProperties(defaultKernel, false, &launchKernelArgs);
|
||||
pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs);
|
||||
expectedDispatchAllWalkerEnable = expectedDispatchAllWalkerEnable != -1 ? 0 : expectedDispatchAllWalkerEnable;
|
||||
EXPECT_EQ(expectedDispatchAllWalkerEnable, pCommandList->requiredStreamState.frontEndState.computeDispatchAllWalkerEnable.value);
|
||||
expectedDispatchAllWalkerEnable = expectedDispatchAllWalkerEnable != -1 ? 1 : expectedDispatchAllWalkerEnable;
|
||||
@@ -283,14 +284,15 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenVariousKernelsAndPatchingDisallowe
|
||||
auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
pCommandList->updateStreamProperties(defaultKernel, false);
|
||||
pCommandList->updateStreamProperties(cooperativeKernel, true);
|
||||
const ze_group_count_t launchKernelArgs = {};
|
||||
pCommandList->updateStreamProperties(defaultKernel, false, &launchKernelArgs);
|
||||
pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs);
|
||||
EXPECT_EQ(0u, pCommandList->commandsToPatch.size());
|
||||
pCommandList->reset();
|
||||
|
||||
DebugManager.flags.AllowPatchingVfeStateInCommandLists.set(1);
|
||||
pCommandList->updateStreamProperties(defaultKernel, false);
|
||||
pCommandList->updateStreamProperties(cooperativeKernel, true);
|
||||
pCommandList->updateStreamProperties(defaultKernel, false, &launchKernelArgs);
|
||||
pCommandList->updateStreamProperties(cooperativeKernel, true, &launchKernelArgs);
|
||||
|
||||
const auto &productHelper = device->getProductHelper();
|
||||
size_t expectedCmdsToPatch = productHelper.isComputeDispatchAllWalkerEnableInCfeStateRequired(device->getHwInfo()) ? 1 : 0;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2022 Intel Corporation
|
||||
* Copyright (C) 2021-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "level_zero/core/source/xe_hpg_core/cmdlist_xe_hpg_core.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
|
||||
|
||||
namespace L0 {
|
||||
@@ -61,5 +62,40 @@ HWTEST2_F(CommandListTests, givenDG2WithBSteppingWhenCreatingCommandListThenAddi
|
||||
EXPECT_TRUE(cmdSba->getDynamicStateBaseAddressModifyEnable());
|
||||
EXPECT_TRUE(cmdSba->getDynamicStateBufferSizeModifyEnable());
|
||||
}
|
||||
// DG2 only. Marks the mocked kernel as using the systolic pipeline-select mode,
// gives it an odd 7x1x1 group size and updates the stream properties for a
// 3x1x1 group count; the final front-end state must then report EU fusion as
// disabled.
HWTEST2_F(CommandListTests, GivenKernelWithDpasWhenLwsIsOddThenFesedEuIsDisabled, IsDG2) {
    Mock<::L0::Kernel> kernel;
    auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
    kernel.module = pMockModule.get();

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    const auto initStatus = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, initStatus);

    // The descriptor is only exposed as const, so the flag is flipped through a cast.
    auto &descriptor = const_cast<NEO::KernelDescriptor &>(kernel.getKernelDescriptor());
    descriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode = true;

    const uint32_t oddGroupSize[3] = {7, 1, 1};
    for (auto dim = 0u; dim < 3u; ++dim) {
        kernel.groupSize[dim] = oddGroupSize[dim];
    }

    const ze_group_count_t groupCounts = {3, 1, 1};
    commandList->updateStreamProperties(kernel, false, &groupCounts);

    EXPECT_TRUE(commandList->finalStreamState.frontEndState.disableEUFusion.value);
}
|
||||
// DG2 only. Same setup as the odd-size case but with an even 8x1x1 group size:
// the kernel is marked as using the systolic pipeline-select mode, stream
// properties are updated for a 3x1x1 group count, and the final front-end
// state must NOT report EU fusion as disabled.
HWTEST2_F(CommandListTests, GivenKernelWithDpasWhenLwsIsNonOddThenFesedEuIsNotDisabled, IsDG2) {
    Mock<::L0::Kernel> kernel;
    auto pMockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
    kernel.module = pMockModule.get();

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    const auto initStatus = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, initStatus);

    // The descriptor is only exposed as const, so the flag is flipped through a cast.
    auto &descriptor = const_cast<NEO::KernelDescriptor &>(kernel.getKernelDescriptor());
    descriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode = true;

    const uint32_t evenGroupSize[3] = {8, 1, 1};
    for (auto dim = 0u; dim < 3u; ++dim) {
        kernel.groupSize[dim] = evenGroupSize[dim];
    }

    const ze_group_count_t groupCounts = {3, 1, 1};
    commandList->updateStreamProperties(kernel, false, &groupCounts);

    EXPECT_FALSE(commandList->finalStreamState.frontEndState.disableEUFusion.value);
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
* Copyright (C) 2022-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -69,5 +69,115 @@ HWTEST2_F(KernelTestDG2, givenKernelImpWhenSetBufferSurfaceStateCalledThenProgra
|
||||
context->freeMem(devicePtr);
|
||||
}
|
||||
|
||||
// DG2 only. Requests a suggested group size for the same 79x14x1 global size
// twice: once with usesSystolicPipelineSelectMode set on the kernel descriptor
// and once with it cleared. The two suggestions must differ in X or Y.
HWTEST2_F(KernelTestDG2, givenKernelImpWithDpasWhenSuggestingWGSizeThenSizeIsDifferntThanWithoutDpas, IsDG2) {
    ze_kernel_handle_t kernelHandle;

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle);
    ASSERT_EQ(ZE_RESULT_SUCCESS, res);

    auto kernelImp = reinterpret_cast<L0::KernelImp *>(L0::Kernel::fromHandle(kernelHandle));

    // Pin the device topology values set by the original test so the suggestion
    // does not depend on the defaults of the mocked device.
    auto mockNeoDevice = reinterpret_cast<NEO::MockDevice *>(module->getDevice()->getNEODevice());
    mockNeoDevice->deviceInfo.maxNumEUsPerSubSlice = 16;
    mockNeoDevice->deviceInfo.numThreadsPerEU = 8;

    // The immutable descriptor is only exposed as const; the test toggles its flag in place.
    auto &descriptor = const_cast<NEO::KernelDescriptor &>(kernelImp->getImmutableData()->getDescriptor());

    // Each variable carries the global size in and receives the suggested group size out.
    uint32_t groupSizeXDpas = 79u;
    uint32_t groupSizeYDpas = 14u;
    uint32_t groupSizeZDpas = 1u;
    descriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode = true;
    EXPECT_EQ(ZE_RESULT_SUCCESS,
              kernelImp->suggestGroupSize(groupSizeXDpas, groupSizeYDpas, groupSizeZDpas, &groupSizeXDpas, &groupSizeYDpas, &groupSizeZDpas));

    uint32_t groupSizeXNoDpas = 79u;
    uint32_t groupSizeYNoDpas = 14u;
    uint32_t groupSizeZNoDpas = 1u;
    descriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode = false;
    EXPECT_EQ(ZE_RESULT_SUCCESS,
              kernelImp->suggestGroupSize(groupSizeXNoDpas, groupSizeYNoDpas, groupSizeZNoDpas, &groupSizeXNoDpas, &groupSizeYNoDpas, &groupSizeZNoDpas));

    EXPECT_TRUE(groupSizeXDpas != groupSizeXNoDpas || groupSizeYDpas != groupSizeYNoDpas);
    Kernel::fromHandle(kernelHandle)->destroy();
}
|
||||
|
||||
// DG2 only. Requests a suggested group size for the same 79x14x1 global size
// twice: once with requiresDisabledEUFusion set on the kernel descriptor and
// once with it cleared. The two suggestions must differ in X or Y.
HWTEST2_F(KernelTestDG2, givenKernelImpWithFusedEuDisabledWhenSuggestingWGSizeThenSizeIsDifferntThanWithoutDpas, IsDG2) {
    ze_kernel_handle_t kernelHandle;

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle);
    ASSERT_EQ(ZE_RESULT_SUCCESS, res);

    auto kernelImp = reinterpret_cast<L0::KernelImp *>(L0::Kernel::fromHandle(kernelHandle));

    // Pin the device topology values set by the original test so the suggestion
    // does not depend on the defaults of the mocked device.
    auto mockNeoDevice = reinterpret_cast<NEO::MockDevice *>(module->getDevice()->getNEODevice());
    mockNeoDevice->deviceInfo.maxNumEUsPerSubSlice = 16;
    mockNeoDevice->deviceInfo.numThreadsPerEU = 8;

    // The immutable descriptor is only exposed as const; the test toggles its flag in place.
    auto &descriptor = const_cast<NEO::KernelDescriptor &>(kernelImp->getImmutableData()->getDescriptor());

    // Each variable carries the global size in and receives the suggested group size out.
    uint32_t groupSizeXEuFusionDisabled = 79u;
    uint32_t groupSizeYEuFusionDisabled = 14u;
    uint32_t groupSizeZEuFusionDisabled = 1u;
    descriptor.kernelAttributes.flags.requiresDisabledEUFusion = true;
    EXPECT_EQ(ZE_RESULT_SUCCESS,
              kernelImp->suggestGroupSize(groupSizeXEuFusionDisabled, groupSizeYEuFusionDisabled, groupSizeZEuFusionDisabled,
                                          &groupSizeXEuFusionDisabled, &groupSizeYEuFusionDisabled, &groupSizeZEuFusionDisabled));

    uint32_t groupSizeXNoEuFusionDisabled = 79u;
    uint32_t groupSizeYNoEuFusionDisabled = 14u;
    uint32_t groupSizeZNoEuFusionDisabled = 1u;
    descriptor.kernelAttributes.flags.requiresDisabledEUFusion = false;
    EXPECT_EQ(ZE_RESULT_SUCCESS,
              kernelImp->suggestGroupSize(groupSizeXNoEuFusionDisabled, groupSizeYNoEuFusionDisabled, groupSizeZNoEuFusionDisabled,
                                          &groupSizeXNoEuFusionDisabled, &groupSizeYNoEuFusionDisabled, &groupSizeZNoEuFusionDisabled));

    EXPECT_TRUE(groupSizeXEuFusionDisabled != groupSizeXNoEuFusionDisabled || groupSizeYEuFusionDisabled != groupSizeYNoEuFusionDisabled);
    Kernel::fromHandle(kernelHandle)->destroy();
}
|
||||
|
||||
// DG2 only. Queries kernel properties twice with a
// ze_kernel_preferred_group_size_properties_t chained through pNext: once with
// usesSystolicPipelineSelectMode set on the kernel descriptor and once with it
// cleared. The reported preferredMultiple must differ between the two queries.
HWTEST2_F(KernelTestDG2, givenKernelImpWithDpasWhenGetPreferredWorkGroupSizeThenReturnedSizeIsLowerThanSizeForKernelWithoutDpas, IsDG2) {
    ze_kernel_handle_t kernelHandle;

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle);
    ASSERT_EQ(ZE_RESULT_SUCCESS, res);

    auto kernelImp = reinterpret_cast<L0::KernelImp *>(L0::Kernel::fromHandle(kernelHandle));

    ze_kernel_properties_t properties = {};
    ze_kernel_preferred_group_size_properties_t extProperties = {};
    extProperties.stype = ZE_STRUCTURE_TYPE_KERNEL_PREFERRED_GROUP_SIZE_PROPERTIES;
    properties.pNext = &extProperties;

    // The immutable descriptor is only exposed as const; the test toggles its flag in place.
    auto &descriptor = const_cast<NEO::KernelDescriptor &>(kernelImp->getImmutableData()->getDescriptor());

    descriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode = true;
    EXPECT_EQ(ZE_RESULT_SUCCESS, kernelImp->getProperties(&properties));
    auto sizeWithDpas = extProperties.preferredMultiple;

    descriptor.kernelAttributes.flags.usesSystolicPipelineSelectMode = false;
    EXPECT_EQ(ZE_RESULT_SUCCESS, kernelImp->getProperties(&properties));
    auto sizeWithoutDpas = extProperties.preferredMultiple;

    EXPECT_NE(sizeWithDpas, sizeWithoutDpas);
    Kernel::fromHandle(kernelHandle)->destroy();
}
|
||||
|
||||
// DG2 only. Queries kernel properties twice with a
// ze_kernel_preferred_group_size_properties_t chained through pNext: once with
// requiresDisabledEUFusion set on the kernel descriptor and once with it
// cleared. The reported preferredMultiple must differ between the two queries.
HWTEST2_F(KernelTestDG2, givenKernelImpWithFusedEuDisabledWhenGetPreferredWorkGroupSizeThenReturnedSizeIsLowerThanSizeForKernelWithoutFusedEuEnabled, IsDG2) {
    ze_kernel_handle_t kernelHandle;

    ze_kernel_desc_t kernelDesc = {};
    kernelDesc.pKernelName = kernelName.c_str();

    ze_result_t res = module->createKernel(&kernelDesc, &kernelHandle);
    ASSERT_EQ(ZE_RESULT_SUCCESS, res);

    auto kernelImp = reinterpret_cast<L0::KernelImp *>(L0::Kernel::fromHandle(kernelHandle));

    ze_kernel_properties_t properties = {};
    ze_kernel_preferred_group_size_properties_t extProperties = {};
    extProperties.stype = ZE_STRUCTURE_TYPE_KERNEL_PREFERRED_GROUP_SIZE_PROPERTIES;
    properties.pNext = &extProperties;

    // The immutable descriptor is only exposed as const; the test toggles its flag in place.
    auto &descriptor = const_cast<NEO::KernelDescriptor &>(kernelImp->getImmutableData()->getDescriptor());

    descriptor.kernelAttributes.flags.requiresDisabledEUFusion = true;
    EXPECT_EQ(ZE_RESULT_SUCCESS, kernelImp->getProperties(&properties));
    auto sizeWithFusionDisabled = extProperties.preferredMultiple;

    descriptor.kernelAttributes.flags.requiresDisabledEUFusion = false;
    EXPECT_EQ(ZE_RESULT_SUCCESS, kernelImp->getProperties(&properties));
    auto sizeWithoutFusionDisabled = extProperties.preferredMultiple;

    EXPECT_NE(sizeWithFusionDisabled, sizeWithoutFusionDisabled);
    Kernel::fromHandle(kernelHandle)->destroy();
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -348,7 +348,8 @@ HWTEST2_F(CommandListCreate, GivenComputeModePropertiesWhenUpdateStreamPropertie
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100;
|
||||
commandList->updateStreamProperties(kernel, false);
|
||||
const ze_group_count_t launchKernelArgs = {};
|
||||
commandList->updateStreamProperties(kernel, false, &launchKernelArgs);
|
||||
if (commandList->stateComputeModeTracking) {
|
||||
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
|
||||
if (productHelper.isGrfNumReportedWithScm()) {
|
||||
@@ -361,7 +362,7 @@ HWTEST2_F(CommandListCreate, GivenComputeModePropertiesWhenUpdateStreamPropertie
|
||||
EXPECT_TRUE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
|
||||
}
|
||||
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x80;
|
||||
commandList->updateStreamProperties(kernel, false);
|
||||
commandList->updateStreamProperties(kernel, false, &launchKernelArgs);
|
||||
EXPECT_TRUE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
|
||||
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
|
||||
}
|
||||
@@ -385,7 +386,8 @@ HWTEST2_F(CommandListCreate, GivenComputeModePropertiesWhenUpdateStreamPropertie
|
||||
auto result = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x100;
|
||||
commandList->updateStreamProperties(kernel, false);
|
||||
const ze_group_count_t launchKernelArgs = {};
|
||||
commandList->updateStreamProperties(kernel, false, &launchKernelArgs);
|
||||
if (commandList->stateComputeModeTracking) {
|
||||
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
|
||||
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
|
||||
@@ -393,9 +395,8 @@ HWTEST2_F(CommandListCreate, GivenComputeModePropertiesWhenUpdateStreamPropertie
|
||||
EXPECT_TRUE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
|
||||
EXPECT_TRUE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
|
||||
}
|
||||
|
||||
const_cast<NEO::KernelDescriptor *>(&kernel.getKernelDescriptor())->kernelAttributes.numGrfRequired = 0x80;
|
||||
commandList->updateStreamProperties(kernel, false);
|
||||
commandList->updateStreamProperties(kernel, false, &launchKernelArgs);
|
||||
EXPECT_TRUE(commandList->finalStreamState.stateComputeMode.largeGrfMode.isDirty);
|
||||
EXPECT_FALSE(commandList->finalStreamState.stateComputeMode.isCoherencyRequired.isDirty);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user