Share front end state updates between regular and immediate command lists

Related-To: NEO-5019

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz 2022-09-20 16:46:15 +00:00 committed by Compute-Runtime-Automation
parent 7f0619e6b9
commit 5986a7199a
16 changed files with 415 additions and 22 deletions

View File

@ -2315,12 +2315,10 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE;
auto &hwInfo = device->getHwInfo();
const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
auto disableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(hwInfo);
auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes;
if (!containsAnyKernel) {
requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, disableOverdispatch, -1, hwInfo);
requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, true, -1, hwInfo);
requiredStreamState.pipelineSelect.setProperties(true, false, kernelAttributes.flags.usesSystolicPipelineSelectMode, hwInfo);
finalStreamState = requiredStreamState;
requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), hwInfo);
@ -2340,7 +2338,7 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
hwInfo);
}
finalStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, disableOverdispatch, -1, hwInfo);
finalStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, true, -1, hwInfo);
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
if (finalStreamState.frontEndState.isDirty() && logicalStateHelperBlock) {
if (isPatchingVfeStateAllowed) {

View File

@ -622,18 +622,16 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeInitial(
// Refreshes front end state tracking for one executeCommandLists call.
// When front end tracking is disabled, the CSR stream properties are updated from the
// aggregated command list requirements stored in ctx; otherwise only the engine-instanced
// flag is recorded in ctx. ctx.frontEndStateDirty is OR-ed with the resulting dirty flags.
// NOTE(review): this span is a rendered diff hunk, so lines removed by the commit and lines
// added by it appear side by side (both setProperties argument lines, both isDirty lines) —
// confirm the final shape against the real file.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::setFrontEndStateProperties(CommandListExecutionContext &ctx) {
const auto &hwInfo = this->device->getHwInfo();
const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
auto disableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(hwInfo);
auto isEngineInstanced = csr->getOsContext().isEngineInstanced();
auto &streamProperties = this->csr->getStreamProperties();
if (!frontEndTrackingEnabled()) {
streamProperties.frontEndState.setProperties(ctx.anyCommandListWithCooperativeKernels, ctx.anyCommandListRequiresDisabledEUFusion,
disableOverdispatch, isEngineInstanced, hwInfo);
true, isEngineInstanced, hwInfo);
ctx.frontEndStateDirty |= (streamProperties.frontEndState.isDirty() && !this->csr->getLogicalStateHelper());
} else {
ctx.engineInstanced = isEngineInstanced;
}
ctx.frontEndStateDirty |= (streamProperties.frontEndState.isDirty() && !this->csr->getLogicalStateHelper());
ctx.frontEndStateDirty |= csr->getMediaVFEStateDirty();
}

View File

@ -97,6 +97,8 @@ void ModuleMutableCommandListFixture::setUp(uint32_t revision) {
NEO::EngineGroupType engineGroupType = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).getEngineGroupType(neoDevice->getDefaultEngine().getEngineType(), neoDevice->getDefaultEngine().getEngineUsage(), device->getHwInfo());
commandList.reset(whiteboxCast(CommandList::create(productFamily, device, engineGroupType, 0u, returnValue)));
commandListImmediate.reset(whiteboxCast(CommandList::createImmediate(productFamily, device, &queueDesc, false, engineGroupType, returnValue)));
commandListImmediate->isFlushTaskSubmissionEnabled = true;
mockKernelImmData = std::make_unique<MockImmutableData>(0u);
createModuleFromMockBinary(0u, false, mockKernelImmData.get());
@ -108,6 +110,7 @@ void ModuleMutableCommandListFixture::setUp(uint32_t revision) {
void ModuleMutableCommandListFixture::tearDown() {
commandQueue->destroy();
commandList.reset(nullptr);
commandListImmediate.reset(nullptr);
kernel.reset(nullptr);
mockKernelImmData.reset(nullptr);
ModuleImmutableDataFixture::tearDown();

View File

@ -75,6 +75,7 @@ struct ModuleMutableCommandListFixture : public ModuleImmutableDataFixture {
std::unique_ptr<MockImmutableData> mockKernelImmData;
std::unique_ptr<L0::ult::CommandList> commandList;
std::unique_ptr<L0::ult::CommandList> commandListImmediate;
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
L0::ult::CommandQueue *commandQueue;
};

View File

@ -146,6 +146,7 @@ struct WhiteBox<::L0::CommandList> : public ::L0::CommandListImp {
using BaseClass::commandListPreemptionMode;
using BaseClass::csr;
using BaseClass::initialize;
using BaseClass::isFlushTaskSubmissionEnabled;
using BaseClass::multiReturnPointCommandList;
using BaseClass::nonImmediateLogicalStateHelper;
using BaseClass::partitionCount;

View File

@ -76,7 +76,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
NEO::FrontEndPropertiesSupport frontEndPropertiesSupport = {};
hwInfoConfig.fillFrontEndPropertiesSupportStructure(frontEndPropertiesSupport, device->getHwInfo());
int expectedDisableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(*defaultHwInfo);
int expectedDisableOverdispatch = frontEndPropertiesSupport.disableOverdispatch;
int32_t expectedIsCoherencyRequired = scmPropertiesSupport.coherencyRequired ? hwHelper.forceNonGpuCoherencyWA(true) : -1;
int expectedLargeGrfMode = scmPropertiesSupport.largeGrfMode ? 1 : -1;
int expectedThreadArbitrationPolicy = scmPropertiesSupport.threadArbitrationPolicy ? NEO::ThreadArbitrationPolicy::RoundRobin : -1;

View File

@ -1384,5 +1384,251 @@ HWTEST2_F(MultiReturnCommandListTest,
}
}
// Scenario: a regular command list is appended and executed on the queue first, then an
// immediate command list bound to the same CSR appends the same kernel. The front end command
// is expected exactly once (in the queue stream) and not again on the immediate path.
HWTEST2_F(MultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsrWhenAppendingKernelOnBothRegularFirstThenFrontEndStateIsNotChanged, IsAtLeastSkl) {
    using VFE_STATE_TYPE = typename FamilyType::VFE_STATE_TYPE;

    NEO::FrontEndPropertiesSupport feSupport = {};
    NEO::HwInfoConfig::get(productFamily)->fillFrontEndPropertiesSupportStructure(feSupport, device->getHwInfo());
    // Tracked disableEUFusion value this test expects: 1 with platform support, -1 without.
    const bool euFusionSupported = feSupport.disableEuFusion;
    const int expectedEuFusionValue = euFusionSupported ? 1 : -1;

    EXPECT_TRUE(commandList->multiReturnPointCommandList);
    EXPECT_TRUE(commandListImmediate->multiReturnPointCommandList);

    // --- step 1: append the kernel to the regular command list ---
    auto &regularStream = *commandList->commandContainer.getCommandStream();
    ze_group_count_t dispatchSize{1, 1, 1};
    CmdListKernelLaunchParams kernelLaunchParams = {};
    mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1;

    size_t offsetBefore = regularStream.getUsed();
    ze_result_t status = commandList->appendLaunchKernel(kernel->toHandle(), &dispatchSize, nullptr, 0, nullptr, kernelLaunchParams);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    size_t offsetAfter = regularStream.getUsed();

    auto &regularRequiredState = commandList->getRequiredStreamState();
    auto &regularFinalState = commandList->getFinalStreamState();
    EXPECT_EQ(expectedEuFusionValue, regularRequiredState.frontEndState.disableEUFusion.value);
    EXPECT_EQ(expectedEuFusionValue, regularFinalState.frontEndState.disableEUFusion.value);

    // the regular list itself must not contain a front end command
    GenCmdList parsedCmds;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCmds,
        ptrOffset(regularStream.getCpuBase(), offsetBefore),
        (offsetAfter - offsetBefore)));
    auto frontEndCmds = findAll<VFE_STATE_TYPE *>(parsedCmds.begin(), parsedCmds.end());
    EXPECT_EQ(0u, frontEndCmds.size());

    // --- step 2: execute the regular list on the command queue ---
    auto &queueStream = commandQueue->commandStream;
    auto regularListHandle = commandList->toHandle();
    offsetBefore = queueStream.getUsed();
    status = commandQueue->executeCommandLists(1, &regularListHandle, nullptr, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    offsetAfter = queueStream.getUsed();

    auto queueCsr = commandQueue->getCsr();
    auto &csrStreamProperties = queueCsr->getStreamProperties();
    EXPECT_EQ(expectedEuFusionValue, csrStreamProperties.frontEndState.disableEUFusion.value);

    // exactly one front end command is expected in the queue stream
    parsedCmds.clear();
    frontEndCmds.clear();
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCmds,
        ptrOffset(queueStream.getCpuBase(), offsetBefore),
        (offsetAfter - offsetBefore)));
    frontEndCmds = findAll<VFE_STATE_TYPE *>(parsedCmds.begin(), parsedCmds.end());
    ASSERT_EQ(1u, frontEndCmds.size());

    auto &programmedFrontEnd = *genCmdCast<VFE_STATE_TYPE *>(*frontEndCmds[0]);
    const bool fusionDisabledInCmd = NEO::UnitTestHelper<FamilyType>::getDisableFusionStateFromFrontEndCommand(programmedFrontEnd);
    EXPECT_EQ(euFusionSupported, fusionDisabledInCmd);

    // --- step 3: append the same kernel to the immediate command list ---
    auto &immediateStream = *commandListImmediate->commandContainer.getCommandStream();
    auto &deviceCsr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
    auto &deviceCsrStream = deviceCsr.commandStream;

    size_t csrOffsetBefore = deviceCsrStream.getUsed();
    offsetBefore = immediateStream.getUsed();
    status = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &dispatchSize, nullptr, 0, nullptr, kernelLaunchParams);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    offsetAfter = immediateStream.getUsed();
    size_t csrOffsetAfter = deviceCsrStream.getUsed();

    auto &immediateRequiredState = commandListImmediate->getRequiredStreamState();
    auto &immediateFinalState = commandListImmediate->getFinalStreamState();
    EXPECT_EQ(expectedEuFusionValue, immediateRequiredState.frontEndState.disableEUFusion.value);
    EXPECT_EQ(expectedEuFusionValue, immediateFinalState.frontEndState.disableEUFusion.value);

    // no front end command inside the immediate list stream
    parsedCmds.clear();
    frontEndCmds.clear();
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCmds,
        ptrOffset(immediateStream.getCpuBase(), offsetBefore),
        (offsetAfter - offsetBefore)));
    frontEndCmds = findAll<VFE_STATE_TYPE *>(parsedCmds.begin(), parsedCmds.end());
    EXPECT_EQ(0u, frontEndCmds.size());

    // both submission paths must go through the same CSR
    auto immediateListCsr = commandListImmediate->csr;
    EXPECT_EQ(queueCsr, immediateListCsr);
    EXPECT_EQ(expectedEuFusionValue, csrStreamProperties.frontEndState.disableEUFusion.value);

    // and the CSR stream must not get a second front end command
    parsedCmds.clear();
    frontEndCmds.clear();
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCmds,
        ptrOffset(deviceCsrStream.getCpuBase(), csrOffsetBefore),
        (csrOffsetAfter - csrOffsetBefore)));
    frontEndCmds = findAll<VFE_STATE_TYPE *>(parsedCmds.begin(), parsedCmds.end());
    EXPECT_EQ(0u, frontEndCmds.size());
}
// Scenario: the immediate command list appends the kernel first, then a regular command list
// with the same kernel is executed on a queue sharing the same CSR. The front end command is
// expected exactly once (in the CSR stream, on the immediate path) and not again on the queue.
HWTEST2_F(MultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsrWhenAppendingKernelOnBothImmediateFirstThenFrontEndStateIsNotChanged, IsAtLeastSkl) {
    using VFE_STATE_TYPE = typename FamilyType::VFE_STATE_TYPE;

    NEO::FrontEndPropertiesSupport feSupport = {};
    NEO::HwInfoConfig::get(productFamily)->fillFrontEndPropertiesSupportStructure(feSupport, device->getHwInfo());
    // Tracked disableEUFusion value this test expects: 1 with platform support, -1 without.
    const bool euFusionSupported = feSupport.disableEuFusion;
    const int expectedEuFusionValue = euFusionSupported ? 1 : -1;

    EXPECT_TRUE(commandList->multiReturnPointCommandList);
    EXPECT_TRUE(commandListImmediate->multiReturnPointCommandList);

    // both submission paths must go through the same CSR
    auto queueCsr = commandQueue->getCsr();
    auto &csrStreamProperties = queueCsr->getStreamProperties();
    auto immediateListCsr = commandListImmediate->csr;
    EXPECT_EQ(queueCsr, immediateListCsr);

    ze_group_count_t dispatchSize{1, 1, 1};
    CmdListKernelLaunchParams kernelLaunchParams = {};
    mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1;

    // --- step 1: append the kernel to the immediate command list ---
    auto &immediateStream = *commandListImmediate->commandContainer.getCommandStream();
    auto &deviceCsr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
    auto &deviceCsrStream = deviceCsr.commandStream;

    size_t csrOffsetBefore = deviceCsrStream.getUsed();
    size_t offsetBefore = immediateStream.getUsed();
    ze_result_t status = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &dispatchSize, nullptr, 0, nullptr, kernelLaunchParams);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    size_t offsetAfter = immediateStream.getUsed();
    size_t csrOffsetAfter = deviceCsrStream.getUsed();

    auto &immediateRequiredState = commandListImmediate->getRequiredStreamState();
    auto &immediateFinalState = commandListImmediate->getFinalStreamState();
    EXPECT_EQ(expectedEuFusionValue, immediateRequiredState.frontEndState.disableEUFusion.value);
    EXPECT_EQ(expectedEuFusionValue, immediateFinalState.frontEndState.disableEUFusion.value);

    // no front end command inside the immediate list stream
    GenCmdList parsedCmds;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCmds,
        ptrOffset(immediateStream.getCpuBase(), offsetBefore),
        (offsetAfter - offsetBefore)));
    auto frontEndCmds = findAll<VFE_STATE_TYPE *>(parsedCmds.begin(), parsedCmds.end());
    EXPECT_EQ(0u, frontEndCmds.size());

    EXPECT_EQ(expectedEuFusionValue, csrStreamProperties.frontEndState.disableEUFusion.value);

    // exactly one front end command is expected in the CSR stream
    parsedCmds.clear();
    frontEndCmds.clear();
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCmds,
        ptrOffset(deviceCsrStream.getCpuBase(), csrOffsetBefore),
        (csrOffsetAfter - csrOffsetBefore)));
    frontEndCmds = findAll<VFE_STATE_TYPE *>(parsedCmds.begin(), parsedCmds.end());
    ASSERT_EQ(1u, frontEndCmds.size());

    auto &programmedFrontEnd = *genCmdCast<VFE_STATE_TYPE *>(*frontEndCmds[0]);
    const bool fusionDisabledInCmd = NEO::UnitTestHelper<FamilyType>::getDisableFusionStateFromFrontEndCommand(programmedFrontEnd);
    EXPECT_EQ(euFusionSupported, fusionDisabledInCmd);

    // --- step 2: append the same kernel to the regular command list ---
    auto &regularStream = *commandList->commandContainer.getCommandStream();
    offsetBefore = regularStream.getUsed();
    status = commandList->appendLaunchKernel(kernel->toHandle(), &dispatchSize, nullptr, 0, nullptr, kernelLaunchParams);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    offsetAfter = regularStream.getUsed();

    auto &regularRequiredState = commandList->getRequiredStreamState();
    auto &regularFinalState = commandList->getFinalStreamState();
    EXPECT_EQ(expectedEuFusionValue, regularRequiredState.frontEndState.disableEUFusion.value);
    EXPECT_EQ(expectedEuFusionValue, regularFinalState.frontEndState.disableEUFusion.value);

    // the regular list itself must not contain a front end command
    parsedCmds.clear();
    frontEndCmds.clear();
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCmds,
        ptrOffset(regularStream.getCpuBase(), offsetBefore),
        (offsetAfter - offsetBefore)));
    frontEndCmds = findAll<VFE_STATE_TYPE *>(parsedCmds.begin(), parsedCmds.end());
    EXPECT_EQ(0u, frontEndCmds.size());

    // --- step 3: execute the regular list on the command queue ---
    auto &queueStream = commandQueue->commandStream;
    auto regularListHandle = commandList->toHandle();
    offsetBefore = queueStream.getUsed();
    status = commandQueue->executeCommandLists(1, &regularListHandle, nullptr, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
    offsetAfter = queueStream.getUsed();

    EXPECT_EQ(expectedEuFusionValue, csrStreamProperties.frontEndState.disableEUFusion.value);

    // the queue must not program the front end state a second time
    parsedCmds.clear();
    frontEndCmds.clear();
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCmds,
        ptrOffset(queueStream.getCpuBase(), offsetBefore),
        (offsetAfter - offsetBefore)));
    frontEndCmds = findAll<VFE_STATE_TYPE *>(parsedCmds.begin(), parsedCmds.end());
    EXPECT_EQ(0u, frontEndCmds.size());
}
} // namespace ult
} // namespace L0

View File

@ -6,6 +6,7 @@
*/
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/command_stream/wait_status.h"
#include "shared/source/helpers/array_count.h"
#include "shared/source/helpers/basic_math.h"
@ -2418,6 +2419,8 @@ HWTEST_F(KernelExecutionTypesTests, givenKernelWithDifferentExecutionTypeWhileDo
size_t gws[3] = {63, 0, 0};
auto &mockCsr = device->getUltCommandStreamReceiver<FamilyType>();
mockCsr.feSupportFlags.computeDispatchAllWalker = true;
pKernel->setKernelExecutionType(CL_KERNEL_EXEC_INFO_CONCURRENT_TYPE_INTEL);
mockCmdQ->enqueueKernel(pKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
EXPECT_EQ(mockCsr.lastKernelExecutionType, KernelExecutionType::Concurrent);

View File

@ -1529,3 +1529,118 @@ HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, givenBarrierNodeSetWhe
EXPECT_EQ(0u, pipeControl->getImmediateData());
EXPECT_EQ(gpuAddress, UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*pipeControl));
}
// Drives handleFrontEndStateTransition through each front end property in turn (overdispatch,
// cooperative walker, EU fusion), toggling the matching support flag, and checks when the
// media VFE state is reported dirty.
HWTEST_F(UltCommandStreamReceiverTest, givenFrontEndStateNotInitedWhenTransitionFrontEndPropertiesThenExpectCorrectValuesStored) {
    auto flags = DispatchFlagsHelper::createDefaultDispatchFlags();
    auto &csrUnderTest = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // start with the other two properties unsupported and a clean VFE state
    csrUnderTest.feSupportFlags.computeDispatchAllWalker = false;
    csrUnderTest.feSupportFlags.disableEuFusion = false;
    csrUnderTest.setMediaVFEStateDirty(false);

    // --- disable overdispatch, supported ---
    csrUnderTest.feSupportFlags.disableOverdispatch = true;

    // NotApplicable never triggers a transition
    flags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotApplicable;
    csrUnderTest.handleFrontEndStateTransition(flags);
    EXPECT_FALSE(csrUnderTest.getMediaVFEStateDirty());

    // NotSet is expected to match the last recorded value
    flags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
    csrUnderTest.handleFrontEndStateTransition(flags);
    EXPECT_FALSE(csrUnderTest.getMediaVFEStateDirty());

    // a differing request marks the state dirty
    flags.additionalKernelExecInfo = AdditionalKernelExecInfo::DisableOverdispatch;
    csrUnderTest.handleFrontEndStateTransition(flags);
    EXPECT_TRUE(csrUnderTest.getMediaVFEStateDirty());

    // --- disable overdispatch, unsupported: the same differing request is ignored ---
    csrUnderTest.setMediaVFEStateDirty(false);
    csrUnderTest.feSupportFlags.disableOverdispatch = false;
    csrUnderTest.lastAdditionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
    csrUnderTest.handleFrontEndStateTransition(flags);
    EXPECT_FALSE(csrUnderTest.getMediaVFEStateDirty());

    // --- kernel execution type, supported ---
    csrUnderTest.feSupportFlags.computeDispatchAllWalker = true;

    flags.kernelExecutionType = KernelExecutionType::NotApplicable;
    csrUnderTest.handleFrontEndStateTransition(flags);
    EXPECT_FALSE(csrUnderTest.getMediaVFEStateDirty());

    flags.kernelExecutionType = KernelExecutionType::Default;
    csrUnderTest.handleFrontEndStateTransition(flags);
    EXPECT_FALSE(csrUnderTest.getMediaVFEStateDirty());

    flags.kernelExecutionType = KernelExecutionType::Concurrent;
    csrUnderTest.handleFrontEndStateTransition(flags);
    EXPECT_TRUE(csrUnderTest.getMediaVFEStateDirty());

    // --- kernel execution type, unsupported: Concurrent vs. Default is ignored ---
    csrUnderTest.setMediaVFEStateDirty(false);
    csrUnderTest.feSupportFlags.computeDispatchAllWalker = false;
    csrUnderTest.lastKernelExecutionType = KernelExecutionType::Default;
    csrUnderTest.handleFrontEndStateTransition(flags);
    EXPECT_FALSE(csrUnderTest.getMediaVFEStateDirty());

    // --- EU fusion, supported ---
    // NOTE(review): the first expectation presumably relies on the tracked disableEUFusion value
    // being -1 at this point — confirm against the fixture setup.
    csrUnderTest.feSupportFlags.disableEuFusion = true;
    flags.disableEUFusion = false;
    csrUnderTest.handleFrontEndStateTransition(flags);
    EXPECT_TRUE(csrUnderTest.getMediaVFEStateDirty());

    // tracked value 0 vs. requested true -> dirty
    csrUnderTest.setMediaVFEStateDirty(false);
    csrUnderTest.streamProperties.frontEndState.disableEUFusion.value = 0;
    flags.disableEUFusion = true;
    csrUnderTest.handleFrontEndStateTransition(flags);
    EXPECT_TRUE(csrUnderTest.getMediaVFEStateDirty());

    // tracked value 0 vs. requested false -> unchanged
    csrUnderTest.setMediaVFEStateDirty(false);
    flags.disableEUFusion = false;
    csrUnderTest.handleFrontEndStateTransition(flags);
    EXPECT_FALSE(csrUnderTest.getMediaVFEStateDirty());

    // --- EU fusion, unsupported: an untracked (-1) value is ignored ---
    csrUnderTest.feSupportFlags.disableEuFusion = false;
    csrUnderTest.streamProperties.frontEndState.disableEUFusion.value = -1;
    flags.disableEUFusion = false;
    csrUnderTest.handleFrontEndStateTransition(flags);
    EXPECT_FALSE(csrUnderTest.getMediaVFEStateDirty());
}
// Checks handleFrontEndStateTransition when the tracked stream properties already hold a value
// (0 or 1): the request is compared against the tracked value and a mismatch marks the media
// VFE state dirty.
HWTEST_F(UltCommandStreamReceiverTest, givenFrontEndStateInitedWhenTransitionFrontEndPropertiesThenExpectCorrectValuesStored) {
    auto flags = DispatchFlagsHelper::createDefaultDispatchFlags();
    auto &csrUnderTest = pDevice->getUltCommandStreamReceiver<FamilyType>();

    // start with the other two properties unsupported and a clean VFE state
    csrUnderTest.feSupportFlags.computeDispatchAllWalker = false;
    csrUnderTest.feSupportFlags.disableEuFusion = false;
    csrUnderTest.setMediaVFEStateDirty(false);

    // --- disable overdispatch, tracked value 0 ---
    csrUnderTest.feSupportFlags.disableOverdispatch = true;
    csrUnderTest.streamProperties.frontEndState.disableOverdispatch.value = 0;

    // NotSet matches the tracked value
    flags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
    csrUnderTest.handleFrontEndStateTransition(flags);
    EXPECT_FALSE(csrUnderTest.getMediaVFEStateDirty());

    // DisableOverdispatch differs from the tracked value
    flags.additionalKernelExecInfo = AdditionalKernelExecInfo::DisableOverdispatch;
    csrUnderTest.handleFrontEndStateTransition(flags);
    EXPECT_TRUE(csrUnderTest.getMediaVFEStateDirty());

    // --- disable overdispatch, tracked value 1: NotSet now differs ---
    csrUnderTest.setMediaVFEStateDirty(false);
    csrUnderTest.streamProperties.frontEndState.disableOverdispatch.value = 1;
    flags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
    csrUnderTest.handleFrontEndStateTransition(flags);
    EXPECT_TRUE(csrUnderTest.getMediaVFEStateDirty());

    // --- kernel execution type, tracked value 0 ---
    csrUnderTest.setMediaVFEStateDirty(false);
    csrUnderTest.feSupportFlags.disableOverdispatch = false;
    csrUnderTest.feSupportFlags.computeDispatchAllWalker = true;
    csrUnderTest.streamProperties.frontEndState.computeDispatchAllWalkerEnable.value = 0;

    // Default matches the tracked value
    flags.kernelExecutionType = KernelExecutionType::Default;
    csrUnderTest.handleFrontEndStateTransition(flags);
    EXPECT_FALSE(csrUnderTest.getMediaVFEStateDirty());

    // Concurrent differs from the tracked value
    flags.kernelExecutionType = KernelExecutionType::Concurrent;
    csrUnderTest.handleFrontEndStateTransition(flags);
    EXPECT_TRUE(csrUnderTest.getMediaVFEStateDirty());

    // --- kernel execution type, tracked value 1: Default now differs ---
    csrUnderTest.setMediaVFEStateDirty(false);
    csrUnderTest.streamProperties.frontEndState.computeDispatchAllWalkerEnable.value = 1;
    flags.kernelExecutionType = KernelExecutionType::Default;
    csrUnderTest.handleFrontEndStateTransition(flags);
    EXPECT_TRUE(csrUnderTest.getMediaVFEStateDirty());
    csrUnderTest.setMediaVFEStateDirty(false);
}

View File

@ -153,6 +153,7 @@ struct UltCommandStreamReceiverTest
commandStreamReceiver.lastSentUseGlobalAtomics = false;
commandStreamReceiver.streamProperties.stateComputeMode.setProperties(0, GrfConfig::DefaultGrfNumber,
hwHelper.getDefaultThreadArbitrationPolicy(), pDevice->getPreemptionMode(), *defaultHwInfo);
commandStreamReceiver.streamProperties.frontEndState.setProperties(false, false, false, -1, *defaultHwInfo);
auto logicalStateHelper = commandStreamReceiver.getLogicalStateHelper();

View File

@ -74,7 +74,9 @@ CommandStreamReceiver::CommandStreamReceiver(ExecutionEnvironment &executionEnvi
this->staticWorkPartitioningEnabled = true;
}
this->systolicModeConfigurable = HwInfoConfig::get(hwInfo.platform.eProductFamily)->isSystolicModeConfigurable(hwInfo);
auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily);
this->systolicModeConfigurable = hwInfoConfig->isSystolicModeConfigurable(hwInfo);
hwInfoConfig->fillFrontEndPropertiesSupportStructure(feSupportFlags, hwInfo);
}
CommandStreamReceiver::~CommandStreamReceiver() {

View File

@ -392,6 +392,7 @@ class CommandStreamReceiver {
LinearStream commandStream;
StreamProperties streamProperties{};
FrontEndPropertiesSupport feSupportFlags{};
// offset for debug state is 1kbyte, tag writes can use multiple offsets for multiple partitions and each offset can vary per platform
const uint64_t debugPauseStateAddressOffset = MemoryConstants::kiloByte;

View File

@ -177,6 +177,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
void unregisterDirectSubmissionFromController();
constexpr bool isGlobalAtomicsProgrammingRequired(bool currentValue) const;
void createKernelArgsBufferAllocation() override;
void handleFrontEndStateTransition(DispatchFlags &dispatchFlags);
HeapDirtyState dshState;
HeapDirtyState iohState;

View File

@ -308,13 +308,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
}
if (!logicalStateHelper) {
if (dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotApplicable && lastAdditionalKernelExecInfo != dispatchFlags.additionalKernelExecInfo) {
setMediaVFEStateDirty(true);
}
if (dispatchFlags.kernelExecutionType != KernelExecutionType::NotApplicable && lastKernelExecutionType != dispatchFlags.kernelExecutionType) {
setMediaVFEStateDirty(true);
}
handleFrontEndStateTransition(dispatchFlags);
}
auto &commandStreamCSR = this->getCS(getRequiredCmdStreamSizeAligned(dispatchFlags, device));
@ -970,14 +964,14 @@ inline void CommandStreamReceiverHw<GfxFamily>::programVFEState(LinearStream &cs
lastKernelExecutionType = dispatchFlags.kernelExecutionType;
}
auto &hwInfo = peekHwInfo();
auto isCooperative = dispatchFlags.kernelExecutionType == KernelExecutionType::Concurrent;
auto disableOverdispatch = (dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotSet);
streamProperties.frontEndState.setProperties(isCooperative, dispatchFlags.disableEUFusion, disableOverdispatch, osContext->isEngineInstanced(), hwInfo);
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
auto engineGroupType = hwHelper.getEngineGroupType(getOsContext().getEngineType(), getOsContext().getEngineUsage(), hwInfo);
auto pVfeState = PreambleHelper<GfxFamily>::getSpaceForVfeState(&csr, hwInfo, engineGroupType);
auto disableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(hwInfo) &&
(dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotSet);
streamProperties.frontEndState.setProperties(lastKernelExecutionType == KernelExecutionType::Concurrent,
dispatchFlags.disableEUFusion, disableOverdispatch, osContext->isEngineInstanced(), hwInfo);
PreambleHelper<GfxFamily>::programVfeState(
pVfeState, hwInfo, requiredScratchSize, getScratchPatchAddress(),
maxFrontEndThreads, streamProperties, logicalStateHelper.get());
@ -1477,4 +1471,29 @@ void CommandStreamReceiverHw<GfxFamily>::initializeDeviceWithFirstSubmission() {
this->flushSmallTask(commandStream, commandStreamStart);
}
// Decides whether the front end (VFE) state has to be reprogrammed for the incoming dispatch.
//
// First, the cached "last" values are refreshed from the tracked stream properties whenever a
// property holds a value other than -1. Then each requested property is compared with the
// cached/tracked one; a mismatch marks the media VFE state dirty, but only if the matching
// feSupportFlags entry is set.
//
// @param dispatchFlags requested additionalKernelExecInfo, kernelExecutionType and
//                      disableEUFusion for the dispatch; only read here.
template <typename GfxFamily>
void CommandStreamReceiverHw<GfxFamily>::handleFrontEndStateTransition(DispatchFlags &dispatchFlags) {
    const auto &trackedFrontEnd = streamProperties.frontEndState;

    // refresh cached values from tracked properties (-1 leaves the cached value untouched)
    const auto trackedOverdispatch = trackedFrontEnd.disableOverdispatch.value;
    if (trackedOverdispatch != -1) {
        lastAdditionalKernelExecInfo = (trackedOverdispatch == 1) ? AdditionalKernelExecInfo::DisableOverdispatch
                                                                  : AdditionalKernelExecInfo::NotSet;
    }
    const auto trackedDispatchAllWalker = trackedFrontEnd.computeDispatchAllWalkerEnable.value;
    if (trackedDispatchAllWalker != -1) {
        lastKernelExecutionType = (trackedDispatchAllWalker == 1) ? KernelExecutionType::Concurrent
                                                                  : KernelExecutionType::Default;
    }

    // overdispatch: NotApplicable requests are ignored
    const bool overdispatchChanged = dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotApplicable &&
                                     lastAdditionalKernelExecInfo != dispatchFlags.additionalKernelExecInfo;
    if (overdispatchChanged && feSupportFlags.disableOverdispatch) {
        setMediaVFEStateDirty(true);
    }

    // kernel execution type: NotApplicable requests are ignored
    const bool executionTypeChanged = dispatchFlags.kernelExecutionType != KernelExecutionType::NotApplicable &&
                                      lastKernelExecutionType != dispatchFlags.kernelExecutionType;
    if (executionTypeChanged && feSupportFlags.computeDispatchAllWalker) {
        setMediaVFEStateDirty(true);
    }

    // EU fusion: an untracked value (-1) always counts as a change
    const auto trackedEuFusion = trackedFrontEnd.disableEUFusion.value;
    const bool euFusionChanged = trackedEuFusion == -1 ||
                                 dispatchFlags.disableEUFusion != (trackedEuFusion != 0);
    if (euFusionChanged && feSupportFlags.disableEuFusion) {
        setMediaVFEStateDirty(true);
    }
}
} // namespace NEO

View File

@ -38,6 +38,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::getCmdSizeForPrologue;
using BaseClass::getScratchPatchAddress;
using BaseClass::getScratchSpaceController;
using BaseClass::handleFrontEndStateTransition;
using BaseClass::indirectHeap;
using BaseClass::iohState;
using BaseClass::isBlitterDirectSubmissionEnabled;
@ -80,6 +81,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::downloadAllocationImpl;
using BaseClass::CommandStreamReceiver::executionEnvironment;
using BaseClass::CommandStreamReceiver::experimentalCmdBuffer;
using BaseClass::CommandStreamReceiver::feSupportFlags;
using BaseClass::CommandStreamReceiver::flushStamp;
using BaseClass::CommandStreamReceiver::globalFenceAllocation;
using BaseClass::CommandStreamReceiver::gpuHangCheckPeriod;
@ -91,6 +93,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
using BaseClass::CommandStreamReceiver::isEnginePrologueSent;
using BaseClass::CommandStreamReceiver::isPreambleSent;
using BaseClass::CommandStreamReceiver::isStateSipSent;
using BaseClass::CommandStreamReceiver::lastAdditionalKernelExecInfo;
using BaseClass::CommandStreamReceiver::lastKernelExecutionType;
using BaseClass::CommandStreamReceiver::lastMediaSamplerConfig;
using BaseClass::CommandStreamReceiver::lastMemoryCompressionState;

View File

@ -231,6 +231,7 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
using CommandStreamReceiver::clearColorAllocation;
using CommandStreamReceiver::commandStream;
using CommandStreamReceiver::dispatchMode;
using CommandStreamReceiver::feSupportFlags;
using CommandStreamReceiver::globalFenceAllocation;
using CommandStreamReceiver::isPreambleSent;
using CommandStreamReceiver::latestFlushedTaskCount;