Share front end state updates between regular and immediate command lists
Related-To: NEO-5019
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
parent
7f0619e6b9
commit
5986a7199a
|
@ -2315,12 +2315,10 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
|
|||
using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE;
|
||||
|
||||
auto &hwInfo = device->getHwInfo();
|
||||
const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||
auto disableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(hwInfo);
|
||||
|
||||
auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes;
|
||||
if (!containsAnyKernel) {
|
||||
requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, disableOverdispatch, -1, hwInfo);
|
||||
requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, true, -1, hwInfo);
|
||||
requiredStreamState.pipelineSelect.setProperties(true, false, kernelAttributes.flags.usesSystolicPipelineSelectMode, hwInfo);
|
||||
finalStreamState = requiredStreamState;
|
||||
requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), hwInfo);
|
||||
|
@ -2340,7 +2338,7 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
|
|||
hwInfo);
|
||||
}
|
||||
|
||||
finalStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, disableOverdispatch, -1, hwInfo);
|
||||
finalStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, true, -1, hwInfo);
|
||||
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
|
||||
if (finalStreamState.frontEndState.isDirty() && logicalStateHelperBlock) {
|
||||
if (isPatchingVfeStateAllowed) {
|
||||
|
|
|
@ -622,18 +622,16 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeInitial(
|
|||
// Pushes the execution context's front end requirements into the CSR stream properties when
// front end tracking is not enabled, otherwise records the engine-instanced flag in ctx, and
// accumulates ctx.frontEndStateDirty from the CSR state.
// NOTE(review): this span is a rendered diff hunk (header: -622,18 +622,16) without +/- markers,
// so pre-change and post-change lines appear overlaid; it is not compilable as shown.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::setFrontEndStateProperties(CommandListExecutionContext &ctx) {
|
||||
const auto &hwInfo = this->device->getHwInfo();
|
||||
// NOTE(review): the next two declarations are presumably the removed side of the hunk; their
// only visible consumer is the "disableOverdispatch, ..." continuation line below - confirm.
const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||
auto disableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(hwInfo);
|
||||
|
||||
auto isEngineInstanced = csr->getOsContext().isEngineInstanced();
|
||||
auto &streamProperties = this->csr->getStreamProperties();
|
||||
if (!frontEndTrackingEnabled()) {
|
||||
streamProperties.frontEndState.setProperties(ctx.anyCommandListWithCooperativeKernels, ctx.anyCommandListRequiresDisabledEUFusion,
|
||||
// NOTE(review): two alternative continuations of the call above follow - presumably the old
// one (passes disableOverdispatch) and the new one (passes true); confirm against the repository.
disableOverdispatch, isEngineInstanced, hwInfo);
|
||||
true, isEngineInstanced, hwInfo);
|
||||
// NOTE(review): the same dirty-flag update appears here inside the branch and again after the
// if/else - presumably the old and new placement of a single statement; confirm.
ctx.frontEndStateDirty |= (streamProperties.frontEndState.isDirty() && !this->csr->getLogicalStateHelper());
|
||||
} else {
|
||||
ctx.engineInstanced = isEngineInstanced;
|
||||
}
|
||||
ctx.frontEndStateDirty |= (streamProperties.frontEndState.isDirty() && !this->csr->getLogicalStateHelper());
|
||||
// The CSR's own media VFE dirty flag also forces front end programming.
ctx.frontEndStateDirty |= csr->getMediaVFEStateDirty();
|
||||
}
|
||||
|
||||
|
|
|
@ -97,6 +97,8 @@ void ModuleMutableCommandListFixture::setUp(uint32_t revision) {
|
|||
NEO::EngineGroupType engineGroupType = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).getEngineGroupType(neoDevice->getDefaultEngine().getEngineType(), neoDevice->getDefaultEngine().getEngineUsage(), device->getHwInfo());
|
||||
|
||||
commandList.reset(whiteboxCast(CommandList::create(productFamily, device, engineGroupType, 0u, returnValue)));
|
||||
commandListImmediate.reset(whiteboxCast(CommandList::createImmediate(productFamily, device, &queueDesc, false, engineGroupType, returnValue)));
|
||||
commandListImmediate->isFlushTaskSubmissionEnabled = true;
|
||||
|
||||
mockKernelImmData = std::make_unique<MockImmutableData>(0u);
|
||||
createModuleFromMockBinary(0u, false, mockKernelImmData.get());
|
||||
|
@ -108,6 +110,7 @@ void ModuleMutableCommandListFixture::setUp(uint32_t revision) {
|
|||
void ModuleMutableCommandListFixture::tearDown() {
|
||||
commandQueue->destroy();
|
||||
commandList.reset(nullptr);
|
||||
commandListImmediate.reset(nullptr);
|
||||
kernel.reset(nullptr);
|
||||
mockKernelImmData.reset(nullptr);
|
||||
ModuleImmutableDataFixture::tearDown();
|
||||
|
|
|
@ -75,6 +75,7 @@ struct ModuleMutableCommandListFixture : public ModuleImmutableDataFixture {
|
|||
|
||||
std::unique_ptr<MockImmutableData> mockKernelImmData;
|
||||
std::unique_ptr<L0::ult::CommandList> commandList;
|
||||
std::unique_ptr<L0::ult::CommandList> commandListImmediate;
|
||||
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
|
||||
L0::ult::CommandQueue *commandQueue;
|
||||
};
|
||||
|
|
|
@ -146,6 +146,7 @@ struct WhiteBox<::L0::CommandList> : public ::L0::CommandListImp {
|
|||
using BaseClass::commandListPreemptionMode;
|
||||
using BaseClass::csr;
|
||||
using BaseClass::initialize;
|
||||
using BaseClass::isFlushTaskSubmissionEnabled;
|
||||
using BaseClass::multiReturnPointCommandList;
|
||||
using BaseClass::nonImmediateLogicalStateHelper;
|
||||
using BaseClass::partitionCount;
|
||||
|
|
|
@ -76,7 +76,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus
|
|||
NEO::FrontEndPropertiesSupport frontEndPropertiesSupport = {};
|
||||
hwInfoConfig.fillFrontEndPropertiesSupportStructure(frontEndPropertiesSupport, device->getHwInfo());
|
||||
|
||||
int expectedDisableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(*defaultHwInfo);
|
||||
int expectedDisableOverdispatch = frontEndPropertiesSupport.disableOverdispatch;
|
||||
int32_t expectedIsCoherencyRequired = scmPropertiesSupport.coherencyRequired ? hwHelper.forceNonGpuCoherencyWA(true) : -1;
|
||||
int expectedLargeGrfMode = scmPropertiesSupport.largeGrfMode ? 1 : -1;
|
||||
int expectedThreadArbitrationPolicy = scmPropertiesSupport.threadArbitrationPolicy ? NEO::ThreadArbitrationPolicy::RoundRobin : -1;
|
||||
|
|
|
@ -1384,5 +1384,251 @@ HWTEST2_F(MultiReturnCommandListTest,
|
|||
}
|
||||
}
|
||||
|
||||
// Scenario: a regular command list is appended and executed through the command queue first,
// then an immediate command list that uses the same CSR appends the same kernel.
// Expectation: exactly one front end (VFE) command is found, in the command queue stream;
// neither command list stream nor the CSR stream of the immediate submission contains one.
HWTEST2_F(MultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsrWhenAppendingKernelOnBothRegularFirstThenFrontEndStateIsNotChanged, IsAtLeastSkl) {
    using VFE_STATE_TYPE = typename FamilyType::VFE_STATE_TYPE;
    NEO::FrontEndPropertiesSupport fePropertiesSupport = {};
    NEO::HwInfoConfig::get(productFamily)->fillFrontEndPropertiesSupportStructure(fePropertiesSupport, device->getHwInfo());
    // Tracked property is expected to be 1 when disable-EU-fusion is supported and -1 otherwise.
    const int expectedDisableEuFusion = fePropertiesSupport.disableEuFusion ? 1 : -1;

    EXPECT_TRUE(commandList->multiReturnPointCommandList);
    EXPECT_TRUE(commandListImmediate->multiReturnPointCommandList);

    auto &regularCmdListStream = *commandList->commandContainer.getCommandStream();

    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};
    mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1;

    // Step 1: append to the regular command list; only its tracked state changes.
    size_t usedBefore = regularCmdListStream.getUsed();
    ze_result_t result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    size_t usedAfter = regularCmdListStream.getUsed();

    auto &regularCmdListRequiredState = commandList->getRequiredStreamState();
    auto &regularCmdListFinalState = commandList->getFinalStreamState();

    EXPECT_EQ(expectedDisableEuFusion, regularCmdListRequiredState.frontEndState.disableEUFusion.value);
    EXPECT_EQ(expectedDisableEuFusion, regularCmdListFinalState.frontEndState.disableEUFusion.value);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        ptrOffset(regularCmdListStream.getCpuBase(), usedBefore),
        (usedAfter - usedBefore)));
    auto feStateCmds = findAll<VFE_STATE_TYPE *>(cmdList.begin(), cmdList.end());
    EXPECT_EQ(0u, feStateCmds.size());

    // Step 2: execute the regular command list; the queue stream carries the single VFE command.
    auto &cmdQueueStream = commandQueue->commandStream;
    auto cmdListHandle = commandList->toHandle();

    usedBefore = cmdQueueStream.getUsed();
    result = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    usedAfter = cmdQueueStream.getUsed();

    auto cmdQueueCsr = commandQueue->getCsr();
    auto &csrProperties = cmdQueueCsr->getStreamProperties();

    EXPECT_EQ(expectedDisableEuFusion, csrProperties.frontEndState.disableEUFusion.value);

    cmdList.clear();
    feStateCmds.clear();

    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        ptrOffset(cmdQueueStream.getCpuBase(), usedBefore),
        (usedAfter - usedBefore)));
    feStateCmds = findAll<VFE_STATE_TYPE *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(1u, feStateCmds.size());
    auto &feState = *genCmdCast<VFE_STATE_TYPE *>(*feStateCmds[0]);
    if (fePropertiesSupport.disableEuFusion) {
        EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getDisableFusionStateFromFrontEndCommand(feState));
    } else {
        EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getDisableFusionStateFromFrontEndCommand(feState));
    }

    // Step 3: append the same kernel to the immediate command list; no further VFE command expected.
    auto &immediateCmdListStream = *commandListImmediate->commandContainer.getCommandStream();
    auto &ultCsr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
    auto &csrStream = ultCsr.commandStream;

    size_t csrUsedBefore = csrStream.getUsed();
    usedBefore = immediateCmdListStream.getUsed();
    result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    usedAfter = immediateCmdListStream.getUsed();
    size_t csrUsedAfter = csrStream.getUsed();

    auto &immediateCmdListRequiredState = commandListImmediate->getRequiredStreamState();
    auto &immediateCmdListFinalState = commandListImmediate->getFinalStreamState();

    EXPECT_EQ(expectedDisableEuFusion, immediateCmdListRequiredState.frontEndState.disableEUFusion.value);
    EXPECT_EQ(expectedDisableEuFusion, immediateCmdListFinalState.frontEndState.disableEUFusion.value);

    cmdList.clear();
    feStateCmds.clear();

    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        ptrOffset(immediateCmdListStream.getCpuBase(), usedBefore),
        (usedAfter - usedBefore)));
    feStateCmds = findAll<VFE_STATE_TYPE *>(cmdList.begin(), cmdList.end());
    EXPECT_EQ(0u, feStateCmds.size());

    // Both submission paths must go through the very same CSR object.
    auto immediateCsr = commandListImmediate->csr;
    EXPECT_EQ(cmdQueueCsr, immediateCsr);

    EXPECT_EQ(expectedDisableEuFusion, csrProperties.frontEndState.disableEUFusion.value);

    cmdList.clear();
    feStateCmds.clear();

    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        ptrOffset(csrStream.getCpuBase(), csrUsedBefore),
        (csrUsedAfter - csrUsedBefore)));
    feStateCmds = findAll<VFE_STATE_TYPE *>(cmdList.begin(), cmdList.end());
    EXPECT_EQ(0u, feStateCmds.size());
}
|
||||
|
||||
// Scenario: an immediate command list appends a kernel first, then a regular command list
// that is executed on a queue using the same CSR appends the same kernel.
// Expectation: exactly one front end (VFE) command is found, in the CSR stream of the immediate
// submission; neither command list stream nor the command queue stream contains one.
HWTEST2_F(MultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsrWhenAppendingKernelOnBothImmediateFirstThenFrontEndStateIsNotChanged, IsAtLeastSkl) {
    using VFE_STATE_TYPE = typename FamilyType::VFE_STATE_TYPE;
    NEO::FrontEndPropertiesSupport fePropertiesSupport = {};
    NEO::HwInfoConfig::get(productFamily)->fillFrontEndPropertiesSupportStructure(fePropertiesSupport, device->getHwInfo());
    // Tracked property is expected to be 1 when disable-EU-fusion is supported and -1 otherwise.
    const int expectedDisableEuFusion = fePropertiesSupport.disableEuFusion ? 1 : -1;

    EXPECT_TRUE(commandList->multiReturnPointCommandList);
    EXPECT_TRUE(commandListImmediate->multiReturnPointCommandList);

    auto cmdQueueCsr = commandQueue->getCsr();
    auto &csrProperties = cmdQueueCsr->getStreamProperties();

    // Both submission paths must go through the very same CSR object.
    auto immediateCsr = commandListImmediate->csr;
    EXPECT_EQ(cmdQueueCsr, immediateCsr);

    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};
    mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1;

    // Step 1: append to the immediate command list; the CSR stream carries the single VFE command.
    auto &immediateCmdListStream = *commandListImmediate->commandContainer.getCommandStream();
    auto &ultCsr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
    auto &csrStream = ultCsr.commandStream;

    size_t csrUsedBefore = csrStream.getUsed();
    size_t usedBefore = immediateCmdListStream.getUsed();
    ze_result_t result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    size_t usedAfter = immediateCmdListStream.getUsed();
    size_t csrUsedAfter = csrStream.getUsed();

    auto &immediateCmdListRequiredState = commandListImmediate->getRequiredStreamState();
    auto &immediateCmdListFinalState = commandListImmediate->getFinalStreamState();

    EXPECT_EQ(expectedDisableEuFusion, immediateCmdListRequiredState.frontEndState.disableEUFusion.value);
    EXPECT_EQ(expectedDisableEuFusion, immediateCmdListFinalState.frontEndState.disableEUFusion.value);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        ptrOffset(immediateCmdListStream.getCpuBase(), usedBefore),
        (usedAfter - usedBefore)));
    auto feStateCmds = findAll<VFE_STATE_TYPE *>(cmdList.begin(), cmdList.end());
    EXPECT_EQ(0u, feStateCmds.size());

    EXPECT_EQ(expectedDisableEuFusion, csrProperties.frontEndState.disableEUFusion.value);

    cmdList.clear();
    feStateCmds.clear();

    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        ptrOffset(csrStream.getCpuBase(), csrUsedBefore),
        (csrUsedAfter - csrUsedBefore)));
    feStateCmds = findAll<VFE_STATE_TYPE *>(cmdList.begin(), cmdList.end());
    ASSERT_EQ(1u, feStateCmds.size());
    auto &feState = *genCmdCast<VFE_STATE_TYPE *>(*feStateCmds[0]);
    if (fePropertiesSupport.disableEuFusion) {
        EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getDisableFusionStateFromFrontEndCommand(feState));
    } else {
        EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getDisableFusionStateFromFrontEndCommand(feState));
    }

    // Step 2: append the same kernel to the regular command list; only its tracked state changes.
    auto &regularCmdListStream = *commandList->commandContainer.getCommandStream();

    usedBefore = regularCmdListStream.getUsed();
    result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    usedAfter = regularCmdListStream.getUsed();

    auto &regularCmdListRequiredState = commandList->getRequiredStreamState();
    auto &regularCmdListFinalState = commandList->getFinalStreamState();

    EXPECT_EQ(expectedDisableEuFusion, regularCmdListRequiredState.frontEndState.disableEUFusion.value);
    EXPECT_EQ(expectedDisableEuFusion, regularCmdListFinalState.frontEndState.disableEUFusion.value);

    cmdList.clear();
    feStateCmds.clear();

    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        ptrOffset(regularCmdListStream.getCpuBase(), usedBefore),
        (usedAfter - usedBefore)));
    feStateCmds = findAll<VFE_STATE_TYPE *>(cmdList.begin(), cmdList.end());
    EXPECT_EQ(0u, feStateCmds.size());

    // Step 3: execute the regular command list; no further VFE command expected in the queue stream.
    auto &cmdQueueStream = commandQueue->commandStream;
    auto cmdListHandle = commandList->toHandle();

    usedBefore = cmdQueueStream.getUsed();
    result = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    usedAfter = cmdQueueStream.getUsed();

    EXPECT_EQ(expectedDisableEuFusion, csrProperties.frontEndState.disableEUFusion.value);

    cmdList.clear();
    feStateCmds.clear();

    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        ptrOffset(cmdQueueStream.getCpuBase(), usedBefore),
        (usedAfter - usedBefore)));
    feStateCmds = findAll<VFE_STATE_TYPE *>(cmdList.begin(), cmdList.end());
    EXPECT_EQ(0u, feStateCmds.size());
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
*/
|
||||
|
||||
#include "shared/source/command_stream/command_stream_receiver.h"
|
||||
#include "shared/source/command_stream/stream_properties.h"
|
||||
#include "shared/source/command_stream/wait_status.h"
|
||||
#include "shared/source/helpers/array_count.h"
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
|
@ -2418,6 +2419,8 @@ HWTEST_F(KernelExecutionTypesTests, givenKernelWithDifferentExecutionTypeWhileDo
|
|||
size_t gws[3] = {63, 0, 0};
|
||||
auto &mockCsr = device->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
||||
mockCsr.feSupportFlags.computeDispatchAllWalker = true;
|
||||
|
||||
pKernel->setKernelExecutionType(CL_KERNEL_EXEC_INFO_CONCURRENT_TYPE_INTEL);
|
||||
mockCmdQ->enqueueKernel(pKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr);
|
||||
EXPECT_EQ(mockCsr.lastKernelExecutionType, KernelExecutionType::Concurrent);
|
||||
|
|
|
@ -1529,3 +1529,118 @@ HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, givenBarrierNodeSetWhe
|
|||
EXPECT_EQ(0u, pipeControl->getImmediateData());
|
||||
EXPECT_EQ(gpuAddress, UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*pipeControl));
|
||||
}
|
||||
|
||||
// Walks handleFrontEndStateTransition through each front end property in turn (overdispatch,
// kernel execution type, EU fusion), toggling the matching support flag, and checks when the
// media VFE state is reported dirty. The steps are order dependent.
HWTEST_F(UltCommandStreamReceiverTest, givenFrontEndStateNotInitedWhenTransitionFrontEndPropertiesThenExpectCorrectValuesStored) {
    auto feDispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();

    auto &testedCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    testedCsr.feSupportFlags.computeDispatchAllWalker = false;
    testedCsr.feSupportFlags.disableEuFusion = false;
    testedCsr.setMediaVFEStateDirty(false);

    // Runs one transition with the current flags and reports the resulting dirty flag.
    auto dirtyAfterTransition = [&]() -> bool {
        testedCsr.handleFrontEndStateTransition(feDispatchFlags);
        return testedCsr.getMediaVFEStateDirty();
    };

    // --- disable overdispatch supported ---
    testedCsr.feSupportFlags.disableOverdispatch = true;
    feDispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotApplicable;
    EXPECT_FALSE(dirtyAfterTransition());

    feDispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
    EXPECT_FALSE(dirtyAfterTransition());

    feDispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::DisableOverdispatch;
    EXPECT_TRUE(dirtyAfterTransition());
    testedCsr.setMediaVFEStateDirty(false);

    // --- disable overdispatch unsupported: a differing request must not dirty the state ---
    testedCsr.feSupportFlags.disableOverdispatch = false;
    testedCsr.lastAdditionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
    EXPECT_FALSE(dirtyAfterTransition());

    // --- compute dispatch all walker supported ---
    testedCsr.feSupportFlags.computeDispatchAllWalker = true;
    feDispatchFlags.kernelExecutionType = KernelExecutionType::NotApplicable;
    EXPECT_FALSE(dirtyAfterTransition());

    feDispatchFlags.kernelExecutionType = KernelExecutionType::Default;
    EXPECT_FALSE(dirtyAfterTransition());

    feDispatchFlags.kernelExecutionType = KernelExecutionType::Concurrent;
    EXPECT_TRUE(dirtyAfterTransition());
    testedCsr.setMediaVFEStateDirty(false);

    // --- compute dispatch all walker unsupported ---
    testedCsr.feSupportFlags.computeDispatchAllWalker = false;
    testedCsr.lastKernelExecutionType = KernelExecutionType::Default;
    EXPECT_FALSE(dirtyAfterTransition());

    // --- disable EU fusion supported ---
    testedCsr.feSupportFlags.disableEuFusion = true;
    feDispatchFlags.disableEUFusion = false;
    EXPECT_TRUE(dirtyAfterTransition());
    testedCsr.setMediaVFEStateDirty(false);

    testedCsr.streamProperties.frontEndState.disableEUFusion.value = 0;
    feDispatchFlags.disableEUFusion = true;
    EXPECT_TRUE(dirtyAfterTransition());
    testedCsr.setMediaVFEStateDirty(false);

    feDispatchFlags.disableEUFusion = false;
    EXPECT_FALSE(dirtyAfterTransition());

    // --- disable EU fusion unsupported ---
    testedCsr.feSupportFlags.disableEuFusion = false;
    testedCsr.streamProperties.frontEndState.disableEUFusion.value = -1;
    feDispatchFlags.disableEUFusion = false;
    EXPECT_FALSE(dirtyAfterTransition());
}
|
||||
|
||||
// Presets the tracked front end property values (0 or 1) before each call to
// handleFrontEndStateTransition and checks that a request differing from the preset value
// marks the media VFE state dirty. The steps are order dependent.
HWTEST_F(UltCommandStreamReceiverTest, givenFrontEndStateInitedWhenTransitionFrontEndPropertiesThenExpectCorrectValuesStored) {
    auto feDispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();

    auto &testedCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    testedCsr.feSupportFlags.computeDispatchAllWalker = false;
    testedCsr.feSupportFlags.disableEuFusion = false;
    testedCsr.setMediaVFEStateDirty(false);

    // Runs one transition with the current flags and reports the resulting dirty flag.
    auto dirtyAfterTransition = [&]() -> bool {
        testedCsr.handleFrontEndStateTransition(feDispatchFlags);
        return testedCsr.getMediaVFEStateDirty();
    };

    // --- disable overdispatch supported ---
    testedCsr.feSupportFlags.disableOverdispatch = true;

    testedCsr.streamProperties.frontEndState.disableOverdispatch.value = 0;
    feDispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
    EXPECT_FALSE(dirtyAfterTransition());

    feDispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::DisableOverdispatch;
    EXPECT_TRUE(dirtyAfterTransition());
    testedCsr.setMediaVFEStateDirty(false);

    testedCsr.streamProperties.frontEndState.disableOverdispatch.value = 1;
    feDispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet;
    EXPECT_TRUE(dirtyAfterTransition());
    testedCsr.setMediaVFEStateDirty(false);

    // --- switch the supported property to compute dispatch all walker ---
    testedCsr.feSupportFlags.disableOverdispatch = false;
    testedCsr.feSupportFlags.computeDispatchAllWalker = true;

    testedCsr.streamProperties.frontEndState.computeDispatchAllWalkerEnable.value = 0;
    feDispatchFlags.kernelExecutionType = KernelExecutionType::Default;
    EXPECT_FALSE(dirtyAfterTransition());

    feDispatchFlags.kernelExecutionType = KernelExecutionType::Concurrent;
    EXPECT_TRUE(dirtyAfterTransition());
    testedCsr.setMediaVFEStateDirty(false);

    testedCsr.streamProperties.frontEndState.computeDispatchAllWalkerEnable.value = 1;
    feDispatchFlags.kernelExecutionType = KernelExecutionType::Default;
    EXPECT_TRUE(dirtyAfterTransition());
    testedCsr.setMediaVFEStateDirty(false);
}
|
||||
|
|
|
@ -153,6 +153,7 @@ struct UltCommandStreamReceiverTest
|
|||
commandStreamReceiver.lastSentUseGlobalAtomics = false;
|
||||
commandStreamReceiver.streamProperties.stateComputeMode.setProperties(0, GrfConfig::DefaultGrfNumber,
|
||||
hwHelper.getDefaultThreadArbitrationPolicy(), pDevice->getPreemptionMode(), *defaultHwInfo);
|
||||
commandStreamReceiver.streamProperties.frontEndState.setProperties(false, false, false, -1, *defaultHwInfo);
|
||||
|
||||
auto logicalStateHelper = commandStreamReceiver.getLogicalStateHelper();
|
||||
|
||||
|
|
|
@ -74,7 +74,9 @@ CommandStreamReceiver::CommandStreamReceiver(ExecutionEnvironment &executionEnvi
|
|||
this->staticWorkPartitioningEnabled = true;
|
||||
}
|
||||
|
||||
this->systolicModeConfigurable = HwInfoConfig::get(hwInfo.platform.eProductFamily)->isSystolicModeConfigurable(hwInfo);
|
||||
auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||
this->systolicModeConfigurable = hwInfoConfig->isSystolicModeConfigurable(hwInfo);
|
||||
hwInfoConfig->fillFrontEndPropertiesSupportStructure(feSupportFlags, hwInfo);
|
||||
}
|
||||
|
||||
CommandStreamReceiver::~CommandStreamReceiver() {
|
||||
|
|
|
@ -392,6 +392,7 @@ class CommandStreamReceiver {
|
|||
|
||||
LinearStream commandStream;
|
||||
StreamProperties streamProperties{};
|
||||
FrontEndPropertiesSupport feSupportFlags{};
|
||||
|
||||
// offset for debug state is 1kbyte, tag writes can use multiple offsets for multiple partitions and each offset can vary per platform
|
||||
const uint64_t debugPauseStateAddressOffset = MemoryConstants::kiloByte;
|
||||
|
|
|
@ -177,6 +177,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
|||
void unregisterDirectSubmissionFromController();
|
||||
constexpr bool isGlobalAtomicsProgrammingRequired(bool currentValue) const;
|
||||
void createKernelArgsBufferAllocation() override;
|
||||
void handleFrontEndStateTransition(DispatchFlags &dispatchFlags);
|
||||
|
||||
HeapDirtyState dshState;
|
||||
HeapDirtyState iohState;
|
||||
|
|
|
@ -308,13 +308,7 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
|||
}
|
||||
|
||||
if (!logicalStateHelper) {
|
||||
if (dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotApplicable && lastAdditionalKernelExecInfo != dispatchFlags.additionalKernelExecInfo) {
|
||||
setMediaVFEStateDirty(true);
|
||||
}
|
||||
|
||||
if (dispatchFlags.kernelExecutionType != KernelExecutionType::NotApplicable && lastKernelExecutionType != dispatchFlags.kernelExecutionType) {
|
||||
setMediaVFEStateDirty(true);
|
||||
}
|
||||
handleFrontEndStateTransition(dispatchFlags);
|
||||
}
|
||||
|
||||
auto &commandStreamCSR = this->getCS(getRequiredCmdStreamSizeAligned(dispatchFlags, device));
|
||||
|
@ -970,14 +964,14 @@ inline void CommandStreamReceiverHw<GfxFamily>::programVFEState(LinearStream &cs
|
|||
lastKernelExecutionType = dispatchFlags.kernelExecutionType;
|
||||
}
|
||||
auto &hwInfo = peekHwInfo();
|
||||
|
||||
auto isCooperative = dispatchFlags.kernelExecutionType == KernelExecutionType::Concurrent;
|
||||
auto disableOverdispatch = (dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotSet);
|
||||
streamProperties.frontEndState.setProperties(isCooperative, dispatchFlags.disableEUFusion, disableOverdispatch, osContext->isEngineInstanced(), hwInfo);
|
||||
|
||||
auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily);
|
||||
const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
|
||||
auto engineGroupType = hwHelper.getEngineGroupType(getOsContext().getEngineType(), getOsContext().getEngineUsage(), hwInfo);
|
||||
auto pVfeState = PreambleHelper<GfxFamily>::getSpaceForVfeState(&csr, hwInfo, engineGroupType);
|
||||
auto disableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(hwInfo) &&
|
||||
(dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotSet);
|
||||
streamProperties.frontEndState.setProperties(lastKernelExecutionType == KernelExecutionType::Concurrent,
|
||||
dispatchFlags.disableEUFusion, disableOverdispatch, osContext->isEngineInstanced(), hwInfo);
|
||||
PreambleHelper<GfxFamily>::programVfeState(
|
||||
pVfeState, hwInfo, requiredScratchSize, getScratchPatchAddress(),
|
||||
maxFrontEndThreads, streamProperties, logicalStateHelper.get());
|
||||
|
@ -1477,4 +1471,29 @@ void CommandStreamReceiverHw<GfxFamily>::initializeDeviceWithFirstSubmission() {
|
|||
this->flushSmallTask(commandStream, commandStreamStart);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::handleFrontEndStateTransition(DispatchFlags &dispatchFlags) {
|
||||
if (streamProperties.frontEndState.disableOverdispatch.value != -1) {
|
||||
lastAdditionalKernelExecInfo = streamProperties.frontEndState.disableOverdispatch.value == 1 ? AdditionalKernelExecInfo::DisableOverdispatch : AdditionalKernelExecInfo::NotSet;
|
||||
}
|
||||
if (streamProperties.frontEndState.computeDispatchAllWalkerEnable.value != -1) {
|
||||
lastKernelExecutionType = streamProperties.frontEndState.computeDispatchAllWalkerEnable.value == 1 ? KernelExecutionType::Concurrent : KernelExecutionType::Default;
|
||||
}
|
||||
|
||||
if (dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotApplicable && lastAdditionalKernelExecInfo != dispatchFlags.additionalKernelExecInfo &&
|
||||
feSupportFlags.disableOverdispatch) {
|
||||
setMediaVFEStateDirty(true);
|
||||
}
|
||||
|
||||
if (dispatchFlags.kernelExecutionType != KernelExecutionType::NotApplicable && lastKernelExecutionType != dispatchFlags.kernelExecutionType &&
|
||||
feSupportFlags.computeDispatchAllWalker) {
|
||||
setMediaVFEStateDirty(true);
|
||||
}
|
||||
|
||||
if ((streamProperties.frontEndState.disableEUFusion.value == -1 || dispatchFlags.disableEUFusion != !!streamProperties.frontEndState.disableEUFusion.value) &&
|
||||
feSupportFlags.disableEuFusion) {
|
||||
setMediaVFEStateDirty(true);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
|
|
@ -38,6 +38,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
|||
using BaseClass::getCmdSizeForPrologue;
|
||||
using BaseClass::getScratchPatchAddress;
|
||||
using BaseClass::getScratchSpaceController;
|
||||
using BaseClass::handleFrontEndStateTransition;
|
||||
using BaseClass::indirectHeap;
|
||||
using BaseClass::iohState;
|
||||
using BaseClass::isBlitterDirectSubmissionEnabled;
|
||||
|
@ -80,6 +81,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
|||
using BaseClass::CommandStreamReceiver::downloadAllocationImpl;
|
||||
using BaseClass::CommandStreamReceiver::executionEnvironment;
|
||||
using BaseClass::CommandStreamReceiver::experimentalCmdBuffer;
|
||||
using BaseClass::CommandStreamReceiver::feSupportFlags;
|
||||
using BaseClass::CommandStreamReceiver::flushStamp;
|
||||
using BaseClass::CommandStreamReceiver::globalFenceAllocation;
|
||||
using BaseClass::CommandStreamReceiver::gpuHangCheckPeriod;
|
||||
|
@ -91,6 +93,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
|||
using BaseClass::CommandStreamReceiver::isEnginePrologueSent;
|
||||
using BaseClass::CommandStreamReceiver::isPreambleSent;
|
||||
using BaseClass::CommandStreamReceiver::isStateSipSent;
|
||||
using BaseClass::CommandStreamReceiver::lastAdditionalKernelExecInfo;
|
||||
using BaseClass::CommandStreamReceiver::lastKernelExecutionType;
|
||||
using BaseClass::CommandStreamReceiver::lastMediaSamplerConfig;
|
||||
using BaseClass::CommandStreamReceiver::lastMemoryCompressionState;
|
||||
|
|
|
@ -231,6 +231,7 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
|
|||
using CommandStreamReceiver::clearColorAllocation;
|
||||
using CommandStreamReceiver::commandStream;
|
||||
using CommandStreamReceiver::dispatchMode;
|
||||
using CommandStreamReceiver::feSupportFlags;
|
||||
using CommandStreamReceiver::globalFenceAllocation;
|
||||
using CommandStreamReceiver::isPreambleSent;
|
||||
using CommandStreamReceiver::latestFlushedTaskCount;
|
||||
|
|
Loading…
Reference in New Issue