From 5986a7199ab8218b9a9aff4b1d587029a81f6734 Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Tue, 20 Sep 2022 16:46:15 +0000 Subject: [PATCH] Share front end state updates between regular and immediate command lists Related-To: NEO-5019 Signed-off-by: Zbigniew Zdanowicz --- level_zero/core/source/cmdlist/cmdlist_hw.inl | 6 +- .../core/source/cmdqueue/cmdqueue_hw.inl | 6 +- .../unit_tests/fixtures/cmdlist_fixture.cpp | 3 + .../unit_tests/fixtures/cmdlist_fixture.h | 1 + .../core/test/unit_tests/mocks/mock_cmdlist.h | 1 + .../sources/cmdlist/test_cmdlist_6.cpp | 2 +- .../sources/cmdlist/test_cmdlist_7.cpp | 246 ++++++++++++++++++ .../command_queue/command_queue_tests.cpp | 3 + .../command_stream_receiver_hw_1_tests.cpp | 115 ++++++++ .../ult_command_stream_receiver_fixture.h | 1 + .../command_stream_receiver.cpp | 4 +- .../command_stream/command_stream_receiver.h | 1 + .../command_stream_receiver_hw.h | 1 + .../command_stream_receiver_hw_base.inl | 43 ++- .../libult/ult_command_stream_receiver.h | 3 + .../mocks/mock_command_stream_receiver.h | 1 + 16 files changed, 415 insertions(+), 22 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index af111ef8e4..24979c6d10 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2315,12 +2315,10 @@ void CommandListCoreFamily::updateStreamProperties(Kernel &kernel using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE; auto &hwInfo = device->getHwInfo(); - const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); - auto disableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(hwInfo); auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes; if (!containsAnyKernel) { - requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, disableOverdispatch, -1, hwInfo); + 
requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, true, -1, hwInfo); requiredStreamState.pipelineSelect.setProperties(true, false, kernelAttributes.flags.usesSystolicPipelineSelectMode, hwInfo); finalStreamState = requiredStreamState; requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), hwInfo); @@ -2340,7 +2338,7 @@ void CommandListCoreFamily::updateStreamProperties(Kernel &kernel hwInfo); } - finalStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, disableOverdispatch, -1, hwInfo); + finalStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, true, -1, hwInfo); bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get(); if (finalStreamState.frontEndState.isDirty() && logicalStateHelperBlock) { if (isPatchingVfeStateAllowed) { diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index c96be656f3..d7da5a7bfb 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -622,18 +622,16 @@ size_t CommandQueueHw::estimateLinearStreamSizeInitial( template void CommandQueueHw::setFrontEndStateProperties(CommandListExecutionContext &ctx) { const auto &hwInfo = this->device->getHwInfo(); - const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); - auto disableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(hwInfo); auto isEngineInstanced = csr->getOsContext().isEngineInstanced(); auto &streamProperties = this->csr->getStreamProperties(); if (!frontEndTrackingEnabled()) { streamProperties.frontEndState.setProperties(ctx.anyCommandListWithCooperativeKernels, ctx.anyCommandListRequiresDisabledEUFusion, - 
disableOverdispatch, isEngineInstanced, hwInfo); + true, isEngineInstanced, hwInfo); + ctx.frontEndStateDirty |= (streamProperties.frontEndState.isDirty() && !this->csr->getLogicalStateHelper()); } else { ctx.engineInstanced = isEngineInstanced; } - ctx.frontEndStateDirty |= (streamProperties.frontEndState.isDirty() && !this->csr->getLogicalStateHelper()); ctx.frontEndStateDirty |= csr->getMediaVFEStateDirty(); } diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp index 568d4b97a1..3d9881c42c 100644 --- a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.cpp @@ -97,6 +97,8 @@ void ModuleMutableCommandListFixture::setUp(uint32_t revision) { NEO::EngineGroupType engineGroupType = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).getEngineGroupType(neoDevice->getDefaultEngine().getEngineType(), neoDevice->getDefaultEngine().getEngineUsage(), device->getHwInfo()); commandList.reset(whiteboxCast(CommandList::create(productFamily, device, engineGroupType, 0u, returnValue))); + commandListImmediate.reset(whiteboxCast(CommandList::createImmediate(productFamily, device, &queueDesc, false, engineGroupType, returnValue))); + commandListImmediate->isFlushTaskSubmissionEnabled = true; mockKernelImmData = std::make_unique(0u); createModuleFromMockBinary(0u, false, mockKernelImmData.get()); @@ -108,6 +110,7 @@ void ModuleMutableCommandListFixture::setUp(uint32_t revision) { void ModuleMutableCommandListFixture::tearDown() { commandQueue->destroy(); commandList.reset(nullptr); + commandListImmediate.reset(nullptr); kernel.reset(nullptr); mockKernelImmData.reset(nullptr); ModuleImmutableDataFixture::tearDown(); diff --git a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h index 874ecabafd..742222da43 100644 --- 
a/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h @@ -75,6 +75,7 @@ struct ModuleMutableCommandListFixture : public ModuleImmutableDataFixture { std::unique_ptr mockKernelImmData; std::unique_ptr commandList; + std::unique_ptr commandListImmediate; std::unique_ptr kernel; L0::ult::CommandQueue *commandQueue; }; diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index e9620392b2..52159996df 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -146,6 +146,7 @@ struct WhiteBox<::L0::CommandList> : public ::L0::CommandListImp { using BaseClass::commandListPreemptionMode; using BaseClass::csr; using BaseClass::initialize; + using BaseClass::isFlushTaskSubmissionEnabled; using BaseClass::multiReturnPointCommandList; using BaseClass::nonImmediateLogicalStateHelper; using BaseClass::partitionCount; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp index 066414f006..a18592c84a 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_6.cpp @@ -76,7 +76,7 @@ HWTEST2_F(CommandListExecuteImmediate, whenExecutingCommandListImmediateWithFlus NEO::FrontEndPropertiesSupport frontEndPropertiesSupport = {}; hwInfoConfig.fillFrontEndPropertiesSupportStructure(frontEndPropertiesSupport, device->getHwInfo()); - int expectedDisableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(*defaultHwInfo); + int expectedDisableOverdispatch = frontEndPropertiesSupport.disableOverdispatch; int32_t expectedIsCoherencyRequired = scmPropertiesSupport.coherencyRequired ? hwHelper.forceNonGpuCoherencyWA(true) : -1; int expectedLargeGrfMode = scmPropertiesSupport.largeGrfMode ? 
1 : -1; int expectedThreadArbitrationPolicy = scmPropertiesSupport.threadArbitrationPolicy ? NEO::ThreadArbitrationPolicy::RoundRobin : -1; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp index c8073a2778..201560d1d3 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_7.cpp @@ -1384,5 +1384,251 @@ HWTEST2_F(MultiReturnCommandListTest, } } +HWTEST2_F(MultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsrWhenAppendingKernelOnBothRegularFirstThenFrontEndStateIsNotChanged, IsAtLeastSkl) { + using VFE_STATE_TYPE = typename FamilyType::VFE_STATE_TYPE; + NEO::FrontEndPropertiesSupport fePropertiesSupport = {}; + NEO::HwInfoConfig::get(productFamily)->fillFrontEndPropertiesSupportStructure(fePropertiesSupport, device->getHwInfo()); + + EXPECT_TRUE(commandList->multiReturnPointCommandList); + EXPECT_TRUE(commandListImmediate->multiReturnPointCommandList); + + auto &regularCmdListStream = *commandList->commandContainer.getCommandStream(); + + ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1; + + size_t usedBefore = regularCmdListStream.getUsed(); + ze_result_t result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + size_t usedAfter = regularCmdListStream.getUsed(); + + auto &regularCmdListRequiredState = commandList->getRequiredStreamState(); + auto &regularCmdListFinalState = commandList->getFinalStreamState(); + + if (fePropertiesSupport.disableEuFusion) { + EXPECT_EQ(1, regularCmdListRequiredState.frontEndState.disableEUFusion.value); + EXPECT_EQ(1, regularCmdListFinalState.frontEndState.disableEUFusion.value); + } else { + EXPECT_EQ(-1, 
regularCmdListRequiredState.frontEndState.disableEUFusion.value); + EXPECT_EQ(-1, regularCmdListFinalState.frontEndState.disableEUFusion.value); + } + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, + ptrOffset(regularCmdListStream.getCpuBase(), usedBefore), + (usedAfter - usedBefore))); + auto feStateCmds = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(0u, feStateCmds.size()); + + auto &cmdQueueStream = commandQueue->commandStream; + auto cmdListHandle = commandList->toHandle(); + + usedBefore = cmdQueueStream.getUsed(); + result = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + usedAfter = cmdQueueStream.getUsed(); + + auto cmdQueueCsr = commandQueue->getCsr(); + auto &csrProperties = cmdQueueCsr->getStreamProperties(); + + if (fePropertiesSupport.disableEuFusion) { + EXPECT_EQ(1, csrProperties.frontEndState.disableEUFusion.value); + } else { + EXPECT_EQ(-1, csrProperties.frontEndState.disableEUFusion.value); + } + + cmdList.clear(); + feStateCmds.clear(); + + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, + ptrOffset(cmdQueueStream.getCpuBase(), usedBefore), + (usedAfter - usedBefore))); + feStateCmds = findAll(cmdList.begin(), cmdList.end()); + ASSERT_EQ(1u, feStateCmds.size()); + auto &feState = *genCmdCast(*feStateCmds[0]); + if (fePropertiesSupport.disableEuFusion) { + EXPECT_TRUE(NEO::UnitTestHelper::getDisableFusionStateFromFrontEndCommand(feState)); + } else { + EXPECT_FALSE(NEO::UnitTestHelper::getDisableFusionStateFromFrontEndCommand(feState)); + } + + auto &immediateCmdListStream = *commandListImmediate->commandContainer.getCommandStream(); + auto &ultCsr = neoDevice->getUltCommandStreamReceiver(); + auto &csrStream = ultCsr.commandStream; + + size_t csrUsedBefore = csrStream.getUsed(); + usedBefore = immediateCmdListStream.getUsed(); + result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, 
nullptr, launchParams); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + usedAfter = immediateCmdListStream.getUsed(); + size_t csrUsedAfter = csrStream.getUsed(); + + auto &immediateCmdListRequiredState = commandListImmediate->getRequiredStreamState(); + auto &immediateCmdListFinalState = commandListImmediate->getFinalStreamState(); + + if (fePropertiesSupport.disableEuFusion) { + EXPECT_EQ(1, immediateCmdListRequiredState.frontEndState.disableEUFusion.value); + EXPECT_EQ(1, immediateCmdListFinalState.frontEndState.disableEUFusion.value); + } else { + EXPECT_EQ(-1, immediateCmdListRequiredState.frontEndState.disableEUFusion.value); + EXPECT_EQ(-1, immediateCmdListFinalState.frontEndState.disableEUFusion.value); + } + + cmdList.clear(); + feStateCmds.clear(); + + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, + ptrOffset(immediateCmdListStream.getCpuBase(), usedBefore), + (usedAfter - usedBefore))); + feStateCmds = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(0u, feStateCmds.size()); + + auto immediateCsr = commandListImmediate->csr; + EXPECT_EQ(cmdQueueCsr, immediateCsr); + + if (fePropertiesSupport.disableEuFusion) { + EXPECT_EQ(1, csrProperties.frontEndState.disableEUFusion.value); + } else { + EXPECT_EQ(-1, csrProperties.frontEndState.disableEUFusion.value); + } + + cmdList.clear(); + feStateCmds.clear(); + + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, + ptrOffset(csrStream.getCpuBase(), csrUsedBefore), + (csrUsedAfter - csrUsedBefore))); + feStateCmds = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(0u, feStateCmds.size()); +} + +HWTEST2_F(MultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsrWhenAppendingKernelOnBothImmediateFirstThenFrontEndStateIsNotChanged, IsAtLeastSkl) { + using VFE_STATE_TYPE = typename FamilyType::VFE_STATE_TYPE; + NEO::FrontEndPropertiesSupport fePropertiesSupport = {}; + NEO::HwInfoConfig::get(productFamily)->fillFrontEndPropertiesSupportStructure(fePropertiesSupport, 
device->getHwInfo()); + + EXPECT_TRUE(commandList->multiReturnPointCommandList); + EXPECT_TRUE(commandListImmediate->multiReturnPointCommandList); + + auto cmdQueueCsr = commandQueue->getCsr(); + auto &csrProperties = cmdQueueCsr->getStreamProperties(); + + auto immediateCsr = commandListImmediate->csr; + EXPECT_EQ(cmdQueueCsr, immediateCsr); + + ze_group_count_t groupCount{1, 1, 1}; + CmdListKernelLaunchParams launchParams = {}; + mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1; + + auto &immediateCmdListStream = *commandListImmediate->commandContainer.getCommandStream(); + auto &ultCsr = neoDevice->getUltCommandStreamReceiver(); + auto &csrStream = ultCsr.commandStream; + + size_t csrUsedBefore = csrStream.getUsed(); + size_t usedBefore = immediateCmdListStream.getUsed(); + ze_result_t result = commandListImmediate->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + size_t usedAfter = immediateCmdListStream.getUsed(); + size_t csrUsedAfter = csrStream.getUsed(); + + auto &immediateCmdListRequiredState = commandListImmediate->getRequiredStreamState(); + auto &immediateCmdListFinalState = commandListImmediate->getFinalStreamState(); + + if (fePropertiesSupport.disableEuFusion) { + EXPECT_EQ(1, immediateCmdListRequiredState.frontEndState.disableEUFusion.value); + EXPECT_EQ(1, immediateCmdListFinalState.frontEndState.disableEUFusion.value); + } else { + EXPECT_EQ(-1, immediateCmdListRequiredState.frontEndState.disableEUFusion.value); + EXPECT_EQ(-1, immediateCmdListFinalState.frontEndState.disableEUFusion.value); + } + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, + ptrOffset(immediateCmdListStream.getCpuBase(), usedBefore), + (usedAfter - usedBefore))); + auto feStateCmds = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(0u, feStateCmds.size()); + + if (fePropertiesSupport.disableEuFusion) { + 
EXPECT_EQ(1, csrProperties.frontEndState.disableEUFusion.value); + } else { + EXPECT_EQ(-1, csrProperties.frontEndState.disableEUFusion.value); + } + + cmdList.clear(); + feStateCmds.clear(); + + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, + ptrOffset(csrStream.getCpuBase(), csrUsedBefore), + (csrUsedAfter - csrUsedBefore))); + feStateCmds = findAll(cmdList.begin(), cmdList.end()); + ASSERT_EQ(1u, feStateCmds.size()); + auto &feState = *genCmdCast(*feStateCmds[0]); + if (fePropertiesSupport.disableEuFusion) { + EXPECT_TRUE(NEO::UnitTestHelper::getDisableFusionStateFromFrontEndCommand(feState)); + } else { + EXPECT_FALSE(NEO::UnitTestHelper::getDisableFusionStateFromFrontEndCommand(feState)); + } + + auto &regularCmdListStream = *commandList->commandContainer.getCommandStream(); + + usedBefore = regularCmdListStream.getUsed(); + result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + usedAfter = regularCmdListStream.getUsed(); + + auto &regularCmdListRequiredState = commandList->getRequiredStreamState(); + auto &regularCmdListFinalState = commandList->getFinalStreamState(); + + if (fePropertiesSupport.disableEuFusion) { + EXPECT_EQ(1, regularCmdListRequiredState.frontEndState.disableEUFusion.value); + EXPECT_EQ(1, regularCmdListFinalState.frontEndState.disableEUFusion.value); + } else { + EXPECT_EQ(-1, regularCmdListRequiredState.frontEndState.disableEUFusion.value); + EXPECT_EQ(-1, regularCmdListFinalState.frontEndState.disableEUFusion.value); + } + + cmdList.clear(); + feStateCmds.clear(); + + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, + ptrOffset(regularCmdListStream.getCpuBase(), usedBefore), + (usedAfter - usedBefore))); + feStateCmds = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(0u, feStateCmds.size()); + + auto &cmdQueueStream = commandQueue->commandStream; + auto cmdListHandle = commandList->toHandle(); + + usedBefore = 
cmdQueueStream.getUsed(); + result = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + usedAfter = cmdQueueStream.getUsed(); + + if (fePropertiesSupport.disableEuFusion) { + EXPECT_EQ(1, csrProperties.frontEndState.disableEUFusion.value); + } else { + EXPECT_EQ(-1, csrProperties.frontEndState.disableEUFusion.value); + } + + cmdList.clear(); + feStateCmds.clear(); + + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, + ptrOffset(cmdQueueStream.getCpuBase(), usedBefore), + (usedAfter - usedBefore))); + feStateCmds = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(0u, feStateCmds.size()); +} + } // namespace ult } // namespace L0 diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index ae06e2ed01..7e649f23f1 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/command_stream/command_stream_receiver.h" +#include "shared/source/command_stream/stream_properties.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/basic_math.h" @@ -2418,6 +2419,8 @@ HWTEST_F(KernelExecutionTypesTests, givenKernelWithDifferentExecutionTypeWhileDo size_t gws[3] = {63, 0, 0}; auto &mockCsr = device->getUltCommandStreamReceiver(); + mockCsr.feSupportFlags.computeDispatchAllWalker = true; + pKernel->setKernelExecutionType(CL_KERNEL_EXEC_INFO_CONCURRENT_TYPE_INTEL); mockCmdQ->enqueueKernel(pKernel, 1, nullptr, gws, nullptr, 0, nullptr, nullptr); EXPECT_EQ(mockCsr.lastKernelExecutionType, KernelExecutionType::Concurrent); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp index 4d767978b4..874a1eccf3 100644 --- 
a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp @@ -1529,3 +1529,118 @@ HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, givenBarrierNodeSetWhe EXPECT_EQ(0u, pipeControl->getImmediateData()); EXPECT_EQ(gpuAddress, UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl)); } + +HWTEST_F(UltCommandStreamReceiverTest, givenFrontEndStateNotInitedWhenTransitionFrontEndPropertiesThenExpectCorrectValuesStored) { + auto dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); + + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.feSupportFlags.computeDispatchAllWalker = false; + commandStreamReceiver.feSupportFlags.disableEuFusion = false; + commandStreamReceiver.setMediaVFEStateDirty(false); + + commandStreamReceiver.feSupportFlags.disableOverdispatch = true; + dispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotApplicable; + commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags); + EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty()); + + dispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet; + commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags); + EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty()); + + dispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::DisableOverdispatch; + commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags); + EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty()); + commandStreamReceiver.setMediaVFEStateDirty(false); + + commandStreamReceiver.feSupportFlags.disableOverdispatch = false; + commandStreamReceiver.lastAdditionalKernelExecInfo = AdditionalKernelExecInfo::NotSet; + commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags); + EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty()); + + 
commandStreamReceiver.feSupportFlags.computeDispatchAllWalker = true; + dispatchFlags.kernelExecutionType = KernelExecutionType::NotApplicable; + commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags); + EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty()); + + dispatchFlags.kernelExecutionType = KernelExecutionType::Default; + commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags); + EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty()); + + dispatchFlags.kernelExecutionType = KernelExecutionType::Concurrent; + commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags); + EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty()); + commandStreamReceiver.setMediaVFEStateDirty(false); + + commandStreamReceiver.feSupportFlags.computeDispatchAllWalker = false; + commandStreamReceiver.lastKernelExecutionType = KernelExecutionType::Default; + commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags); + EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty()); + + commandStreamReceiver.feSupportFlags.disableEuFusion = true; + dispatchFlags.disableEUFusion = false; + commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags); + EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty()); + commandStreamReceiver.setMediaVFEStateDirty(false); + + commandStreamReceiver.streamProperties.frontEndState.disableEUFusion.value = 0; + dispatchFlags.disableEUFusion = true; + commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags); + EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty()); + commandStreamReceiver.setMediaVFEStateDirty(false); + + dispatchFlags.disableEUFusion = false; + commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags); + EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty()); + + commandStreamReceiver.feSupportFlags.disableEuFusion = false; + commandStreamReceiver.streamProperties.frontEndState.disableEUFusion.value = -1; + dispatchFlags.disableEUFusion = false; + 
commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags); + EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty()); +} + +HWTEST_F(UltCommandStreamReceiverTest, givenFrontEndStateInitedWhenTransitionFrontEndPropertiesThenExpectCorrectValuesStored) { + auto dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); + + auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver(); + commandStreamReceiver.feSupportFlags.computeDispatchAllWalker = false; + commandStreamReceiver.feSupportFlags.disableEuFusion = false; + commandStreamReceiver.setMediaVFEStateDirty(false); + + commandStreamReceiver.feSupportFlags.disableOverdispatch = true; + + commandStreamReceiver.streamProperties.frontEndState.disableOverdispatch.value = 0; + dispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet; + commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags); + EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty()); + + dispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::DisableOverdispatch; + commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags); + EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty()); + commandStreamReceiver.setMediaVFEStateDirty(false); + + commandStreamReceiver.streamProperties.frontEndState.disableOverdispatch.value = 1; + dispatchFlags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet; + commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags); + EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty()); + commandStreamReceiver.setMediaVFEStateDirty(false); + + commandStreamReceiver.feSupportFlags.disableOverdispatch = false; + commandStreamReceiver.feSupportFlags.computeDispatchAllWalker = true; + + commandStreamReceiver.streamProperties.frontEndState.computeDispatchAllWalkerEnable.value = 0; + dispatchFlags.kernelExecutionType = KernelExecutionType::Default; + commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags); + 
EXPECT_FALSE(commandStreamReceiver.getMediaVFEStateDirty()); + + dispatchFlags.kernelExecutionType = KernelExecutionType::Concurrent; + commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags); + EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty()); + commandStreamReceiver.setMediaVFEStateDirty(false); + + commandStreamReceiver.streamProperties.frontEndState.computeDispatchAllWalkerEnable.value = 1; + dispatchFlags.kernelExecutionType = KernelExecutionType::Default; + commandStreamReceiver.handleFrontEndStateTransition(dispatchFlags); + EXPECT_TRUE(commandStreamReceiver.getMediaVFEStateDirty()); + commandStreamReceiver.setMediaVFEStateDirty(false); +} diff --git a/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h b/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h index d065fa05bf..d3e032cfbd 100644 --- a/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h +++ b/opencl/test/unit_test/fixtures/ult_command_stream_receiver_fixture.h @@ -153,6 +153,7 @@ struct UltCommandStreamReceiverTest commandStreamReceiver.lastSentUseGlobalAtomics = false; commandStreamReceiver.streamProperties.stateComputeMode.setProperties(0, GrfConfig::DefaultGrfNumber, hwHelper.getDefaultThreadArbitrationPolicy(), pDevice->getPreemptionMode(), *defaultHwInfo); + commandStreamReceiver.streamProperties.frontEndState.setProperties(false, false, false, -1, *defaultHwInfo); auto logicalStateHelper = commandStreamReceiver.getLogicalStateHelper(); diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index 7d8e7445ea..32a5532558 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -74,7 +74,9 @@ CommandStreamReceiver::CommandStreamReceiver(ExecutionEnvironment &executionEnvi this->staticWorkPartitioningEnabled = true; } - this->systolicModeConfigurable = 
HwInfoConfig::get(hwInfo.platform.eProductFamily)->isSystolicModeConfigurable(hwInfo); + auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily); + this->systolicModeConfigurable = hwInfoConfig->isSystolicModeConfigurable(hwInfo); + hwInfoConfig->fillFrontEndPropertiesSupportStructure(feSupportFlags, hwInfo); } CommandStreamReceiver::~CommandStreamReceiver() { diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 45d6bf0586..ea67b5def3 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -392,6 +392,7 @@ class CommandStreamReceiver { LinearStream commandStream; StreamProperties streamProperties{}; + FrontEndPropertiesSupport feSupportFlags{}; // offset for debug state is 1kbyte, tag writes can use multiple offsets for multiple partitions and each offset can vary per platform const uint64_t debugPauseStateAddressOffset = MemoryConstants::kiloByte; diff --git a/shared/source/command_stream/command_stream_receiver_hw.h b/shared/source/command_stream/command_stream_receiver_hw.h index 9b8f43542a..0a587eefe3 100644 --- a/shared/source/command_stream/command_stream_receiver_hw.h +++ b/shared/source/command_stream/command_stream_receiver_hw.h @@ -177,6 +177,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { void unregisterDirectSubmissionFromController(); constexpr bool isGlobalAtomicsProgrammingRequired(bool currentValue) const; void createKernelArgsBufferAllocation() override; + void handleFrontEndStateTransition(DispatchFlags &dispatchFlags); HeapDirtyState dshState; HeapDirtyState iohState; diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 39ad964412..79e692e2a5 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ 
b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -308,13 +308,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( } if (!logicalStateHelper) { - if (dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotApplicable && lastAdditionalKernelExecInfo != dispatchFlags.additionalKernelExecInfo) { - setMediaVFEStateDirty(true); - } - - if (dispatchFlags.kernelExecutionType != KernelExecutionType::NotApplicable && lastKernelExecutionType != dispatchFlags.kernelExecutionType) { - setMediaVFEStateDirty(true); - } + handleFrontEndStateTransition(dispatchFlags); } auto &commandStreamCSR = this->getCS(getRequiredCmdStreamSizeAligned(dispatchFlags, device)); @@ -970,14 +964,14 @@ inline void CommandStreamReceiverHw::programVFEState(LinearStream &cs lastKernelExecutionType = dispatchFlags.kernelExecutionType; } auto &hwInfo = peekHwInfo(); + + auto isCooperative = dispatchFlags.kernelExecutionType == KernelExecutionType::Concurrent; + auto disableOverdispatch = (dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotSet); + streamProperties.frontEndState.setProperties(isCooperative, dispatchFlags.disableEUFusion, disableOverdispatch, osContext->isEngineInstanced(), hwInfo); + auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); - const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); auto engineGroupType = hwHelper.getEngineGroupType(getOsContext().getEngineType(), getOsContext().getEngineUsage(), hwInfo); auto pVfeState = PreambleHelper::getSpaceForVfeState(&csr, hwInfo, engineGroupType); - auto disableOverdispatch = hwInfoConfig.isDisableOverdispatchAvailable(hwInfo) && - (dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotSet); - streamProperties.frontEndState.setProperties(lastKernelExecutionType == KernelExecutionType::Concurrent, - dispatchFlags.disableEUFusion, disableOverdispatch, osContext->isEngineInstanced(), hwInfo); 
PreambleHelper::programVfeState( pVfeState, hwInfo, requiredScratchSize, getScratchPatchAddress(), maxFrontEndThreads, streamProperties, logicalStateHelper.get()); @@ -1477,4 +1471,29 @@ void CommandStreamReceiverHw::initializeDeviceWithFirstSubmission() { this->flushSmallTask(commandStream, commandStreamStart); } +template +void CommandStreamReceiverHw::handleFrontEndStateTransition(DispatchFlags &dispatchFlags) { + if (streamProperties.frontEndState.disableOverdispatch.value != -1) { + lastAdditionalKernelExecInfo = streamProperties.frontEndState.disableOverdispatch.value == 1 ? AdditionalKernelExecInfo::DisableOverdispatch : AdditionalKernelExecInfo::NotSet; + } + if (streamProperties.frontEndState.computeDispatchAllWalkerEnable.value != -1) { + lastKernelExecutionType = streamProperties.frontEndState.computeDispatchAllWalkerEnable.value == 1 ? KernelExecutionType::Concurrent : KernelExecutionType::Default; + } + + if (dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotApplicable && lastAdditionalKernelExecInfo != dispatchFlags.additionalKernelExecInfo && + feSupportFlags.disableOverdispatch) { + setMediaVFEStateDirty(true); + } + + if (dispatchFlags.kernelExecutionType != KernelExecutionType::NotApplicable && lastKernelExecutionType != dispatchFlags.kernelExecutionType && + feSupportFlags.computeDispatchAllWalker) { + setMediaVFEStateDirty(true); + } + + if ((streamProperties.frontEndState.disableEUFusion.value == -1 || dispatchFlags.disableEUFusion != !!streamProperties.frontEndState.disableEUFusion.value) && + feSupportFlags.disableEuFusion) { + setMediaVFEStateDirty(true); + } +} + } // namespace NEO diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index 88185ce215..bbb4023307 100644 --- a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -38,6 +38,7 @@ class UltCommandStreamReceiver : public 
CommandStreamReceiverHw, publ using BaseClass::getCmdSizeForPrologue; using BaseClass::getScratchPatchAddress; using BaseClass::getScratchSpaceController; + using BaseClass::handleFrontEndStateTransition; using BaseClass::indirectHeap; using BaseClass::iohState; using BaseClass::isBlitterDirectSubmissionEnabled; @@ -80,6 +81,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::CommandStreamReceiver::downloadAllocationImpl; using BaseClass::CommandStreamReceiver::executionEnvironment; using BaseClass::CommandStreamReceiver::experimentalCmdBuffer; + using BaseClass::CommandStreamReceiver::feSupportFlags; using BaseClass::CommandStreamReceiver::flushStamp; using BaseClass::CommandStreamReceiver::globalFenceAllocation; using BaseClass::CommandStreamReceiver::gpuHangCheckPeriod; @@ -91,6 +93,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::CommandStreamReceiver::isEnginePrologueSent; using BaseClass::CommandStreamReceiver::isPreambleSent; using BaseClass::CommandStreamReceiver::isStateSipSent; + using BaseClass::CommandStreamReceiver::lastAdditionalKernelExecInfo; using BaseClass::CommandStreamReceiver::lastKernelExecutionType; using BaseClass::CommandStreamReceiver::lastMediaSamplerConfig; using BaseClass::CommandStreamReceiver::lastMemoryCompressionState; diff --git a/shared/test/common/mocks/mock_command_stream_receiver.h b/shared/test/common/mocks/mock_command_stream_receiver.h index 19ba854243..7331b23c8a 100644 --- a/shared/test/common/mocks/mock_command_stream_receiver.h +++ b/shared/test/common/mocks/mock_command_stream_receiver.h @@ -231,6 +231,7 @@ class MockCsrHw2 : public CommandStreamReceiverHw { using CommandStreamReceiver::clearColorAllocation; using CommandStreamReceiver::commandStream; using CommandStreamReceiver::dispatchMode; + using CommandStreamReceiver::feSupportFlags; using CommandStreamReceiver::globalFenceAllocation; using CommandStreamReceiver::isPreambleSent; 
using CommandStreamReceiver::latestFlushedTaskCount;