From 841267ecbd310d8da4d32318b788f1216cc1e252 Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Mon, 4 Aug 2025 13:36:33 +0000 Subject: [PATCH] feature: save command buffer gpu address for front end command in command list Related-To: NEO-15376 Signed-off-by: Zbigniew Zdanowicz --- level_zero/core/source/cmdlist/cmdlist.h | 4 ++++ level_zero/core/source/cmdlist/cmdlist_hw.inl | 6 ++++- .../core/source/cmdlist/command_to_patch.h | 1 + .../core/source/cmdqueue/cmdqueue_hw.inl | 4 ++-- .../core/test/unit_tests/mocks/mock_cmdlist.h | 1 + .../test_cmdlist_append_launch_kernel_1.cpp | 19 ++++++++++++++++ .../helpers/test_preamble_dg2_and_later.cpp | 6 ++--- .../helpers/test_preamble_xe3_and_later.cpp | 2 +- .../helpers/test_preamble_xehp_and_later.cpp | 6 ++--- .../test_preamble_xe2_hpg_core.cpp | 4 ++-- .../test_cmds_programming_xe_hpg_core.cpp | 6 ++--- .../command_stream_receiver_hw_base.inl | 4 ++-- shared/source/gen12lp/preamble_gen12lp.cpp | 9 ++++++-- shared/source/helpers/preamble.h | 3 ++- .../helpers/preamble_xehp_and_later.inl | 9 ++++++-- .../gen12lp/test_preamble_gen12lp.cpp | 9 +++++--- .../unit_test/preamble/preamble_tests.cpp | 8 +++++-- .../preamble/test_preamble_xe2_and_later.cpp | 16 +++++++++----- .../preamble/test_preamble_xe_hpg_core.cpp | 12 +++++++--- .../xe_hpc_core/pvc/test_preamble_pvc.cpp | 4 ++-- .../xe_hpc_core/test_preamble_xe_hpc_core.cpp | 22 +++++++++++-------- 21 files changed, 108 insertions(+), 47 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index df3c81ea97..440b6bda5b 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -463,6 +463,9 @@ struct CommandList : _ze_command_list_handle_t { this->isWalkerWithProfilingEnqueued = false; return retVal; } + uint32_t getFrontEndPatchListCount() const { + return frontEndPatchListCount; + } protected: NEO::GraphicsAllocation *getAllocationFromHostPtrMap(const void *buffer, 
uint64_t bufferSize, bool copyOffload); @@ -525,6 +528,7 @@ struct CommandList : _ze_command_list_handle_t { uint32_t defaultMocsIndex = 0; int32_t defaultPipelinedThreadArbitrationPolicy = NEO::ThreadArbitrationPolicy::NotPresent; uint32_t maxLocalSubRegionSize = 0; + uint32_t frontEndPatchListCount = 0; bool isSyncModeQueue = false; bool isTbxMode = false; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index c2fd2c0cb7..3b433a9478 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -3741,12 +3741,15 @@ void CommandListCoreFamily::appendVfeStateCmdToPatch() { if constexpr (GfxFamily::isHeaplessRequired() == false) { auto &rootDeviceEnvironment = device->getNEODevice()->getRootDeviceEnvironment(); using FrontEndStateCommand = typename GfxFamily::FrontEndStateCommand; - auto frontEndStateAddress = NEO::PreambleHelper::getSpaceForVfeState(commandContainer.getCommandStream(), device->getHwInfo(), engineGroupType); + uint64_t gpuAddress = 0; + auto frontEndStateAddress = NEO::PreambleHelper::getSpaceForVfeState(commandContainer.getCommandStream(), device->getHwInfo(), engineGroupType, &gpuAddress); auto frontEndStateCmd = new FrontEndStateCommand; NEO::PreambleHelper::programVfeState(frontEndStateCmd, rootDeviceEnvironment, 0, 0, device->getMaxNumHwThreads(), finalStreamState); commandsToPatch.push_back({.pDestination = frontEndStateAddress, .pCommand = frontEndStateCmd, + .gpuAddress = gpuAddress, .type = CommandToPatch::FrontEndState}); + this->frontEndPatchListCount++; } } @@ -3940,6 +3943,7 @@ void CommandListCoreFamily::clearCommandsToPatch() { } commandsToPatch.clear(); } + this->frontEndPatchListCount = 0; } template diff --git a/level_zero/core/source/cmdlist/command_to_patch.h b/level_zero/core/source/cmdlist/command_to_patch.h index 0f3e30417b..f9f8bf1a77 100644 --- a/level_zero/core/source/cmdlist/command_to_patch.h +++ 
b/level_zero/core/source/cmdlist/command_to_patch.h @@ -37,6 +37,7 @@ struct CommandToPatch { void *pDestination = nullptr; void *pCommand = nullptr; uint64_t baseAddress = 0; + uint64_t gpuAddress = 0; mutable uint64_t scratchAddressAfterPatch = 0; size_t offset = 0; size_t inOrderPatchListIndex = 0; diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index ce3b374241..d6768ae23b 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -587,8 +587,8 @@ void CommandQueueHw::programFrontEnd(uint64_t scratchAddress, uin auto &gfxCoreHelper = device->getGfxCoreHelper(); auto engineGroupType = gfxCoreHelper.getEngineGroupType(csr->getOsContext().getEngineType(), csr->getOsContext().getEngineUsage(), hwInfo); - auto pVfeState = NEO::PreambleHelper::getSpaceForVfeState(&cmdStream, hwInfo, engineGroupType); - NEO::PreambleHelper::programVfeState(pVfeState, + auto feState = NEO::PreambleHelper::getSpaceForVfeState(&cmdStream, hwInfo, engineGroupType, nullptr); + NEO::PreambleHelper::programVfeState(feState, device->getNEODevice()->getRootDeviceEnvironment(), perThreadScratchSpaceSlot0Size, scratchAddress, diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index ea3c7c84af..76f9e88500 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -49,6 +49,7 @@ struct WhiteBox<::L0::CommandListCoreFamily> using BaseClass::appendMemoryCopyBlitRegion; using BaseClass::appendMultiTileBarrier; using BaseClass::appendSignalEventPostWalker; + using BaseClass::appendVfeStateCmdToPatch; using BaseClass::appendWriteKernelTimestamp; using BaseClass::applyMemoryRangesBarrier; using BaseClass::clearCommandsToPatch; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp 
b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp index cd9c77fd54..b65ef5157b 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp @@ -1505,6 +1505,25 @@ HWTEST2_F(CommandListAppendLaunchKernel, whenAppendVfeStateCmdPatchIsCalledAndHe commandList->appendVfeStateCmdToPatch(); } +HWTEST2_F(CommandListAppendLaunchKernel, GivenHeapfulSupportWhenAppendVfeStateCmdPatchIsCalledThenAddFeCmdToPatchList, IsAtLeastXeCore) { + if constexpr (FamilyType::isHeaplessRequired() == false) { + auto commandList = std::make_unique>>(); + auto result = commandList->initialize(device, NEO::EngineGroupType::compute, 0u); + EXPECT_EQ(0u, commandList->getFrontEndPatchListCount()); + + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + auto expectedGpuAddress = commandList->getCmdContainer().getCommandStream()->getCurrentGpuAddressPosition(); + commandList->appendVfeStateCmdToPatch(); + ASSERT_NE(0u, commandList->commandsToPatch.size()); + EXPECT_EQ(CommandToPatch::FrontEndState, commandList->commandsToPatch[0].type); + EXPECT_EQ(expectedGpuAddress, commandList->commandsToPatch[0].gpuAddress); + EXPECT_EQ(1u, commandList->getFrontEndPatchListCount()); + + commandList->reset(); + EXPECT_EQ(0u, commandList->getFrontEndPatchListCount()); + } +} + HWTEST2_F(CommandListAppendLaunchKernel, whenUpdateStreamPropertiesIsCalledThenCorrectThreadArbitrationPolicyIsSet, IsHeapfulSupported) { DebugManagerStateRestore restorer; debugManager.flags.ForceThreadArbitrationPolicyProgrammingWithScm.set(1); diff --git a/opencl/test/unit_test/helpers/test_preamble_dg2_and_later.cpp b/opencl/test/unit_test/helpers/test_preamble_dg2_and_later.cpp index f8933db412..14a9cd90be 100644 --- a/opencl/test/unit_test/helpers/test_preamble_dg2_and_later.cpp +++ b/opencl/test/unit_test/helpers/test_preamble_dg2_and_later.cpp @@ -29,7 +29,7 @@ 
HWTEST2_F(PreambleCfeStateDg2AndLater, whenprogramVFEStateIsCalledWithProperAddi GTEST_SKIP(); } - auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::renderCompute); + auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::renderCompute, nullptr); StreamProperties properties{}; properties.frontEndState.disableOverdispatch.value = 1; PreambleHelper::programVfeState(pVfeCmd, pDevice->getRootDeviceEnvironment(), 0u, 0, 0, properties); @@ -40,7 +40,7 @@ HWTEST2_F(PreambleCfeStateDg2AndLater, whenprogramVFEStateIsCalledWithProperAddi EXPECT_TRUE(cfeState->getComputeOverdispatchDisable()); cmdList.clear(); - pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::renderCompute); + pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::renderCompute, nullptr); properties.frontEndState.disableOverdispatch.value = 0; PreambleHelper::programVfeState(pVfeCmd, pDevice->getRootDeviceEnvironment(), 0u, 0, 0, properties); parseCommands(linearStream); @@ -61,7 +61,7 @@ HWTEST2_F(PreambleCfeStateDg2AndLater, givenSetDebugFlagWhenPreambleCfeStateIsPr debugManager.flags.ComputeOverdispatchDisable.set(expectedValue1); uint64_t expectedAddress = 1 << CFE_STATE::SCRATCHSPACEBUFFER_BIT_SHIFT; - auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::renderCompute); + auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::renderCompute, nullptr); StreamProperties emptyProperties{}; PreambleHelper::programVfeState(pVfeCmd, pDevice->getRootDeviceEnvironment(), 0u, expectedAddress, 16u, emptyProperties); diff --git a/opencl/test/unit_test/helpers/test_preamble_xe3_and_later.cpp b/opencl/test/unit_test/helpers/test_preamble_xe3_and_later.cpp index 72a34dbc8d..e26b27d106 100644 --- a/opencl/test/unit_test/helpers/test_preamble_xe3_and_later.cpp +++ 
b/opencl/test/unit_test/helpers/test_preamble_xe3_and_later.cpp @@ -29,7 +29,7 @@ HWTEST2_F(PreambleCfeStateXe3AndLater, givenSetDebugFlagWhenPreambleCfeStateIsPr debugManager.flags.MaximumNumberOfThreads.set(expectedValue2); uint64_t expectedAddress = 1 << CFE_STATE::SCRATCHSPACEBUFFER_BIT_SHIFT; - auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::renderCompute); + auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::renderCompute, nullptr); StreamProperties emptyProperties{}; PreambleHelper::programVfeState(pVfeCmd, pDevice->getRootDeviceEnvironment(), 0u, expectedAddress, 16u, emptyProperties); diff --git a/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp b/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp index a8163b7590..2837210475 100644 --- a/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp +++ b/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp @@ -131,7 +131,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, PreambleCfeStateXeHPAndLater, givenScratchEnabledWh uint64_t expectedAddress = 1 << CFE_STATE::SCRATCHSPACEBUFFER_BIT_SHIFT; uint32_t expectedMaxThreads = GfxCoreHelper::getMaxThreadsForVfe(*defaultHwInfo); - auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::renderCompute); + auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::renderCompute, nullptr); StreamProperties emptyProperties{}; PreambleHelper::programVfeState(pVfeCmd, pDevice->getRootDeviceEnvironment(), 0u, expectedAddress, expectedMaxThreads, emptyProperties); @@ -166,7 +166,7 @@ HWTEST2_F(PreambleCfeStateXeHPAndLater, givenNotSetDebugFlagWhenPreambleCfeState uint64_t expectedAddress = 1 << CFE_STATE::SCRATCHSPACEBUFFER_BIT_SHIFT; uint32_t expectedMaxThreads = GfxCoreHelper::getMaxThreadsForVfe(*defaultHwInfo); - auto pVfeCmd = 
PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::renderCompute); + auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::renderCompute, nullptr); StreamProperties emptyProperties{}; PreambleHelper::programVfeState(pVfeCmd, pDevice->getRootDeviceEnvironment(), 0u, expectedAddress, expectedMaxThreads, emptyProperties); uint32_t maximumNumberOfThreads = cfeState->getMaximumNumberOfThreads(); @@ -197,7 +197,7 @@ HWTEST2_F(PreambleCfeStateXeHPAndLater, givenSetDebugFlagWhenPreambleCfeStateIsP debugManager.flags.MaximumNumberOfThreads.set(expectedValue2); uint64_t expectedAddress = 1 << CFE_STATE::SCRATCHSPACEBUFFER_BIT_SHIFT; - auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::renderCompute); + auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::renderCompute, nullptr); StreamProperties emptyProperties{}; PreambleHelper::programVfeState(pVfeCmd, pDevice->getRootDeviceEnvironment(), 0u, expectedAddress, 16u, emptyProperties); diff --git a/opencl/test/unit_test/xe2_hpg_core/test_preamble_xe2_hpg_core.cpp b/opencl/test/unit_test/xe2_hpg_core/test_preamble_xe2_hpg_core.cpp index f5e59cc7e9..e41d5f7e5f 100644 --- a/opencl/test/unit_test/xe2_hpg_core/test_preamble_xe2_hpg_core.cpp +++ b/opencl/test/unit_test/xe2_hpg_core/test_preamble_xe2_hpg_core.cpp @@ -14,7 +14,7 @@ using PreambleCfeState = PreambleFixture; XE2_HPG_CORETEST_F(PreambleCfeState, givenXe2HpgCoreAndConcurrentKernelExecutionTypeWhenCallingProgramVFEStateThenSingleSpliceDispatchCcsModeIsEnabled) { using CFE_STATE = typename FamilyType::CFE_STATE; - auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::renderCompute); + auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::renderCompute, nullptr); StreamProperties streamProperties{}; 
streamProperties.initSupport(pDevice->getRootDeviceEnvironment()); streamProperties.frontEndState.setPropertiesAll(true, false, false); @@ -30,7 +30,7 @@ XE2_HPG_CORETEST_F(PreambleCfeState, givenXe2HpgCoreAndConcurrentKernelExecution XE2_HPG_CORETEST_F(PreambleCfeState, givenXe2HpgCoreAndDefaultKernelExecutionTypeWhenCallingProgramVFEStateThenSingleSpliceDispatchCcsModeIsDisabled) { using CFE_STATE = typename FamilyType::CFE_STATE; - auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::renderCompute); + auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::renderCompute, nullptr); StreamProperties streamProperties{}; streamProperties.initSupport(pDevice->getRootDeviceEnvironment()); streamProperties.frontEndState.setPropertiesAll(false, false, false); diff --git a/opencl/test/unit_test/xe_hpg_core/test_cmds_programming_xe_hpg_core.cpp b/opencl/test/unit_test/xe_hpg_core/test_cmds_programming_xe_hpg_core.cpp index c1d03c8c6f..d3f01bb3aa 100644 --- a/opencl/test/unit_test/xe_hpg_core/test_cmds_programming_xe_hpg_core.cpp +++ b/opencl/test/unit_test/xe_hpg_core/test_cmds_programming_xe_hpg_core.cpp @@ -333,7 +333,7 @@ HWTEST2_F(PreambleCfeState, givenXehpAndDisabledFusedEuWhenCfeStateProgrammedThe auto &hwInfo = *rootDeviceEnvironment.getMutableHardwareInfo(); hwInfo.capabilityTable.fusedEuEnabled = false; - auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::renderCompute); + auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::renderCompute, nullptr); StreamProperties streamProperties{}; streamProperties.initSupport(rootDeviceEnvironment); streamProperties.frontEndState.setPropertiesAll(false, false, false); @@ -356,7 +356,7 @@ HWTEST2_F(PreambleCfeState, givenXehpEnabledFusedEuAndDisableFusedDispatchFromKe auto &hwInfo = *rootDeviceEnvironment.getMutableHardwareInfo(); 
hwInfo.capabilityTable.fusedEuEnabled = true; - auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::renderCompute); + auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::renderCompute, nullptr); StreamProperties streamProperties{}; streamProperties.initSupport(rootDeviceEnvironment); streamProperties.frontEndState.setPropertiesAll(false, true, false); @@ -376,7 +376,7 @@ HWTEST2_F(PreambleCfeState, givenXehpAndEnabledFusedEuWhenCfeStateProgrammedThen auto &hwInfo = *rootDeviceEnvironment.getMutableHardwareInfo(); hwInfo.capabilityTable.fusedEuEnabled = true; - auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::renderCompute); + auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::renderCompute, nullptr); StreamProperties streamProperties{}; streamProperties.initSupport(rootDeviceEnvironment); streamProperties.frontEndState.setPropertiesAll(false, false, false); diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index f437bed8ac..ae015f61bf 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -929,7 +929,7 @@ inline void CommandStreamReceiverHw::programVFEState(LinearStream &cs auto &gfxCoreHelper = getGfxCoreHelper(); auto engineGroupType = gfxCoreHelper.getEngineGroupType(getOsContext().getEngineType(), getOsContext().getEngineUsage(), hwInfo); - auto pVfeState = PreambleHelper::getSpaceForVfeState(&csr, hwInfo, engineGroupType); + auto pVfeState = PreambleHelper::getSpaceForVfeState(&csr, hwInfo, engineGroupType, nullptr); PreambleHelper::programVfeState( pVfeState, peekRootDeviceEnvironment(), requiredScratchSlot0Size, getScratchPatchAddress(), maxFrontEndThreads, streamProperties); @@ -2032,7 +2032,7 @@ void 
CommandStreamReceiverHw::dispatchImmediateFlushFrontEndCommand(I auto &gfxCoreHelper = getGfxCoreHelper(); auto engineGroupType = gfxCoreHelper.getEngineGroupType(getOsContext().getEngineType(), getOsContext().getEngineUsage(), peekHwInfo()); - auto feStateCmdSpace = PreambleHelper::getSpaceForVfeState(&csrStream, peekHwInfo(), engineGroupType); + auto feStateCmdSpace = PreambleHelper::getSpaceForVfeState(&csrStream, peekHwInfo(), engineGroupType, nullptr); PreambleHelper::programVfeState(feStateCmdSpace, peekRootDeviceEnvironment(), requiredScratchSlot0Size, diff --git a/shared/source/gen12lp/preamble_gen12lp.cpp b/shared/source/gen12lp/preamble_gen12lp.cpp index 3b6b897301..38239b08d6 100644 --- a/shared/source/gen12lp/preamble_gen12lp.cpp +++ b/shared/source/gen12lp/preamble_gen12lp.cpp @@ -22,10 +22,15 @@ using Family = Gen12LpFamily; template void *PreambleHelper::getSpaceForVfeState(LinearStream *pCommandStream, const HardwareInfo &hwInfo, - EngineGroupType engineGroupType) { + EngineGroupType engineGroupType, + uint64_t *cmdBufferGpuAddress) { using MEDIA_VFE_STATE = typename GfxFamily::MEDIA_VFE_STATE; addPipeControlBeforeVfeCmd(pCommandStream, &hwInfo, engineGroupType); - return pCommandStream->getSpaceForCmd(); + void *cmdPtr = pCommandStream->getSpaceForCmd(); + if (cmdBufferGpuAddress) { + *cmdBufferGpuAddress = (pCommandStream->getCurrentGpuAddressPosition() - sizeof(MEDIA_VFE_STATE)); + } + return cmdPtr; } template diff --git a/shared/source/helpers/preamble.h b/shared/source/helpers/preamble.h index cff3dc96f5..f8c2a6f491 100644 --- a/shared/source/helpers/preamble.h +++ b/shared/source/helpers/preamble.h @@ -37,7 +37,8 @@ struct PreambleHelper { static void appendProgramVFEState(const RootDeviceEnvironment &rootDeviceEnvironment, const StreamProperties &streamProperties, void *cmd); static void *getSpaceForVfeState(LinearStream *pCommandStream, const HardwareInfo &hwInfo, - EngineGroupType engineGroupType); + EngineGroupType engineGroupType, + 
uint64_t *cmdBufferGpuAddress); static void programVfeState(void *pVfeState, const RootDeviceEnvironment &rootDeviceEnvironment, uint32_t scratchSize, diff --git a/shared/source/helpers/preamble_xehp_and_later.inl b/shared/source/helpers/preamble_xehp_and_later.inl index a669be5a95..c6c0b2eaee 100644 --- a/shared/source/helpers/preamble_xehp_and_later.inl +++ b/shared/source/helpers/preamble_xehp_and_later.inl @@ -32,10 +32,15 @@ uint32_t PreambleHelper::getUrbEntryAllocationSize() { template void *PreambleHelper::getSpaceForVfeState(LinearStream *pCommandStream, const HardwareInfo &hwInfo, - EngineGroupType engineGroupType) { + EngineGroupType engineGroupType, + uint64_t *cmdBufferGpuAddress) { if constexpr (GfxFamily::isHeaplessRequired() == false) { using CFE_STATE = typename GfxFamily::CFE_STATE; - return pCommandStream->getSpace(sizeof(CFE_STATE)); + void *cmdPtr = pCommandStream->getSpace(sizeof(CFE_STATE)); + if (cmdBufferGpuAddress) { + *cmdBufferGpuAddress = (pCommandStream->getCurrentGpuAddressPosition() - sizeof(CFE_STATE)); + } + return cmdPtr; } else { return nullptr; } diff --git a/shared/test/unit_test/gen12lp/test_preamble_gen12lp.cpp b/shared/test/unit_test/gen12lp/test_preamble_gen12lp.cpp index cd24aa6b80..145a6df0c1 100644 --- a/shared/test/unit_test/gen12lp/test_preamble_gen12lp.cpp +++ b/shared/test/unit_test/gen12lp/test_preamble_gen12lp.cpp @@ -54,7 +54,10 @@ HWTEST2_F(Gen12LpPreambleVfeState, GivenWaOffWhenProgrammingVfeStateThenProgramm typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; testWaTable->flags.waSendMIFLUSHBeforeVFE = 0; LinearStream &cs = linearStream; - auto vfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, pDevice->getHardwareInfo(), EngineGroupType::renderCompute); + uint64_t expectedBufferGpuAddress = linearStream.getCurrentGpuAddressPosition() + sizeof(PIPE_CONTROL); + uint64_t cmdBufferGpuAddress = 0; + auto vfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, pDevice->getHardwareInfo(), 
EngineGroupType::renderCompute, &cmdBufferGpuAddress); + EXPECT_EQ(expectedBufferGpuAddress, cmdBufferGpuAddress); StreamProperties emptyProperties{}; PreambleHelper::programVfeState(vfeCmd, pDevice->getRootDeviceEnvironment(), 0u, 0, 672u, emptyProperties); @@ -76,7 +79,7 @@ HWTEST2_F(Gen12LpPreambleVfeState, givenCcsEngineWhenWaIsSetThenAppropriatePipeC testWaTable->flags.waSendMIFLUSHBeforeVFE = 1; LinearStream &cs = linearStream; - auto vfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, pDevice->getHardwareInfo(), EngineGroupType::compute); + auto vfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, pDevice->getHardwareInfo(), EngineGroupType::compute, nullptr); StreamProperties emptyProperties{}; PreambleHelper::programVfeState(vfeCmd, pDevice->getRootDeviceEnvironment(), 0u, 0, 672u, emptyProperties); @@ -97,7 +100,7 @@ HWTEST2_F(Gen12LpPreambleVfeState, givenRcsEngineWhenWaIsSetThenAppropriatePipeC testWaTable->flags.waSendMIFLUSHBeforeVFE = 1; LinearStream &cs = linearStream; - auto vfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, pDevice->getHardwareInfo(), EngineGroupType::renderCompute); + auto vfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, pDevice->getHardwareInfo(), EngineGroupType::renderCompute, nullptr); StreamProperties emptyProperties{}; PreambleHelper::programVfeState(vfeCmd, pDevice->getRootDeviceEnvironment(), 0u, 0, 672u, emptyProperties); diff --git a/shared/test/unit_test/preamble/preamble_tests.cpp b/shared/test/unit_test/preamble/preamble_tests.cpp index 25fabd7727..a2cbaa1077 100644 --- a/shared/test/unit_test/preamble/preamble_tests.cpp +++ b/shared/test/unit_test/preamble/preamble_tests.cpp @@ -159,6 +159,7 @@ HWTEST_F(PreambleTest, givenMinHwThreadsUnoccupiedDebugVariableWhenGetThreadsMax HWCMDTEST_F(IGFX_GEN12LP_CORE, PreambleTest, WhenProgramVFEStateIsCalledThenCorrectVfeStateAddressIsReturned) { using MEDIA_VFE_STATE = typename FamilyType::MEDIA_VFE_STATE; + using PIPE_CONTROL = typename 
FamilyType::PIPE_CONTROL; char buffer[64]; MockGraphicsAllocation graphicsAllocation(buffer, sizeof(buffer)); @@ -166,7 +167,10 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, PreambleTest, WhenProgramVFEStateIsCalledThenCorr uint64_t addressToPatch = 0xC0DEC0DE; uint64_t expectedAddress = 0xC0DEC000; - auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&preambleStream, *defaultHwInfo, EngineGroupType::renderCompute); + uint64_t expectedGpuAddress = preambleStream.getCurrentGpuAddressPosition() + sizeof(PIPE_CONTROL); + uint64_t cmdBufferGpuAddress = 0; + auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&preambleStream, *defaultHwInfo, EngineGroupType::renderCompute, &cmdBufferGpuAddress); + EXPECT_EQ(expectedGpuAddress, cmdBufferGpuAddress); StreamProperties emptyProperties{}; MockExecutionEnvironment executionEnvironment{}; PreambleHelper::programVfeState(pVfeCmd, *executionEnvironment.rootDeviceEnvironments[0], 1024u, addressToPatch, 10u, emptyProperties); @@ -190,7 +194,7 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, PreambleTest, WhenGetScratchSpaceAddressOffsetFor FlatBatchBufferHelperHw helper(*mockDevice->getExecutionEnvironment()); uint64_t addressToPatch = 0xC0DEC0DE; - auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&preambleStream, mockDevice->getHardwareInfo(), EngineGroupType::renderCompute); + auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&preambleStream, mockDevice->getHardwareInfo(), EngineGroupType::renderCompute, nullptr); StreamProperties emptyProperties{}; PreambleHelper::programVfeState(pVfeCmd, mockDevice->getRootDeviceEnvironment(), 1024u, addressToPatch, 10u, emptyProperties); diff --git a/shared/test/unit_test/preamble/test_preamble_xe2_and_later.cpp b/shared/test/unit_test/preamble/test_preamble_xe2_and_later.cpp index 3c265649e1..d9320f83d1 100644 --- a/shared/test/unit_test/preamble/test_preamble_xe2_and_later.cpp +++ b/shared/test/unit_test/preamble/test_preamble_xe2_and_later.cpp @@ -29,13 +29,13 @@ HWTEST2_F(PreambleTest, 
givenAtLeastXe2HpgCoreAndNotSetDebugFlagWhenPreambleCfeS MockGraphicsAllocation graphicsAllocation(buffer, sizeof(buffer)); LinearStream preambleStream(&graphicsAllocation, graphicsAllocation.getUnderlyingBuffer(), graphicsAllocation.getUnderlyingBufferSize()); - auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&preambleStream, *defaultHwInfo, EngineGroupType::renderCompute); + auto feCmdPtr = PreambleHelper::getSpaceForVfeState(&preambleStream, *defaultHwInfo, EngineGroupType::renderCompute, nullptr); StreamProperties streamProperties{}; MockExecutionEnvironment executionEnvironment{}; - PreambleHelper::programVfeState(pVfeCmd, *executionEnvironment.rootDeviceEnvironments[0], 0u, 0, 0, streamProperties); + PreambleHelper::programVfeState(feCmdPtr, *executionEnvironment.rootDeviceEnvironments[0], 0u, 0, 0, streamProperties); - auto &cfeState = *reinterpret_cast(pVfeCmd); + auto &cfeState = *reinterpret_cast(feCmdPtr); EXPECT_EQ(cfeState.getStackIdControl(), static_cast(0b00u)); } @@ -47,15 +47,19 @@ HWTEST2_F(PreambleTest, givenAtLeastXe2HpgCoreAndSetDebugFlagWhenPreambleCfeStat LinearStream preambleStream(&graphicsAllocation, graphicsAllocation.getUnderlyingBuffer(), graphicsAllocation.getUnderlyingBufferSize()); DebugManagerStateRestore debugRestore; + uint64_t expectedBufferGpuAddress = preambleStream.getCurrentGpuAddressPosition(); + uint64_t cmdBufferGpuAddress = 0; + debugManager.flags.CFEStackIDControl.set(0b10u); - auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&preambleStream, *defaultHwInfo, EngineGroupType::renderCompute); + auto feCmdPtr = PreambleHelper::getSpaceForVfeState(&preambleStream, *defaultHwInfo, EngineGroupType::renderCompute, &cmdBufferGpuAddress); StreamProperties streamProperties{}; + EXPECT_EQ(expectedBufferGpuAddress, cmdBufferGpuAddress); MockExecutionEnvironment executionEnvironment{}; - PreambleHelper::programVfeState(pVfeCmd, *executionEnvironment.rootDeviceEnvironments[0], 0u, 0, 0, streamProperties); + 
PreambleHelper::programVfeState(feCmdPtr, *executionEnvironment.rootDeviceEnvironments[0], 0u, 0, 0, streamProperties); - auto &cfeState = *reinterpret_cast(pVfeCmd); + auto &cfeState = *reinterpret_cast(feCmdPtr); EXPECT_EQ(cfeState.getStackIdControl(), static_cast(0b10u)); } diff --git a/shared/test/unit_test/preamble/test_preamble_xe_hpg_core.cpp b/shared/test/unit_test/preamble/test_preamble_xe_hpg_core.cpp index 2aee71e830..9b63de3abf 100644 --- a/shared/test/unit_test/preamble/test_preamble_xe_hpg_core.cpp +++ b/shared/test/unit_test/preamble/test_preamble_xe_hpg_core.cpp @@ -25,13 +25,19 @@ HWTEST2_F(PreambleTest, givenDisableEUFusionWhenProgramVFEStateThenFusedEUDispat auto buffer = std::unique_ptr(new char[bufferSize]); LinearStream stream(buffer.get(), bufferSize); - auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&stream, *defaultHwInfo.get(), EngineGroupType::renderCompute); + stream.setGpuBase(0x1000); + uint64_t expectedBufferGpuAddress = stream.getCurrentGpuAddressPosition(); + uint64_t cmdBufferGpuAddress = 0; + + auto feCmdPtr = PreambleHelper::getSpaceForVfeState(&stream, *defaultHwInfo.get(), EngineGroupType::renderCompute, &cmdBufferGpuAddress); + EXPECT_EQ(expectedBufferGpuAddress, cmdBufferGpuAddress); + StreamProperties props; props.frontEndState.disableEUFusion.set(true); MockExecutionEnvironment executionEnvironment{}; - PreambleHelper::programVfeState(pVfeCmd, *executionEnvironment.rootDeviceEnvironments[0], 0, 0, 0, props); + PreambleHelper::programVfeState(feCmdPtr, *executionEnvironment.rootDeviceEnvironments[0], 0, 0, 0, props); - auto cfeCmd = reinterpret_cast(pVfeCmd); + auto cfeCmd = reinterpret_cast(feCmdPtr); EXPECT_EQ(1u, cfeCmd->getFusedEuDispatch()); } diff --git a/shared/test/unit_test/xe_hpc_core/pvc/test_preamble_pvc.cpp b/shared/test/unit_test/xe_hpc_core/pvc/test_preamble_pvc.cpp index 4f160c66b0..b47c308a08 100644 --- a/shared/test/unit_test/xe_hpc_core/pvc/test_preamble_pvc.cpp +++ 
b/shared/test/unit_test/xe_hpc_core/pvc/test_preamble_pvc.cpp @@ -19,7 +19,7 @@ PVCTEST_F(PreambleCfeState, givenXeHpcAndKernelExecutionTypeAndRevisionWhenCalli auto hwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); const auto &productHelper = pDevice->getProductHelper(); - auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *hwInfo, EngineGroupType::renderCompute); + auto feCmdPtr = PreambleHelper::getSpaceForVfeState(&linearStream, *hwInfo, EngineGroupType::renderCompute, nullptr); std::array, 4> revisions = { {{REVISION_A0, false}, {REVISION_A0, true}, @@ -32,7 +32,7 @@ PVCTEST_F(PreambleCfeState, givenXeHpcAndKernelExecutionTypeAndRevisionWhenCalli hwInfo->platform.usRevId = productHelper.getHwRevIdFromStepping(revision, *hwInfo); streamProperties.frontEndState.setPropertiesAll(kernelExecutionType, false, false); - PreambleHelper::programVfeState(pVfeCmd, pDevice->getRootDeviceEnvironment(), 0u, 0, 0, streamProperties); + PreambleHelper::programVfeState(feCmdPtr, pDevice->getRootDeviceEnvironment(), 0u, 0, 0, streamProperties); parseCommands(linearStream); auto cfeStateIt = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), cfeStateIt); diff --git a/shared/test/unit_test/xe_hpc_core/test_preamble_xe_hpc_core.cpp b/shared/test/unit_test/xe_hpc_core/test_preamble_xe_hpc_core.cpp index e24aec4ba4..9047573fe1 100644 --- a/shared/test/unit_test/xe_hpc_core/test_preamble_xe_hpc_core.cpp +++ b/shared/test/unit_test/xe_hpc_core/test_preamble_xe_hpc_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2024 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -23,10 +23,14 @@ XE_HPC_CORETEST_F(PreambleCfeState, givenXeHpcCoreAndSetDebugFlagWhenPreambleCfe debugManager.flags.CFEComputeDispatchAllWalkerEnable.set(expectedValue); + uint64_t expectedBufferGpuAddress = linearStream.getCurrentGpuAddressPosition(); + uint64_t cmdBufferGpuAddress = 0; + uint64_t expectedAddress = 1 << 
CFE_STATE::SCRATCHSPACEBUFFER_BIT_SHIFT; - auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::renderCompute); + auto feCmdPtr = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::renderCompute, &cmdBufferGpuAddress); + ASSERT_EQ(expectedBufferGpuAddress, cmdBufferGpuAddress); StreamProperties emptyProperties{}; - PreambleHelper::programVfeState(pVfeCmd, pDevice->getRootDeviceEnvironment(), 0u, expectedAddress, 16u, emptyProperties); + PreambleHelper::programVfeState(feCmdPtr, pDevice->getRootDeviceEnvironment(), 0u, expectedAddress, 16u, emptyProperties); parseCommands(linearStream); auto cfeStateIt = find(cmdList.begin(), cmdList.end()); @@ -45,12 +49,12 @@ XE_HPC_CORETEST_F(PreambleCfeState, givenKernelExecutionTypeConcurrentAndRevisio const auto &productHelper = pDevice->getProductHelper(); hwInfo.platform.usRevId = productHelper.getHwRevIdFromStepping(REVISION_B, hwInfo); - auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::renderCompute); + auto feCmdPtr = PreambleHelper::getSpaceForVfeState(&linearStream, hwInfo, EngineGroupType::renderCompute, nullptr); StreamProperties streamProperties{}; streamProperties.initSupport(pDevice->getRootDeviceEnvironment()); streamProperties.frontEndState.setPropertiesAll(true, false, false); - PreambleHelper::programVfeState(pVfeCmd, pDevice->getRootDeviceEnvironment(), 0u, 0, 0, streamProperties); + PreambleHelper::programVfeState(feCmdPtr, pDevice->getRootDeviceEnvironment(), 0u, 0, 0, streamProperties); parseCommands(linearStream); auto cfeStateIt = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), cfeStateIt); @@ -73,9 +77,9 @@ XE_HPC_CORETEST_F(PreambleCfeState, givenNotSetDebugFlagWhenPreambleCfeStateIsPr uint64_t expectedAddress = 1 << CFE_STATE::SCRATCHSPACEBUFFER_BIT_SHIFT; uint32_t expectedMaxThreads = GfxCoreHelper::getMaxThreadsForVfe(*defaultHwInfo); - auto pVfeCmd =
PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::renderCompute); + auto feCmdPtr = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::renderCompute, nullptr); StreamProperties emptyProperties{}; - PreambleHelper::programVfeState(pVfeCmd, pDevice->getRootDeviceEnvironment(), 0u, expectedAddress, expectedMaxThreads, emptyProperties); + PreambleHelper::programVfeState(feCmdPtr, pDevice->getRootDeviceEnvironment(), 0u, expectedAddress, expectedMaxThreads, emptyProperties); uint32_t maximumNumberOfThreads = cfeState->getMaximumNumberOfThreads(); EXPECT_EQ(numberOfWalkers, cfeState->getNumberOfWalkers()); @@ -100,9 +104,9 @@ XE_HPC_CORETEST_F(PreambleCfeState, givenSetDebugFlagWhenPreambleCfeStateIsProgr debugManager.flags.MaximumNumberOfThreads.set(expectedValue2); uint64_t expectedAddress = 1 << CFE_STATE::SCRATCHSPACEBUFFER_BIT_SHIFT; - auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::renderCompute); + auto feCmdPtr = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::renderCompute, nullptr); StreamProperties emptyProperties{}; - PreambleHelper::programVfeState(pVfeCmd, pDevice->getRootDeviceEnvironment(), 0u, expectedAddress, 16u, emptyProperties); + PreambleHelper::programVfeState(feCmdPtr, pDevice->getRootDeviceEnvironment(), 0u, expectedAddress, 16u, emptyProperties); parseCommands(linearStream); auto cfeStateIt = find(cmdList.begin(), cmdList.end());