diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 5dda319a26..f31ccb50d2 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -12,6 +12,7 @@ #include "shared/source/command_stream/preemption.h" #include "shared/source/debugger/debugger_l0.h" #include "shared/source/device/device.h" +#include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/blit_commands_helper.h" #include "shared/source/helpers/heap_helper.h" @@ -2339,8 +2340,20 @@ void CommandListCoreFamily::programStateBaseAddress(NEO::CommandC NEO::EncodeWA::addPipeControlBeforeStateBaseAddress(*commandContainer.getCommandStream(), hwInfo, isRcs); + auto gmmHelper = container.getDevice()->getRootDeviceEnvironment().getGmmHelper(); + uint32_t statelessMocsIndex = (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1); + STATE_BASE_ADDRESS sba; - NEO::EncodeStateBaseAddress::encode(commandContainer, sba, this->partitionCount > 1); + + NEO::EncodeStateBaseAddressArgs encodeStateBaseAddressArgs = { + &commandContainer, + sba, + statelessMocsIndex, + false, + this->partitionCount > 1, + isRcs}; + NEO::EncodeStateBaseAddress::encode(encodeStateBaseAddressArgs); + if (NEO::Debugger::isDebugEnabled(this->internalUsage) && device->getL0Debugger()) { NEO::Debugger::SbaAddresses sbaAddresses = {}; NEO::EncodeStateBaseAddress::setSbaAddressesForDebugger(sbaAddresses, sba); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index d35f6c4787..99707bf233 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -133,7 +133,8 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K launchParams.isCooperative, // isCooperative false, // isHostScopeSignalEvent false, // isKernelUsingSystemAllocation - cmdListType == CommandListType::TYPE_IMMEDIATE // isKernelDispatchedFromImmediateCmdList + cmdListType == CommandListType::TYPE_IMMEDIATE, // isKernelDispatchedFromImmediateCmdList + engineGroupType == NEO::EngineGroupType::RenderCompute // isRcs }; NEO::EncodeDispatchKernel::encode(commandContainer, dispatchKernelArgs, getLogicalStateHelper()); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index f266392396..fda8bda553 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -244,7 +244,8 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K launchParams.isCooperative, // isCooperative isHostSignalScopeEvent, // isHostScopeSignalEvent isKernelUsingSystemAllocation, // isKernelUsingSystemAllocation - cmdListType == CommandListType::TYPE_IMMEDIATE // isKernelDispatchedFromImmediateCmdList + cmdListType == CommandListType::TYPE_IMMEDIATE, // isKernelDispatchedFromImmediateCmdList + engineGroupType == NEO::EngineGroupType::RenderCompute // isRcs }; NEO::EncodeDispatchKernel::encode(commandContainer, dispatchKernelArgs, getLogicalStateHelper()); this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs; diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl index de900aed83..f6535ff366 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl @@ -52,7 +52,7 @@ void CommandQueueHw::programStateBaseAddress(uint64_t gsba, bool auto indirectObjectHeapBaseAddress = neoDevice->getMemoryManager()->getInternalHeapBaseAddress(device->getRootDeviceIndex(), useLocalMemoryForIndirectHeap); auto instructionHeapBaseAddress = neoDevice->getMemoryManager()->getInternalHeapBaseAddress(device->getRootDeviceIndex(), neoDevice->getMemoryManager()->isLocalMemoryUsedForIsa(neoDevice->getRootDeviceIndex())); - NEO::StateBaseAddressHelperArgs args = { + NEO::StateBaseAddressHelperArgs stateBaseAddressHelperArgs = { gsba, // generalStateBase indirectObjectHeapBaseAddress, // indirectObjectHeapBaseAddress instructionHeapBaseAddress, // instructionHeapBaseAddress @@ -72,7 +72,7 @@ void CommandQueueHw::programStateBaseAddress(uint64_t gsba, bool false // areMultipleSubDevicesInContext }; - NEO::StateBaseAddressHelper::programStateBaseAddress(args); + NEO::StateBaseAddressHelper::programStateBaseAddress(stateBaseAddressHelperArgs); *sbaCmdBuf = sbaCmd; csr->setGSBAStateDirty(false); @@ -92,7 +92,7 @@ size_t CommandQueueHw::estimateStateBaseAddressCmdSize() { using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; - size_t size = sizeof(STATE_BASE_ADDRESS) + sizeof(PIPE_CONTROL) + NEO::EncodeWA::getAdditionalPipelineSelectSize(*device->getNEODevice()); + size_t size = sizeof(STATE_BASE_ADDRESS) + sizeof(PIPE_CONTROL) + NEO::EncodeWA::getAdditionalPipelineSelectSize(*device->getNEODevice(), this->csr->isRcs()); if (NEO::Debugger::isDebugEnabled(internalUsage) && device->getL0Debugger() != nullptr) { const size_t trackedAddressesCount = 6; diff --git a/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl b/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl index 5d753c0f53..ed22502c86 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl @@ -43,7 +43,7 @@ void CommandQueueHw::programStateBaseAddress(uint64_t gsba, bool auto sbaCmdBuf = NEO::StateBaseAddressHelper::getSpaceForSbaCmd(commandStream); STATE_BASE_ADDRESS sbaCmd; - NEO::StateBaseAddressHelperArgs args = { + NEO::StateBaseAddressHelperArgs stateBaseAddressHelperArgs = { 0, // generalStateBase indirectObjectStateBaseAddress, // indirectObjectHeapBaseAddress instructionStateBaseAddress, // instructionHeapBaseAddress @@ -63,7 +63,7 @@ void CommandQueueHw::programStateBaseAddress(uint64_t gsba, bool false // areMultipleSubDevicesInContext }; - NEO::StateBaseAddressHelper::programStateBaseAddress(args); + NEO::StateBaseAddressHelper::programStateBaseAddress(stateBaseAddressHelperArgs); *sbaCmdBuf = sbaCmd; auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp index e2af585b98..36b7931ebf 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_1.cpp @@ -161,6 +161,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA false, false, false, + false, false}; NEO::EncodeDispatchKernel::encode(commandContainer, dispatchKernelArgs, static_cast *>(commandList.get())->getLogicalStateHelper()); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 7c2647b245..e00bce395e 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -488,6 +488,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenNotEnoughSpaceInCommandStreamWhenA false, false, false, + false, false}; NEO::EncodeDispatchKernel::encode(commandContainer, dispatchKernelArgs, nullptr); diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 6b565d8c51..985a93ae7a 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -47,6 +47,7 @@ struct EncodeDispatchKernelArgs { bool isHostScopeSignalEvent = false; bool isKernelUsingSystemAllocation = false; bool isKernelDispatchedFromImmediateCmdList = false; + bool isRcs = false; }; struct EncodeWalkerArgs { @@ -240,13 +241,26 @@ struct EncodeMediaInterfaceDescriptorLoad { static void encode(CommandContainer &container); }; +template +struct EncodeStateBaseAddressArgs { + using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; + + CommandContainer *container = nullptr; + STATE_BASE_ADDRESS &sbaCmd; + + uint32_t statelessMocsIndex = 0; + + bool useGlobalAtomics = false; + bool multiOsContextCapable = false; + bool isRcs = false; +}; + template struct EncodeStateBaseAddress { using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; - static void encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, bool multiOsContextCapable); - static void encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, uint32_t statelessMocsIndex, bool useGlobalAtomics, bool multiOsContextCapable); + static void encode(EncodeStateBaseAddressArgs &args); static void setSbaAddressesForDebugger(NEO::Debugger::SbaAddresses &sbaAddress, const STATE_BASE_ADDRESS &sbaCmd); - static size_t getRequiredSizeForStateBaseAddress(Device &device, CommandContainer &container); + static size_t getRequiredSizeForStateBaseAddress(Device &device, CommandContainer &container, bool isRcs); }; template @@ -313,7 +327,7 @@ template struct EncodeWA { static void encodeAdditionalPipelineSelect(LinearStream &stream, const PipelineSelectArgs &args, bool is3DPipeline, const HardwareInfo &hwInfo, bool isRcs); - static size_t getAdditionalPipelineSelectSize(Device &device); + static size_t getAdditionalPipelineSelectSize(Device &device, bool isRcs); static void addPipeControlPriorToNonPipelinedStateCommand(LinearStream &commandStream, PipeControlArgs args, const HardwareInfo &hwInfo, bool isRcs); diff --git a/shared/source/command_container/command_encoder_bdw_and_later.inl b/shared/source/command_container/command_encoder_bdw_and_later.inl index a8b2be4824..60e3e84c87 100644 --- a/shared/source/command_container/command_encoder_bdw_and_later.inl +++ b/shared/source/command_container/command_encoder_bdw_and_later.inl @@ -178,7 +178,15 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis auto gmmHelper = container.getDevice()->getGmmHelper(); uint32_t statelessMocsIndex = args.requiresUncachedMocs ? (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1) : (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1); - EncodeStateBaseAddress::encode(container, sba, statelessMocsIndex, false, false); + + EncodeStateBaseAddressArgs encodeStateBaseAddressArgs = { + &container, + sba, + statelessMocsIndex, + false, + false, + args.isRcs}; + EncodeStateBaseAddress::encode(encodeStateBaseAddressArgs); container.setDirtyStateForAllHeaps(false); args.requiresUncachedMocs = false; } @@ -350,58 +358,51 @@ void EncodeStateBaseAddress::setSbaAddressesForDebugger(NEO::Debugger::S } template -void EncodeStateBaseAddress::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, bool multiOsContextCapable) { - auto gmmHelper = container.getDevice()->getRootDeviceEnvironment().getGmmHelper(); - uint32_t statelessMocsIndex = (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1); - EncodeStateBaseAddress::encode(container, sbaCmd, statelessMocsIndex, false, multiOsContextCapable); -} - -template -void EncodeStateBaseAddress::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, uint32_t statelessMocsIndex, bool useGlobalAtomics, bool multiOsContextCapable) { - auto &device = *container.getDevice(); +void EncodeStateBaseAddress::encode(EncodeStateBaseAddressArgs &args) { + auto &device = *args.container->getDevice(); auto &hwInfo = device.getHardwareInfo(); - auto isRcs = device.getDefaultEngine().commandStreamReceiver->isRcs(); - if (container.isAnyHeapDirty()) { - EncodeWA::encodeAdditionalPipelineSelect(*container.getCommandStream(), {}, true, hwInfo, isRcs); + + if (args.container->isAnyHeapDirty()) { + EncodeWA::encodeAdditionalPipelineSelect(*args.container->getCommandStream(), {}, true, hwInfo, args.isRcs); } auto gmmHelper = device.getGmmHelper(); - auto dsh = container.isHeapDirty(HeapType::DYNAMIC_STATE) ? container.getIndirectHeap(HeapType::DYNAMIC_STATE) : nullptr; - auto ioh = container.isHeapDirty(HeapType::INDIRECT_OBJECT) ? container.getIndirectHeap(HeapType::INDIRECT_OBJECT) : nullptr; - auto ssh = container.isHeapDirty(HeapType::SURFACE_STATE) ? container.getIndirectHeap(HeapType::SURFACE_STATE) : nullptr; + auto dsh = args.container->isHeapDirty(HeapType::DYNAMIC_STATE) ? args.container->getIndirectHeap(HeapType::DYNAMIC_STATE) : nullptr; + auto ioh = args.container->isHeapDirty(HeapType::INDIRECT_OBJECT) ? args.container->getIndirectHeap(HeapType::INDIRECT_OBJECT) : nullptr; + auto ssh = args.container->isHeapDirty(HeapType::SURFACE_STATE) ? args.container->getIndirectHeap(HeapType::SURFACE_STATE) : nullptr; - StateBaseAddressHelperArgs args = { - 0, // generalStateBase - container.getIndirectObjectHeapBaseAddress(), // indirectObjectHeapBaseAddress - container.getInstructionHeapBaseAddress(), // instructionHeapBaseAddress - 0, // globalHeapsBaseAddress - &sbaCmd, // stateBaseAddressCmd - dsh, // dsh - ioh, // ioh - ssh, // ssh - gmmHelper, // gmmHelper - statelessMocsIndex, // statelessMocsIndex - NEO::MemoryCompressionState::NotApplicable, // memoryCompressionState - false, // setInstructionStateBaseAddress - false, // setGeneralStateBaseAddress - false, // useGlobalHeapsBaseAddress - false, // isMultiOsContextCapable - useGlobalAtomics, // useGlobalAtomics - false // areMultipleSubDevicesInContext + StateBaseAddressHelperArgs stateBaseAddressHelperArgs = { + 0, // generalStateBase + args.container->getIndirectObjectHeapBaseAddress(), // indirectObjectHeapBaseAddress + args.container->getInstructionHeapBaseAddress(), // instructionHeapBaseAddress + 0, // globalHeapsBaseAddress + &args.sbaCmd, // stateBaseAddressCmd + dsh, // dsh + ioh, // ioh + ssh, // ssh + gmmHelper, // gmmHelper + args.statelessMocsIndex, // statelessMocsIndex + NEO::MemoryCompressionState::NotApplicable, // memoryCompressionState + false, // setInstructionStateBaseAddress + false, // setGeneralStateBaseAddress + false, // useGlobalHeapsBaseAddress + false, // isMultiOsContextCapable + args.useGlobalAtomics, // useGlobalAtomics + false // areMultipleSubDevicesInContext }; - StateBaseAddressHelper::programStateBaseAddress(args); + StateBaseAddressHelper::programStateBaseAddress(stateBaseAddressHelperArgs); - auto cmdSpace = StateBaseAddressHelper::getSpaceForSbaCmd(*container.getCommandStream()); - *cmdSpace = sbaCmd; + auto cmdSpace = StateBaseAddressHelper::getSpaceForSbaCmd(*args.container->getCommandStream()); + *cmdSpace = args.sbaCmd; - EncodeWA::encodeAdditionalPipelineSelect(*container.getCommandStream(), {}, false, hwInfo, isRcs); + EncodeWA::encodeAdditionalPipelineSelect(*args.container->getCommandStream(), {}, false, hwInfo, args.isRcs); } template -size_t EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(Device &device, CommandContainer &container) { - return sizeof(typename Family::STATE_BASE_ADDRESS) + 2 * EncodeWA::getAdditionalPipelineSelectSize(device); +size_t EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(Device &device, CommandContainer &container, bool isRcs) { + return sizeof(typename Family::STATE_BASE_ADDRESS) + 2 * EncodeWA::getAdditionalPipelineSelectSize(device, isRcs); } template @@ -427,7 +428,7 @@ inline void EncodeWA::encodeAdditionalPipelineSelect(LinearStream &st const HardwareInfo &hwInfo, bool isRcs) {} template -inline size_t EncodeWA::getAdditionalPipelineSelectSize(Device &device) { +inline size_t EncodeWA::getAdditionalPipelineSelectSize(Device &device, bool isRcs) { return 0; } diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 474c820613..6630f606f3 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -215,7 +215,15 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis auto gmmHelper = container.getDevice()->getGmmHelper(); uint32_t statelessMocsIndex = args.requiresUncachedMocs ? (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) >> 1) : (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1); - EncodeStateBaseAddress::encode(container, sbaCmd, statelessMocsIndex, args.useGlobalAtomics, args.partitionCount > 1); + + EncodeStateBaseAddressArgs encodeStateBaseAddressArgs = { + &container, + sbaCmd, + statelessMocsIndex, + args.useGlobalAtomics, + args.partitionCount > 1, + args.isRcs}; + EncodeStateBaseAddress::encode(encodeStateBaseAddressArgs); container.setDirtyStateForAllHeaps(false); args.requiresUncachedMocs = false; } @@ -464,62 +472,55 @@ void EncodeStateBaseAddress::setSbaAddressesForDebugger(NEO::Debugger::S } template -void EncodeStateBaseAddress::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, bool multiOsContextCapable) { - auto gmmHelper = container.getDevice()->getRootDeviceEnvironment().getGmmHelper(); - uint32_t statelessMocsIndex = (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1); - EncodeStateBaseAddress::encode(container, sbaCmd, statelessMocsIndex, false, multiOsContextCapable); -} +void EncodeStateBaseAddress::encode(EncodeStateBaseAddressArgs &args) { + auto gmmHelper = args.container->getDevice()->getRootDeviceEnvironment().getGmmHelper(); -template -void EncodeStateBaseAddress::encode(CommandContainer &container, STATE_BASE_ADDRESS &sbaCmd, uint32_t statelessMocsIndex, bool useGlobalAtomics, bool multiOsContextCapable) { - auto gmmHelper = container.getDevice()->getRootDeviceEnvironment().getGmmHelper(); + auto dsh = args.container->isHeapDirty(HeapType::DYNAMIC_STATE) ? args.container->getIndirectHeap(HeapType::DYNAMIC_STATE) : nullptr; + auto ioh = args.container->isHeapDirty(HeapType::INDIRECT_OBJECT) ? args.container->getIndirectHeap(HeapType::INDIRECT_OBJECT) : nullptr; + auto ssh = args.container->isHeapDirty(HeapType::SURFACE_STATE) ? args.container->getIndirectHeap(HeapType::SURFACE_STATE) : nullptr; - auto dsh = container.isHeapDirty(HeapType::DYNAMIC_STATE) ? container.getIndirectHeap(HeapType::DYNAMIC_STATE) : nullptr; - auto ioh = container.isHeapDirty(HeapType::INDIRECT_OBJECT) ? container.getIndirectHeap(HeapType::INDIRECT_OBJECT) : nullptr; - auto ssh = container.isHeapDirty(HeapType::SURFACE_STATE) ? container.getIndirectHeap(HeapType::SURFACE_STATE) : nullptr; - - StateBaseAddressHelperArgs args = { - 0, // generalStateBase - container.getIndirectObjectHeapBaseAddress(), // indirectObjectHeapBaseAddress - container.getInstructionHeapBaseAddress(), // instructionHeapBaseAddress - 0, // globalHeapsBaseAddress - &sbaCmd, // stateBaseAddressCmd - dsh, // dsh - ioh, // ioh - ssh, // ssh - gmmHelper, // gmmHelper - statelessMocsIndex, // statelessMocsIndex - NEO::MemoryCompressionState::NotApplicable, // memoryCompressionState - true, // setInstructionStateBaseAddress - true, // setGeneralStateBaseAddress - false, // useGlobalHeapsBaseAddress - multiOsContextCapable, // isMultiOsContextCapable - useGlobalAtomics, // useGlobalAtomics - false // areMultipleSubDevicesInContext + StateBaseAddressHelperArgs stateBaseAddressHelperArgs = { + 0, // generalStateBase + args.container->getIndirectObjectHeapBaseAddress(), // indirectObjectHeapBaseAddress + args.container->getInstructionHeapBaseAddress(), // instructionHeapBaseAddress + 0, // globalHeapsBaseAddress + &args.sbaCmd, // stateBaseAddressCmd + dsh, // dsh + ioh, // ioh + ssh, // ssh + gmmHelper, // gmmHelper + args.statelessMocsIndex, // statelessMocsIndex + NEO::MemoryCompressionState::NotApplicable, // memoryCompressionState + true, // setInstructionStateBaseAddress + true, // setGeneralStateBaseAddress + false, // useGlobalHeapsBaseAddress + args.multiOsContextCapable, // isMultiOsContextCapable + args.useGlobalAtomics, // useGlobalAtomics + false // areMultipleSubDevicesInContext }; - StateBaseAddressHelper::programStateBaseAddress(args); + StateBaseAddressHelper::programStateBaseAddress(stateBaseAddressHelperArgs); - auto cmdSpace = StateBaseAddressHelper::getSpaceForSbaCmd(*container.getCommandStream()); - *cmdSpace = sbaCmd; + auto cmdSpace = StateBaseAddressHelper::getSpaceForSbaCmd(*args.container->getCommandStream()); + *cmdSpace = args.sbaCmd; - auto &hwInfo = container.getDevice()->getHardwareInfo(); + auto &hwInfo = args.container->getDevice()->getHardwareInfo(); auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); if (hwInfoConfig.isAdditionalStateBaseAddressWARequired(hwInfo)) { - cmdSpace = StateBaseAddressHelper::getSpaceForSbaCmd(*container.getCommandStream()); - *cmdSpace = sbaCmd; + cmdSpace = StateBaseAddressHelper::getSpaceForSbaCmd(*args.container->getCommandStream()); + *cmdSpace = args.sbaCmd; } - if (container.isHeapDirty(HeapType::SURFACE_STATE)) { - auto heap = container.getIndirectHeap(HeapType::SURFACE_STATE); - StateBaseAddressHelper::programBindingTableBaseAddress(*container.getCommandStream(), + if (args.container->isHeapDirty(HeapType::SURFACE_STATE)) { + auto heap = args.container->getIndirectHeap(HeapType::SURFACE_STATE); + StateBaseAddressHelper::programBindingTableBaseAddress(*args.container->getCommandStream(), *heap, gmmHelper); } } template -size_t EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(Device &device, CommandContainer &container) { +size_t EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(Device &device, CommandContainer &container, bool isRcs) { auto &hwInfo = device.getHardwareInfo(); auto &hwInfoConfig = *HwInfoConfig::get(hwInfo.platform.eProductFamily); @@ -677,7 +678,7 @@ inline void EncodeWA::encodeAdditionalPipelineSelect(LinearStream &strea const HardwareInfo &hwInfo, bool isRcs) {} template -inline size_t EncodeWA::getAdditionalPipelineSelectSize(Device &device) { +inline size_t EncodeWA::getAdditionalPipelineSelectSize(Device &device, bool isRcs) { return 0u; } template diff --git a/shared/source/gen12lp/command_encoder_gen12lp.cpp b/shared/source/gen12lp/command_encoder_gen12lp.cpp index 7768621038..935bccfe38 100644 --- a/shared/source/gen12lp/command_encoder_gen12lp.cpp +++ b/shared/source/gen12lp/command_encoder_gen12lp.cpp @@ -25,9 +25,9 @@ using Family = NEO::Gen12LpFamily; namespace NEO { template <> -size_t EncodeWA::getAdditionalPipelineSelectSize(Device &device) { +size_t EncodeWA::getAdditionalPipelineSelectSize(Device &device, bool isRcs) { size_t size = 0; - if (device.getDefaultEngine().commandStreamReceiver->isRcs()) { + if (isRcs) { size += 2 * PreambleHelper::getCmdSizeForPipelineSelect(device.getHardwareInfo()); } return size; diff --git a/shared/test/unit_test/encoders/command_encoder_tests_dg2.cpp b/shared/test/unit_test/encoders/command_encoder_tests_dg2.cpp index e68994fbcf..b531f754a9 100644 --- a/shared/test/unit_test/encoders/command_encoder_tests_dg2.cpp +++ b/shared/test/unit_test/encoders/command_encoder_tests_dg2.cpp @@ -31,14 +31,14 @@ HWTEST2_F(DG2CommandEncoderTest, givenDG2WhenGettingRequiredSizeForStateBaseAddr auto container = MockCommandContainer(); container.clearHeaps(); - size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container); + size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container, false); EXPECT_EQ(size, 176ul); } HWTEST2_F(DG2CommandEncoderTest, givenDG2AndCommandContainerWithDirtyHeapWhenGettingRequiredSizeForStateBaseAddressCommandThenCorrectSizeIsReturned, IsDG2) { auto container = CommandContainer(); container.setHeapDirty(HeapType::SURFACE_STATE); - size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container); + size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container, false); EXPECT_EQ(size, 192ul); } diff --git a/shared/test/unit_test/encoders/command_encoder_tests_xehp_and_later.cpp b/shared/test/unit_test/encoders/command_encoder_tests_xehp_and_later.cpp index 7c13e51e42..1cdbf4ea83 100644 --- a/shared/test/unit_test/encoders/command_encoder_tests_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/command_encoder_tests_xehp_and_later.cpp @@ -33,21 +33,21 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterHardwareCommandsTest, GivenXeHPAndLater HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterHardwareCommandsTest, givenXeHPAndLaterPlatformWhenGetAdditionalPipelineSelectSizeIsCalledThenZeroIsReturned) { MockDevice device; - EXPECT_EQ(0u, EncodeWA::getAdditionalPipelineSelectSize(device)); + EXPECT_EQ(0u, EncodeWA::getAdditionalPipelineSelectSize(device, false)); } using XeHPAndLaterCommandEncoderTest = Test; HWTEST2_F(XeHPAndLaterCommandEncoderTest, whenGettingRequiredSizeForStateBaseAddressCommandThenCorrectSizeIsReturned, IsAtLeastXeHpCore) { auto container = CommandContainer(); - size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container); + size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container, false); EXPECT_EQ(size, 104ul); } HWTEST2_F(XeHPAndLaterCommandEncoderTest, givenCommandContainerWithDirtyHeapWhenGettingRequiredSizeForStateBaseAddressCommandThenCorrectSizeIsReturned, IsAtLeastXeHpCore) { auto container = CommandContainer(); container.setHeapDirty(HeapType::SURFACE_STATE); - size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container); + size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container, false); EXPECT_EQ(size, 104ul); } diff --git a/shared/test/unit_test/encoders/test_command_encoder.cpp b/shared/test/unit_test/encoders/test_command_encoder.cpp index 920a0629e6..26b6d0be9f 100644 --- a/shared/test/unit_test/encoders/test_command_encoder.cpp +++ b/shared/test/unit_test/encoders/test_command_encoder.cpp @@ -24,43 +24,61 @@ using CommandEncoderTest = Test; using Platforms = IsWithinProducts; HWTEST2_F(CommandEncoderTest, whenGettingRequiredSizeForStateBaseAddressCommandThenCorrectSizeIsReturned, Platforms) { auto container = CommandContainer(); - size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container); + size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container, false); EXPECT_EQ(size, 76ul); } -HWTEST2_F(CommandEncoderTest, givenGLLPWhenGettingRequiredSizeForStateBaseAddressCommandThenCorrectSizeIsReturned, IsTGLLP) { +HWTEST2_F(CommandEncoderTest, givenTglLpUsingRcsWhenGettingRequiredSizeForStateBaseAddressCommandThenCorrectSizeIsReturned, IsTGLLP) { auto container = CommandContainer(); - size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container); + size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container, true); EXPECT_EQ(size, 200ul); } -HWTEST2_F(CommandEncoderTest, givenDG1WhenGettingRequiredSizeForStateBaseAddressCommandThenCorrectSizeIsReturned, IsDG1) { +HWTEST2_F(CommandEncoderTest, givenTglLpNotUsingRcsWhenGettingRequiredSizeForStateBaseAddressCommandThenCorrectSizeIsReturned, IsTGLLP) { auto container = CommandContainer(); - size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container); - EXPECT_EQ(size, 200ul); -} - -HWTEST2_F(CommandEncoderTest, givenEHLWhenGettingRequiredSizeForStateBaseAddressCommandThenCorrectSizeIsReturned, IsEHL) { - auto container = CommandContainer(); - size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container); + size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container, false); EXPECT_EQ(size, 88ul); } -HWTEST2_F(CommandEncoderTest, givenRKLWhenGettingRequiredSizeForStateBaseAddressCommandThenCorrectSizeIsReturned, IsRKL) { +HWTEST2_F(CommandEncoderTest, givenDg1UsingRcsWhenGettingRequiredSizeForStateBaseAddressCommandThenCorrectSizeIsReturned, IsDG1) { auto container = CommandContainer(); - size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container); + size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container, true); + EXPECT_EQ(size, 200ul); +} + +HWTEST2_F(CommandEncoderTest, givenDg1NotUsingRcsWhenGettingRequiredSizeForStateBaseAddressCommandThenCorrectSizeIsReturned, IsDG1) { + auto container = CommandContainer(); + size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container, false); + EXPECT_EQ(size, 88ul); +} + +HWTEST2_F(CommandEncoderTest, givenEhlWhenGettingRequiredSizeForStateBaseAddressCommandThenCorrectSizeIsReturned, IsEHL) { + auto container = CommandContainer(); + size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container, false); + EXPECT_EQ(size, 88ul); +} + +HWTEST2_F(CommandEncoderTest, givenRklUsingRcsWhenGettingRequiredSizeForStateBaseAddressCommandThenCorrectSizeIsReturned, IsRKL) { + auto container = CommandContainer(); + size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container, true); EXPECT_EQ(size, 104ul); } -HWTEST2_F(CommandEncoderTest, givenLFKWhenGettingRequiredSizeForStateBaseAddressCommandThenCorrectSizeIsReturned, IsLKF) { +HWTEST2_F(CommandEncoderTest, givenRklNotUsingRcsWhenGettingRequiredSizeForStateBaseAddressCommandThenCorrectSizeIsReturned, IsRKL) { auto container = CommandContainer(); - size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container); + size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container, false); EXPECT_EQ(size, 88ul); } -HWTEST2_F(CommandEncoderTest, givenICLLPWhenGettingRequiredSizeForStateBaseAddressCommandThenCorrectSizeIsReturned, IsICLLP) { +HWTEST2_F(CommandEncoderTest, givenLkfWhenGettingRequiredSizeForStateBaseAddressCommandThenCorrectSizeIsReturned, IsLKF) { auto container = CommandContainer(); - size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container); + size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container, false); + EXPECT_EQ(size, 88ul); +} + +HWTEST2_F(CommandEncoderTest, givenIclLpWhenGettingRequiredSizeForStateBaseAddressCommandThenCorrectSizeIsReturned, IsICLLP) { + auto container = CommandContainer(); + size_t size = EncodeStateBaseAddress::getRequiredSizeForStateBaseAddress(*pDevice, container, false); EXPECT_EQ(size, 88ul); } diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp index 55b2b02053..b142b3c98f 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp @@ -1236,6 +1236,7 @@ HWTEST_F(BindlessCommandEncodeStatesTest, givenGlobalBindlessHeapsWhenDispatchin false, false, false, + false, false}; EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs, nullptr); @@ -1282,6 +1283,7 @@ HWTEST_F(BindlessCommandEncodeStatesTest, givenBindlessModeDisabledelWithSampler false, false, false, + false, false}; EncodeDispatchKernel::encode(*cmdContainer.get(), dispatchArgs, nullptr); diff --git a/shared/test/unit_test/encoders/test_encode_states.cpp b/shared/test/unit_test/encoders/test_encode_states.cpp index cd176451d1..bcefd7d3e2 100644 --- a/shared/test/unit_test/encoders/test_encode_states.cpp +++ b/shared/test/unit_test/encoders/test_encode_states.cpp @@ -5,6 +5,7 @@ * */ +#include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/ptr_math.h" #include "shared/source/helpers/string.h" @@ -297,8 +298,14 @@ HWTEST2_F(CommandEncodeStatesTest, givenCommandContainerWithDirtyHeapsWhenSetSta cmdContainer->setHeapDirty(NEO::HeapType::INDIRECT_OBJECT); cmdContainer->setHeapDirty(NEO::HeapType::SURFACE_STATE); + auto gmmHelper = cmdContainer->getDevice()->getRootDeviceEnvironment().getGmmHelper(); + uint32_t statelessMocsIndex = (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1); + STATE_BASE_ADDRESS sba; - EncodeStateBaseAddress::encode(*cmdContainer.get(), sba, false); + + EncodeStateBaseAddressArgs args = createDefaultEncodeStateBaseAddressArgs(cmdContainer.get(), sba, statelessMocsIndex); + + EncodeStateBaseAddress::encode(args); auto dsh = cmdContainer->getIndirectHeap(NEO::HeapType::DYNAMIC_STATE); auto ssh = cmdContainer->getIndirectHeap(NEO::HeapType::SURFACE_STATE); @@ -332,7 +339,12 @@ HWTEST_F(CommandEncodeStatesTest, givenCommandContainerWhenSetStateBaseAddressCa cmdContainer->dirtyHeaps = 0; STATE_BASE_ADDRESS sba; - EncodeStateBaseAddress::encode(*cmdContainer.get(), sba, false); + auto gmmHelper = cmdContainer->getDevice()->getRootDeviceEnvironment().getGmmHelper(); + uint32_t statelessMocsIndex = (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1); + + EncodeStateBaseAddressArgs args = createDefaultEncodeStateBaseAddressArgs(cmdContainer.get(), sba, statelessMocsIndex); + + EncodeStateBaseAddress::encode(args); auto dsh = cmdContainer->getIndirectHeap(NEO::HeapType::DYNAMIC_STATE); auto ssh = cmdContainer->getIndirectHeap(NEO::HeapType::SURFACE_STATE); diff --git a/shared/test/unit_test/fixtures/command_container_fixture.h b/shared/test/unit_test/fixtures/command_container_fixture.h index 1d8ecfab3e..f012e87412 100644 --- a/shared/test/unit_test/fixtures/command_container_fixture.h +++ b/shared/test/unit_test/fixtures/command_container_fixture.h @@ -53,11 +53,27 @@ class CommandEncodeStatesFixture : public DeviceFixture { false, // isCooperative false, // isHostScopeSignalEvent false, // isKernelUsingSystemAllocation - false // isKernelDispatchedFromImmediateCmdList + false, // isKernelDispatchedFromImmediateCmdList + false // isRcs }; return args; } + + template + EncodeStateBaseAddressArgs createDefaultEncodeStateBaseAddressArgs( + CommandContainer *container, + typename FamilyType::STATE_BASE_ADDRESS &sbaCmd, + uint32_t statelessMocs) { + EncodeStateBaseAddressArgs args = { + container, + sbaCmd, + statelessMocs, + false, + false, + false}; + return args; + } }; } // namespace NEO diff --git a/shared/test/unit_test/gen12lp/command_encoder_tests_gen12lp.cpp b/shared/test/unit_test/gen12lp/command_encoder_tests_gen12lp.cpp index ed8cc53377..0ba33b40b0 100644 --- a/shared/test/unit_test/gen12lp/command_encoder_tests_gen12lp.cpp +++ b/shared/test/unit_test/gen12lp/command_encoder_tests_gen12lp.cpp @@ -32,7 +32,7 @@ GEN12LPTEST_F(Gen12LpCommandEncodeTest, givenGen12LpPlatformWhenDefaultEngineIsR auto csr = std::make_unique>(*device.getExecutionEnvironment(), 0, device.getDeviceBitfield()); auto oldCsr = device.getDefaultEngine().commandStreamReceiver; device.getDefaultEngine().commandStreamReceiver = csr.get(); - EXPECT_EQ(2 * PreambleHelper::getCmdSizeForPipelineSelect(device.getHardwareInfo()), EncodeWA::getAdditionalPipelineSelectSize(device)); + EXPECT_EQ(2 * PreambleHelper::getCmdSizeForPipelineSelect(device.getHardwareInfo()), EncodeWA::getAdditionalPipelineSelectSize(device, csr->isRcs())); device.getDefaultEngine().commandStreamReceiver = oldCsr; } @@ -41,6 +41,6 @@ GEN12LPTEST_F(Gen12LpCommandEncodeTest, givenGen12LpPlatformWhenDefaultEngineIsN auto csr = std::make_unique>(*device.getExecutionEnvironment(), 0, device.getDeviceBitfield()); auto oldCsr = device.getDefaultEngine().commandStreamReceiver; device.getDefaultEngine().commandStreamReceiver = csr.get(); - EXPECT_EQ(0u, EncodeWA::getAdditionalPipelineSelectSize(device)); + EXPECT_EQ(0u, EncodeWA::getAdditionalPipelineSelectSize(device, csr->isRcs())); device.getDefaultEngine().commandStreamReceiver = oldCsr; -} \ No newline at end of file +} diff --git a/shared/test/unit_test/gen12lp/test_command_encoder_gen12lp.cpp b/shared/test/unit_test/gen12lp/test_command_encoder_gen12lp.cpp index 1eab825533..34062035d6 100644 --- a/shared/test/unit_test/gen12lp/test_command_encoder_gen12lp.cpp +++ b/shared/test/unit_test/gen12lp/test_command_encoder_gen12lp.cpp @@ -7,17 +7,20 @@ #include "shared/source/command_container/cmdcontainer.h" #include "shared/source/command_container/command_encoder.h" +#include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/preamble.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/fixtures/device_fixture.h" #include "shared/test/common/test_macros/hw_test.h" +#include "shared/test/unit_test/fixtures/command_container_fixture.h" #include "reg_configs_common.h" using namespace NEO; using CommandEncoderTest = Test; +using CommandEncodeStatesTest = Test; GEN12LPTEST_F(CommandEncoderTest, WhenAdjustComputeModeIsCalledThenStateComputeModeShowsNonCoherencySet) { using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE; @@ -68,7 +71,7 @@ struct MockOsContext : public OsContext { using OsContext::engineType; }; -GEN12LPTEST_F(CommandEncoderTest, givenVariousEngineTypesWhenEncodeSBAThenAdditionalPipelineSelectWAIsAppliedOnlyToRcs) { +GEN12LPTEST_F(CommandEncodeStatesTest, givenVariousEngineTypesWhenEncodeSbaThenAdditionalPipelineSelectWAIsAppliedOnlyToRcs) { using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT; using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE; using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; @@ -78,9 +81,14 @@ GEN12LPTEST_F(CommandEncoderTest, givenVariousEngineTypesWhenEncodeSBAThenAdditi auto ret = cmdContainer.initialize(pDevice, nullptr, true); ASSERT_EQ(ErrorCode::SUCCESS, ret); + auto gmmHelper = cmdContainer.getDevice()->getRootDeviceEnvironment().getGmmHelper(); + uint32_t statelessMocsIndex = (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER) >> 1); + { STATE_BASE_ADDRESS sba; - EncodeStateBaseAddress::encode(cmdContainer, sba, false); + EncodeStateBaseAddressArgs args = createDefaultEncodeStateBaseAddressArgs(&cmdContainer, sba, statelessMocsIndex); + args.isRcs = true; + EncodeStateBaseAddress::encode(args); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed()); @@ -95,10 +103,10 @@ GEN12LPTEST_F(CommandEncoderTest, givenVariousEngineTypesWhenEncodeSBAThenAdditi cmdContainer.reset(); { - static_cast(pDevice->getDefaultEngine().osContext)->engineType = aub_stream::ENGINE_CCS; - STATE_BASE_ADDRESS sba; - EncodeStateBaseAddress::encode(cmdContainer, sba, false); + EncodeStateBaseAddressArgs args = createDefaultEncodeStateBaseAddressArgs(&cmdContainer, sba, statelessMocsIndex); + args.isRcs = false; + EncodeStateBaseAddress::encode(args); GenCmdList commands; CmdParse::parseCommandBuffer(commands, ptrOffset(cmdContainer.getCommandStream()->getCpuBase(), 0), cmdContainer.getCommandStream()->getUsed());