diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index e817c35558..8524811246 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2007,14 +2007,17 @@ void CommandListCoreFamily::updateStreamProperties(Kernel &kernel using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using VFE_STATE_TYPE = typename GfxFamily::VFE_STATE_TYPE; + auto &hwInfo = device->getHwInfo(); + auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); + auto disableOverdispatch = hwHelper.isDisableOverdispatchAvailable(hwInfo); + if (!containsAnyKernel) { - requiredStreamState.frontEndState.setProperties(kernel.usesSyncBuffer(), false, device->getHwInfo()); + requiredStreamState.frontEndState.setProperties(kernel.usesSyncBuffer(), disableOverdispatch, device->getHwInfo()); finalStreamState = requiredStreamState; containsAnyKernel = true; } - auto &hwInfo = device->getHwInfo(); - finalStreamState.frontEndState.setProperties(kernel.usesSyncBuffer(), false, hwInfo); + finalStreamState.frontEndState.setProperties(kernel.usesSyncBuffer(), disableOverdispatch, hwInfo); if (finalStreamState.frontEndState.isDirty()) { auto pVfeStateAddress = NEO::PreambleHelper::getSpaceForVfeState(commandContainer.getCommandStream(), hwInfo, engineGroupType); auto pVfeState = new VFE_STATE_TYPE; @@ -2025,7 +2028,7 @@ void CommandListCoreFamily::updateStreamProperties(Kernel &kernel auto &kernelAttributes = kernel.getKernelDescriptor().kernelAttributes; auto &neoDevice = *device->getNEODevice(); - auto threadArbitrationPolicy = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily).getDefaultThreadArbitrationPolicy(); + auto threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy(); finalStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, threadArbitrationPolicy); NEO::EncodeWA::encodeAdditionalPipelineSelect(neoDevice, *commandContainer.getCommandStream(), true); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp index 7409ae05e7..56dbdecc5f 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel.cpp @@ -1292,5 +1292,31 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenKernelUsingSyncBufferWhenAppendLau } } +using Platforms = IsAtLeastProduct; + +HWTEST2_F(CommandListAppendLaunchKernel, whenUpdateStreamPropertiesIsCalledThenRequiredStateAndFinalStateAreCorrectlySet, Platforms) { + Mock<::L0::Kernel> kernel; + auto pMockModule = std::unique_ptr(new Mock(device, nullptr)); + kernel.module = pMockModule.get(); + + auto pCommandList = std::make_unique>>(); + auto result = pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(-1, pCommandList->requiredStreamState.frontEndState.disableOverdispatch.value); + EXPECT_EQ(-1, pCommandList->finalStreamState.frontEndState.disableOverdispatch.value); + + auto &hwHelper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); + int32_t expectedDisableOverdispatch = hwHelper.isDisableOverdispatchAvailable(*defaultHwInfo); + + pCommandList->updateStreamProperties(kernel, false); + EXPECT_EQ(expectedDisableOverdispatch, pCommandList->requiredStreamState.frontEndState.disableOverdispatch.value); + EXPECT_EQ(expectedDisableOverdispatch, pCommandList->finalStreamState.frontEndState.disableOverdispatch.value); + + pCommandList->updateStreamProperties(kernel, false); + EXPECT_EQ(expectedDisableOverdispatch, pCommandList->requiredStreamState.frontEndState.disableOverdispatch.value); + EXPECT_EQ(expectedDisableOverdispatch, pCommandList->finalStreamState.frontEndState.disableOverdispatch.value); +} + } // namespace ult } // namespace L0 diff --git a/opencl/source/kernel/kernel.h b/opencl/source/kernel/kernel.h index 44bd1f00e2..b17961264b 100644 --- a/opencl/source/kernel/kernel.h +++ b/opencl/source/kernel/kernel.h @@ -559,7 +559,7 @@ class Kernel : public ReferenceTrackedObject { UnifiedMemoryControls unifiedMemoryControls{}; bool isUnifiedMemorySyncRequired = true; bool debugEnabled = false; - uint32_t additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet; + uint32_t additionalKernelExecInfo = AdditionalKernelExecInfo::DisableOverdispatch; uint32_t *maxWorkGroupSizeForCrossThreadData = &Kernel::dummyPatchLocation; uint32_t maxKernelWorkGroupSize = 0; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp index 4f22b37d8e..67e5cd3d16 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp @@ -5,8 +5,13 @@ * */ +#include "shared/source/command_stream/scratch_space_controller.h" #include "shared/source/command_stream/scratch_space_controller_base.h" +#include "shared/source/helpers/constants.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" +#include "shared/test/common/mocks/mock_command_stream_receiver.h" +#include "shared/test/common/mocks/mock_device.h" +#include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/unit_test/utilities/base_object_utils.h" #include "opencl/source/event/user_event.h" @@ -563,6 +568,46 @@ HWTEST_F(CommandStreamReceiverHwTest, WhenForceEnableGpuIdleImplicitFlushThenExp EXPECT_TRUE(commandStreamReceiver->useGpuIdleImplicitFlush); } +HWTEST2_F(CommandStreamReceiverHwTest, whenProgramVFEStateIsCalledThenCorrectComputeOverdispatchDisableValueIsProgrammed, IsAtLeastXeHpCore) { + using CFE_STATE = typename FamilyType::CFE_STATE; + + UltDeviceFactory deviceFactory{1, 0}; + auto pDevice = deviceFactory.rootDevices[0]; + auto pHwInfo = pDevice->getRootDeviceEnvironment().getMutableHardwareInfo(); + auto &hwHelper = HwHelper::get(pHwInfo->platform.eRenderCoreFamily); + + uint8_t memory[1 * KB]; + auto mockCsr = std::make_unique>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), + pDevice->getDeviceBitfield()); + MockOsContext osContext{0, 8, EngineTypeUsage{aub_stream::ENGINE_CCS, EngineUsage::Regular}, PreemptionMode::Disabled, false}; + mockCsr->setupContext(osContext); + + uint32_t revisions[] = {REVISION_A0, REVISION_B}; + for (auto revision : revisions) { + pHwInfo->platform.usRevId = hwHelper.getHwRevIdFromStepping(revision, *pHwInfo); + + { + auto flags = DispatchFlagsHelper::createDefaultDispatchFlags(); + LinearStream commandStream{&memory, sizeof(memory)}; + mockCsr->mediaVfeStateDirty = true; + mockCsr->programVFEState(commandStream, flags, 10); + auto pCommand = reinterpret_cast(&memory); + + auto expectedDisableOverdispatch = hwHelper.isDisableOverdispatchAvailable(*pHwInfo); + EXPECT_EQ(expectedDisableOverdispatch, pCommand->getComputeOverdispatchDisable()); + } + { + auto flags = DispatchFlagsHelper::createDefaultDispatchFlags(); + flags.additionalKernelExecInfo = AdditionalKernelExecInfo::NotSet; + LinearStream commandStream{&memory, sizeof(memory)}; + mockCsr->mediaVfeStateDirty = true; + mockCsr->programVFEState(commandStream, flags, 10); + auto pCommand = reinterpret_cast(&memory); + EXPECT_FALSE(pCommand->getComputeOverdispatchDisable()); + } + } +} + HWTEST_F(BcsTests, WhenGetNumberOfBlitsForCopyPerRowIsCalledThenCorrectValuesAreReturned) { auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment(); auto maxWidthToCopy = static_cast(BlitCommandsHelper::getMaxBlitWidth(rootDeviceEnvironment)); diff --git a/opencl/test/unit_test/helpers/hw_helper_tests.cpp b/opencl/test/unit_test/helpers/hw_helper_tests.cpp index 8bf3e5f568..e2b5dad3e6 100644 --- a/opencl/test/unit_test/helpers/hw_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hw_helper_tests.cpp @@ -1292,10 +1292,9 @@ HWTEST_F(HwHelperTest, givenHwHelperWhenIsBlitterForImagesSupportedIsCalledThenF EXPECT_FALSE(helper.isBlitterForImagesSupported(*defaultHwInfo)); } -HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, givenHwHelperWhenAdditionalKernelExecInfoSupportCheckedThenReturnFalse) { +HWCMDTEST_F(IGFX_GEN8_CORE, HwHelperTest, givenHwHelperWhenAdditionalKernelExecInfoSupportCheckedThenCorrectValueIsReturned) { auto &helper = HwHelper::get(renderCoreFamily); - - EXPECT_FALSE(helper.additionalKernelExecInfoSupported(*defaultHwInfo)); + EXPECT_FALSE(helper.isDisableOverdispatchAvailable(*defaultHwInfo)); } TEST_F(HwHelperTest, WhenGettingIsCpuImageTransferPreferredThenFalseIsReturned) { diff --git a/opencl/test/unit_test/xe_hp_core/hw_helper_tests_xe_hp_core.cpp b/opencl/test/unit_test/xe_hp_core/hw_helper_tests_xe_hp_core.cpp index 36c1cf5af8..0fd5021626 100644 --- a/opencl/test/unit_test/xe_hp_core/hw_helper_tests_xe_hp_core.cpp +++ b/opencl/test/unit_test/xe_hp_core/hw_helper_tests_xe_hp_core.cpp @@ -298,3 +298,12 @@ XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenXeHpCoreWhenIsBlitterForImagesSup auto &helper = HwHelper::get(defaultHwInfo->platform.eRenderCoreFamily); EXPECT_TRUE(helper.isBlitterForImagesSupported(hwInfo)); } + +XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenHwHelperWhenAdditionalKernelExecInfoSupportCheckedThenCorrectValueIsReturned) { + auto &hwHelper = HwHelper::get(renderCoreFamily); + auto hwInfo = *defaultHwInfo; + EXPECT_FALSE(hwHelper.isDisableOverdispatchAvailable(hwInfo)); + + hwInfo.platform.usRevId = hwHelper.getHwRevIdFromStepping(REVISION_B, hwInfo); + EXPECT_TRUE(hwHelper.isDisableOverdispatchAvailable(hwInfo)); +} diff --git a/opencl/test/unit_test/xe_hp_core/test_cmds_programming_xe_hp_core.cpp b/opencl/test/unit_test/xe_hp_core/test_cmds_programming_xe_hp_core.cpp index c586180a76..1bd5f81ab4 100644 --- a/opencl/test/unit_test/xe_hp_core/test_cmds_programming_xe_hp_core.cpp +++ b/opencl/test/unit_test/xe_hp_core/test_cmds_programming_xe_hp_core.cpp @@ -42,35 +42,3 @@ XE_HP_CORE_TEST_F(CmdsProgrammingTestsXeHpCore, givenL1CachingOverrideWhenStateB memoryManager->freeGraphicsMemory(allocation); } - -XE_HP_CORE_TEST_F(CmdsProgrammingTestsXeHpCore, givenInterfaceDescriptorDataWhenBSteppingIsDetectedThenTGBatchSizeIsEqualTo3) { - using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; - - INTERFACE_DESCRIPTOR_DATA iddArg; - iddArg = FamilyType::cmdInitInterfaceDescriptorData; - - pDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->platform.usRevId = REVISION_B; - - EncodeDispatchKernel::adjustInterfaceDescriptorData(iddArg, pDevice->getHardwareInfo()); - EXPECT_EQ(3u, iddArg.getThreadGroupDispatchSize()); -} - -using PreambleCfeState = PreambleFixture; - -XE_HP_CORE_TEST_F(PreambleCfeState, givenXehpBSteppingWhenCfeIsProgrammedThenOverdispatchIsDisabled) { - using CFE_STATE = typename FamilyType::CFE_STATE; - - auto backup = defaultHwInfo->platform.usRevId; - defaultHwInfo->platform.usRevId = REVISION_B; - - auto pVfeCmd = PreambleHelper::getSpaceForVfeState(&linearStream, *defaultHwInfo, EngineGroupType::RenderCompute); - StreamProperties streamProperties{}; - PreambleHelper::programVfeState(pVfeCmd, *defaultHwInfo, 0u, 0, 0, AdditionalKernelExecInfo::NotApplicable, streamProperties); - parseCommands(linearStream); - auto cfeStateIt = find(cmdList.begin(), cmdList.end()); - ASSERT_NE(cmdList.end(), cfeStateIt); - auto cfeState = reinterpret_cast(*cfeStateIt); - - EXPECT_TRUE(cfeState->getComputeOverdispatchDisable()); - defaultHwInfo->platform.usRevId = backup; -} diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index f749df6d12..2c7b97f2e3 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -933,10 +933,11 @@ inline void CommandStreamReceiverHw::programVFEState(LinearStream &cs auto &hwHelper = NEO::HwHelper::get(hwInfo.platform.eRenderCoreFamily); auto engineGroupType = hwHelper.getEngineGroupType(getOsContext().getEngineType(), hwInfo); auto pVfeState = PreambleHelper::getSpaceForVfeState(&csr, hwInfo, engineGroupType); + auto disableOverdispatch = hwHelper.isDisableOverdispatchAvailable(hwInfo) && + (dispatchFlags.additionalKernelExecInfo != AdditionalKernelExecInfo::NotSet); StreamProperties streamProperties{}; streamProperties.frontEndState.setProperties(lastKernelExecutionType == KernelExecutionType::Concurrent, - dispatchFlags.additionalKernelExecInfo == AdditionalKernelExecInfo::DisableOverdispatch, - hwInfo); + disableOverdispatch, hwInfo); PreambleHelper::programVfeState( pVfeState, hwInfo, requiredScratchSize, getScratchPatchAddress(), maxFrontEndThreads, lastAdditionalKernelExecInfo, streamProperties); diff --git a/shared/source/helpers/hw_helper.h b/shared/source/helpers/hw_helper.h index c3f5e527ad..091e47748f 100644 --- a/shared/source/helpers/hw_helper.h +++ b/shared/source/helpers/hw_helper.h @@ -134,7 +134,7 @@ class HwHelper { virtual bool isCopyOnlyEngineType(EngineGroupType type) const = 0; virtual void adjustAddressWidthForCanonize(uint32_t &addressWidth) const = 0; virtual bool isSipWANeeded(const HardwareInfo &hwInfo) const = 0; - virtual bool additionalKernelExecInfoSupported(const HardwareInfo &hwInfo) const = 0; + virtual bool isDisableOverdispatchAvailable(const HardwareInfo &hwInfo) const = 0; virtual bool isCpuImageTransferPreferred(const HardwareInfo &hwInfo) const = 0; virtual bool isKmdMigrationSupported(const HardwareInfo &hwInfo) const = 0; virtual bool isNewResidencyModelSupported() const = 0; @@ -359,7 +359,7 @@ class HwHelperHw : public HwHelper { bool isSipWANeeded(const HardwareInfo &hwInfo) const override; - bool additionalKernelExecInfoSupported(const HardwareInfo &hwInfo) const override; + bool isDisableOverdispatchAvailable(const HardwareInfo &hwInfo) const override; bool isCpuImageTransferPreferred(const HardwareInfo &hwInfo) const override; diff --git a/shared/source/helpers/hw_helper_base.inl b/shared/source/helpers/hw_helper_base.inl index e9f4a60c8c..290e14b617 100644 --- a/shared/source/helpers/hw_helper_base.inl +++ b/shared/source/helpers/hw_helper_base.inl @@ -462,7 +462,7 @@ inline bool HwHelperHw::isBlitCopyRequiredForLocalMemory(const Hardwa } template -bool HwHelperHw::additionalKernelExecInfoSupported(const HardwareInfo &hwInfo) const { +bool HwHelperHw::isDisableOverdispatchAvailable(const HardwareInfo &hwInfo) const { return false; } diff --git a/shared/source/xe_hp_core/command_encoder_xe_hp_core.cpp b/shared/source/xe_hp_core/command_encoder_xe_hp_core.cpp index 6e1b9b86bb..334fcccb8a 100644 --- a/shared/source/xe_hp_core/command_encoder_xe_hp_core.cpp +++ b/shared/source/xe_hp_core/command_encoder_xe_hp_core.cpp @@ -44,9 +44,8 @@ void EncodeDispatchKernel::programBarrierEnable(INTERFACE_DESCRIPTOR_DAT template <> void EncodeDispatchKernel::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo) { - auto &helper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); - - if (helper.getSteppingFromHwRevId(hwInfo) >= REVISION_B) { + auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); + if (hwHelper.isDisableOverdispatchAvailable(hwInfo)) { interfaceDescriptor.setThreadGroupDispatchSize(3u); } diff --git a/shared/source/xe_hp_core/hw_helper_xe_hp_core.cpp b/shared/source/xe_hp_core/hw_helper_xe_hp_core.cpp index 3935a0534e..8bdba1f841 100644 --- a/shared/source/xe_hp_core/hw_helper_xe_hp_core.cpp +++ b/shared/source/xe_hp_core/hw_helper_xe_hp_core.cpp @@ -174,6 +174,11 @@ bool HwHelperHw::isBlitterForImagesSupported(const HardwareInfo &hwInfo) return true; } +template <> +bool HwHelperHw::isDisableOverdispatchAvailable(const HardwareInfo &hwInfo) const { + return (this->getSteppingFromHwRevId(hwInfo) >= REVISION_B); +} + template <> void MemorySynchronizationCommands::addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) { using PIPE_CONTROL = typename Family::PIPE_CONTROL; diff --git a/shared/source/xe_hp_core/preamble_xe_hp_core.cpp b/shared/source/xe_hp_core/preamble_xe_hp_core.cpp index 7752d77689..92a580db95 100644 --- a/shared/source/xe_hp_core/preamble_xe_hp_core.cpp +++ b/shared/source/xe_hp_core/preamble_xe_hp_core.cpp @@ -22,11 +22,6 @@ void PreambleHelper::appendProgramVFEState(const HardwareInfo &hwInfo, c command->setComputeOverdispatchDisable(streamProperties.frontEndState.disableOverdispatch.value == 1); - auto &helper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); - if (helper.getSteppingFromHwRevId(hwInfo) >= REVISION_B) { - command->setComputeOverdispatchDisable(true); - } - if (DebugManager.flags.CFEComputeOverdispatchDisable.get() != -1) { command->setComputeOverdispatchDisable(DebugManager.flags.CFEComputeOverdispatchDisable.get()); } diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_plus.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_plus.cpp index 04bfb832e7..5290530f83 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_plus.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_plus.cpp @@ -551,13 +551,23 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenInterfaceDescriptorDa INTERFACE_DESCRIPTOR_DATA iddArg; iddArg = FamilyType::cmdInitInterfaceDescriptorData; const uint32_t forceThreadGroupDispatchSize = -1; - const uint32_t defaultThreadGroupDispatchSize = iddArg.getThreadGroupDispatchSize(); + auto hwInfo = pDevice->getHardwareInfo(); + auto &hwHelper = HwHelper::get(renderCoreFamily); DebugManagerStateRestore restorer; DebugManager.flags.ForceThreadGroupDispatchSize.set(forceThreadGroupDispatchSize); - EncodeDispatchKernel::adjustInterfaceDescriptorData(iddArg, pDevice->getHardwareInfo()); - EXPECT_EQ(defaultThreadGroupDispatchSize, iddArg.getThreadGroupDispatchSize()); + uint32_t revisions[] = {REVISION_A0, REVISION_B}; + for (auto revision : revisions) { + hwInfo.platform.usRevId = hwHelper.getHwRevIdFromStepping(revision, hwInfo); + EncodeDispatchKernel::adjustInterfaceDescriptorData(iddArg, hwInfo); + + if (hwHelper.isDisableOverdispatchAvailable(hwInfo)) { + EXPECT_EQ(3u, iddArg.getThreadGroupDispatchSize()); + } else { + EXPECT_EQ(0u, iddArg.getThreadGroupDispatchSize()); + } + } } HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenInterfaceDescriptorDataWhenForceThreadGroupDispatchSizeVariableIsSetThenThreadGroupDispatchSizeIsChanged) {