diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h index ee937eb16f..9ffe4cfaba 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.h @@ -169,7 +169,6 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily dependenciesPresent{false}; }; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index b6f7210a43..45fc48b7fd 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -12,6 +12,7 @@ #include "shared/source/command_stream/scratch_space_controller.h" #include "shared/source/command_stream/wait_status.h" #include "shared/source/debugger/debugger_l0.h" +#include "shared/source/direct_submission/relaxed_ordering_helper.h" #include "shared/source/helpers/bindless_heaps_helper.h" #include "shared/source/helpers/completion_stamp.h" #include "shared/source/helpers/hw_info.h" @@ -321,27 +322,13 @@ bool CommandListCoreFamilyImmediate::waitForEventsFromHost() { return true; } -template -bool CommandListCoreFamilyImmediate::isRelaxedOrderingDispatchAllowed(uint32_t numWaitEvents) const { - if (numWaitEvents == 0u) { - return false; - } - - uint32_t minimalNumberOfClients = 2; - if (NEO::DebugManager.flags.DirectSubmissionRelaxedOrderingMinNumberOfClients.get() != -1) { - minimalNumberOfClients = static_cast(NEO::DebugManager.flags.DirectSubmissionRelaxedOrderingMinNumberOfClients.get()); - } - - return (this->csr->getNumClients() >= minimalNumberOfClients && this->csr->directSubmissionRelaxedOrderingEnabled()); -} - template ze_result_t CommandListCoreFamilyImmediate::appendLaunchKernel( ze_kernel_handle_t kernelHandle, const ze_group_count_t *threadGroupDimensions, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, const CmdListKernelLaunchParams &launchParams, bool relaxedOrderingDispatch) { - relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); + relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); @@ -365,7 +352,7 @@ template ze_result_t CommandListCoreFamilyImmediate::appendLaunchKernelIndirect( ze_kernel_handle_t kernelHandle, const ze_group_count_t *pDispatchArgumentsBuffer, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { - relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); + relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); @@ -402,7 +389,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopy( ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { - relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); + relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); @@ -423,7 +410,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopy( NEO::TransferDirection direction; auto isSplitNeeded = this->isAppendSplitNeeded(dstptr, srcptr, size, direction); if (isSplitNeeded) { - relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(1); // split generates more than 1 event + relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, 1); // split generates more than 1 event ret = static_cast(this->device)->bcsSplit.appendSplitCall(this, dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents, true, relaxedOrderingDispatch, direction, [&](void *dstptrParam, const void *srcptrParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) { return CommandListCoreFamily::appendMemoryCopy(dstptrParam, srcptrParam, sizeParam, hSignalEventParam, 0u, nullptr, relaxedOrderingDispatch); }); @@ -447,7 +434,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopyRegio ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { - relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); + relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); @@ -459,7 +446,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryCopyRegio NEO::TransferDirection direction; auto isSplitNeeded = this->isAppendSplitNeeded(dstPtr, srcPtr, this->getTotalSizeForCopyRegion(dstRegion, dstPitch, dstSlicePitch), direction); if (isSplitNeeded) { - relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(1); // split generates more than 1 event + relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, 1); // split generates more than 1 event ret = static_cast(this->device)->bcsSplit.appendSplitCall(this, dstRegion->originX, srcRegion->originX, dstRegion->width, hSignalEvent, numWaitEvents, phWaitEvents, true, relaxedOrderingDispatch, direction, [&](uint32_t dstOriginXParam, uint32_t srcOriginXParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) { ze_copy_region_t dstRegionLocal = {}; ze_copy_region_t srcRegionLocal = {}; @@ -488,7 +475,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendMemoryFill(void ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { - relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); + relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); @@ -541,7 +528,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendPageFaultCopy(N bool relaxedOrdering = false; if (isSplitNeeded) { - relaxedOrdering = isRelaxedOrderingDispatchAllowed(1); // split generates more than 1 event + relaxedOrdering = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, 1); // split generates more than 1 event uintptr_t dstAddress = static_cast(dstAllocation->getGpuAddress()); uintptr_t srcAddress = static_cast(srcAllocation->getGpuAddress()); ret = static_cast(this->device)->bcsSplit.appendSplitCall(this, dstAddress, srcAddress, size, nullptr, 0u, nullptr, false, relaxedOrdering, direction, [&](uintptr_t dstAddressParam, uintptr_t srcAddressParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) { @@ -615,7 +602,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyRegion ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { - relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); + relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); @@ -635,7 +622,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyFromMe ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { - relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); + relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); @@ -656,7 +643,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendImageCopyToMemo ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) { - relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); + relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); @@ -690,7 +677,7 @@ ze_result_t CommandListCoreFamilyImmediate::appendLaunchCooperati ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *waitEventHandles, bool relaxedOrderingDispatch) { - relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents); + relaxedOrderingDispatch = NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*this->csr, numWaitEvents); if (this->isFlushTaskSubmissionEnabled) { checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch); diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h index ebb6ee85c7..4eb2798639 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdlist.h @@ -156,7 +156,6 @@ struct WhiteBox> using BaseClass::getHostPtrAlloc; using BaseClass::immediateCmdListHeapSharing; using BaseClass::isFlushTaskSubmissionEnabled; - using BaseClass::isRelaxedOrderingDispatchAllowed; using BaseClass::isSyncModeQueue; using BaseClass::isTbxMode; using BaseClass::partitionCount; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp index 7ba856301c..8d01e2cffc 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/command_container/encode_surface_state.h" +#include "shared/source/direct_submission/relaxed_ordering_helper.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" @@ -1334,42 +1335,6 @@ HWTEST2_F(CommandListCreate, whenGettingCommandsToPatchThenCorrectValuesAreRetur EXPECT_EQ(&commandList->commandsToPatch, &commandList->getCommandsToPatch()); } -HWTEST2_F(CommandListCreate, givenNumClientsWhenAskingIfRelaxedOrderingEnabledThenReturnCorrectValue, IsAtLeastXeHpcCore) { - DebugManagerStateRestore restore; - DebugManager.flags.DirectSubmissionRelaxedOrdering.set(1); - - auto commandList = std::make_unique>>(); - commandList->csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver; - - auto ultCsr = static_cast *>(commandList->csr); - ultCsr->registerClient(); - ultCsr->recordFlusheBatchBuffer = true; - - auto directSubmission = new NEO::MockDirectSubmissionHw>(*ultCsr); - ultCsr->directSubmission.reset(directSubmission); - - EXPECT_EQ(1u, ultCsr->getNumClients()); - EXPECT_FALSE(commandList->isRelaxedOrderingDispatchAllowed(1)); - - ultCsr->registerClient(); - - EXPECT_EQ(2u, ultCsr->getNumClients()); - EXPECT_TRUE(commandList->isRelaxedOrderingDispatchAllowed(1)); - - DebugManager.flags.DirectSubmissionRelaxedOrderingMinNumberOfClients.set(4); - - EXPECT_EQ(2u, ultCsr->getNumClients()); - EXPECT_FALSE(commandList->isRelaxedOrderingDispatchAllowed(1)); - - ultCsr->registerClient(); - EXPECT_EQ(3u, ultCsr->getNumClients()); - EXPECT_FALSE(commandList->isRelaxedOrderingDispatchAllowed(1)); - - ultCsr->registerClient(); - EXPECT_EQ(4u, ultCsr->getNumClients()); - EXPECT_TRUE(commandList->isRelaxedOrderingDispatchAllowed(1)); -} - HWTEST2_F(CommandListCreate, givenNonEmptyCommandsToPatchWhenClearCommandsToPatchIsCalledThenCommandsAreCorrectlyCleared, IsAtLeastSkl) { using VFE_STATE_TYPE = typename FamilyType::VFE_STATE_TYPE; diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 72c2e0ef2d..8341e0bcc2 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -373,7 +373,7 @@ class CommandStreamReceiver { virtual SubmissionStatus initializeDeviceWithFirstSubmission() = 0; - uint32_t getNumClients() { + uint32_t getNumClients() const { return this->numClients.load(); } uint32_t registerClient() { diff --git a/shared/source/direct_submission/CMakeLists.txt b/shared/source/direct_submission/CMakeLists.txt index c6b7101583..e616445fb7 100644 --- a/shared/source/direct_submission/CMakeLists.txt +++ b/shared/source/direct_submission/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright (C) 2020-2022 Intel Corporation +# Copyright (C) 2020-2023 Intel Corporation # # SPDX-License-Identifier: MIT # @@ -17,6 +17,7 @@ set(NEO_CORE_DIRECT_SUBMISSION ${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_hw_diagnostic_mode.cpp ${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_hw_diagnostic_mode.h ${CMAKE_CURRENT_SOURCE_DIR}/direct_submission_properties.h + ${CMAKE_CURRENT_SOURCE_DIR}/relaxed_ordering_helper.cpp ${CMAKE_CURRENT_SOURCE_DIR}/relaxed_ordering_helper.h ) diff --git a/shared/source/direct_submission/relaxed_ordering_helper.cpp b/shared/source/direct_submission/relaxed_ordering_helper.cpp new file mode 100644 index 0000000000..16b83fb19d --- /dev/null +++ b/shared/source/direct_submission/relaxed_ordering_helper.cpp @@ -0,0 +1,30 @@ +/* + * Copyright (C) 2023 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/direct_submission/relaxed_ordering_helper.h" + +#include "shared/source/command_stream/command_stream_receiver.h" +#include "shared/source/debug_settings/debug_settings_manager.h" + +namespace NEO { +namespace RelaxedOrderingHelper { + +bool isRelaxedOrderingDispatchAllowed(const CommandStreamReceiver &csr, uint32_t numWaitEvents) { + if (numWaitEvents == 0u) { + return false; + } + + uint32_t minimalNumberOfClients = 2; + if (DebugManager.flags.DirectSubmissionRelaxedOrderingMinNumberOfClients.get() != -1) { + minimalNumberOfClients = static_cast(DebugManager.flags.DirectSubmissionRelaxedOrderingMinNumberOfClients.get()); + } + + return (csr.getNumClients() >= minimalNumberOfClients && csr.directSubmissionRelaxedOrderingEnabled()); +} + +} // namespace RelaxedOrderingHelper +} // namespace NEO \ No newline at end of file diff --git a/shared/source/direct_submission/relaxed_ordering_helper.h b/shared/source/direct_submission/relaxed_ordering_helper.h index ec3c8db51f..f042fea938 100644 --- a/shared/source/direct_submission/relaxed_ordering_helper.h +++ b/shared/source/direct_submission/relaxed_ordering_helper.h @@ -11,7 +11,10 @@ #include "shared/source/command_container/encode_alu_helper.h" namespace NEO { +class CommandStreamReceiver; + namespace RelaxedOrderingHelper { +bool isRelaxedOrderingDispatchAllowed(const CommandStreamReceiver &csr, uint32_t numWaitEvents); static constexpr uint32_t queueSizeMultiplier = 4; diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp index f03e5f671b..53a35c42c3 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp @@ -2941,4 +2941,34 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenReturnPtrsRequiredWhenAskin size_t sizeWitfRetPtr = directSubmission.getSizeDispatch(true, true); EXPECT_EQ(baseSize + RelaxedOrderingHelper::getSizeReturnPtrRegs(), sizeWitfRetPtr); +} + +HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenNumClientsWhenAskingIfRelaxedOrderingEnabledThenReturnCorrectValue, IsAtLeastXeHpcCore) { + auto ultCsr = static_cast *>(pDevice->getDefaultEngine().commandStreamReceiver); + + ultCsr->registerClient(); + + auto directSubmission = new NEO::MockDirectSubmissionHw>(*ultCsr); + ultCsr->directSubmission.reset(directSubmission); + + EXPECT_EQ(1u, ultCsr->getNumClients()); + EXPECT_FALSE(NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*ultCsr, 1)); + + ultCsr->registerClient(); + + EXPECT_EQ(2u, ultCsr->getNumClients()); + EXPECT_TRUE(NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*ultCsr, 1)); + + DebugManager.flags.DirectSubmissionRelaxedOrderingMinNumberOfClients.set(4); + + EXPECT_EQ(2u, ultCsr->getNumClients()); + EXPECT_FALSE(NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*ultCsr, 1)); + + ultCsr->registerClient(); + EXPECT_EQ(3u, ultCsr->getNumClients()); + EXPECT_FALSE(NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*ultCsr, 1)); + + ultCsr->registerClient(); + EXPECT_EQ(4u, ultCsr->getNumClients()); + EXPECT_TRUE(NEO::RelaxedOrderingHelper::isRelaxedOrderingDispatchAllowed(*ultCsr, 1)); } \ No newline at end of file