diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 2bd9863ee7..0f203132c4 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -336,6 +336,12 @@ ze_result_t CommandListCoreFamily::initialize(Device *device, NEO enableSynchronizedDispatch((NEO::debugManager.flags.ForceSynchronizedDispatchMode.get() == 1) ? NEO::SynchronizedDispatchMode::full : NEO::SynchronizedDispatchMode::disabled); } + const bool copyOffloadSupported = l0GfxCoreHelper.isDefaultCmdListWithCopyOffloadSupported() || (NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.get() == 2); + + if (copyOffloadSupported && !this->internalUsage && !isCopyOffloadEnabled()) { + enableCopyOperationOffload(); + } + return returnType; } diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.cpp b/level_zero/core/source/cmdlist/cmdlist_imp.cpp index 57ed966746..e9e04a0850 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.cpp +++ b/level_zero/core/source/cmdlist/cmdlist_imp.cpp @@ -258,38 +258,57 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device commandList->copyThroughLockedPtrEnabled = gfxCoreHelper.copyThroughLockedPtrEnabled(hwInfo, productHelper); - const bool cmdListSupportsCopyOffload = !commandList->isCopyOnly(false) && commandList->isInOrderExecutionEnabled() && !productHelper.isDcFlushAllowed() && deviceImp->tryGetCopyEngineOrdinal().has_value(); + const bool cmdListSupportsCopyOffload = commandList->isInOrderExecutionEnabled() && !productHelper.isDcFlushAllowed(); if ((NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.get() == 1 || queueProperties.copyOffloadHint) && cmdListSupportsCopyOffload) { - commandList->enableCopyOperationOffload(productFamily, device, desc); + commandList->enableCopyOperationOffload(); } - - return commandList; } return commandList; } -void 
CommandListImp::enableCopyOperationOffload(uint32_t productFamily, Device *device, const ze_command_queue_desc_t *desc) { - this->copyOffloadMode = device->getL0GfxCoreHelper().getDefaultCopyOffloadMode(); - - if (this->copyOffloadMode != CopyOffloadModes::dualStream) { +void CommandListImp::enableCopyOperationOffload() { + if (isCopyOnly(false)) { return; } + this->copyOffloadMode = device->getL0GfxCoreHelper().getDefaultCopyOffloadMode(); + + if (this->copyOffloadMode != CopyOffloadModes::dualStream || !isImmediateType()) { + // No need to create internal bcs queue + return; + } + + if (!static_cast(device)->tryGetCopyEngineOrdinal().has_value()) { + this->copyOffloadMode = CopyOffloadModes::disabled; + return; + } + + auto &computeOsContext = getCsr(false)->getOsContext(); + + ze_command_queue_priority_t immediateQueuePriority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; + if (computeOsContext.isHighPriority()) { + immediateQueuePriority = ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_HIGH; + } else if (computeOsContext.isLowPriority()) { + immediateQueuePriority = ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW; + } + + ze_command_queue_mode_t immediateQueueMode = this->isSyncModeQueue ? 
ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS : ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + NEO::CommandStreamReceiver *copyCsr = nullptr; uint32_t ordinal = static_cast(device)->getCopyEngineOrdinal(); - device->getCsrForOrdinalAndIndex(&copyCsr, ordinal, 0, desc->priority, false); + device->getCsrForOrdinalAndIndex(&copyCsr, ordinal, 0, immediateQueuePriority, false); UNRECOVERABLE_IF(!copyCsr); ze_command_queue_desc_t copyQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC}; copyQueueDesc.ordinal = ordinal; - copyQueueDesc.mode = desc->mode; - copyQueueDesc.priority = desc->priority; + copyQueueDesc.mode = immediateQueueMode; + copyQueueDesc.priority = immediateQueuePriority; ze_result_t returnValue = ZE_RESULT_SUCCESS; - auto offloadCommandQueue = CommandQueue::create(productFamily, device, copyCsr, &copyQueueDesc, true, false, true, returnValue); + auto offloadCommandQueue = CommandQueue::create(device->getHwInfo().platform.eProductFamily, device, copyCsr, &copyQueueDesc, true, false, true, returnValue); UNRECOVERABLE_IF(!offloadCommandQueue); this->cmdQImmediateCopyOffload = offloadCommandQueue; diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.h b/level_zero/core/source/cmdlist/cmdlist_imp.h index 6a8af6b295..46328415ae 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.h +++ b/level_zero/core/source/cmdlist/cmdlist_imp.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -46,7 +46,7 @@ struct CommandListImp : public CommandList { virtual void patchInOrderCmds() = 0; void enableSynchronizedDispatch(NEO::SynchronizedDispatchMode mode); NEO::SynchronizedDispatchMode getSynchronizedDispatchMode() const { return synchronizedDispatchMode; } - void enableCopyOperationOffload(uint32_t productFamily, Device *device, const ze_command_queue_desc_t *desc); + void enableCopyOperationOffload(); void setInterruptEventsCsr(NEO::CommandStreamReceiver &csr); protected: diff --git 
a/level_zero/core/source/device/device_imp.cpp b/level_zero/core/source/device/device_imp.cpp index 7088b1e2dc..a559333dc7 100644 --- a/level_zero/core/source/device/device_imp.cpp +++ b/level_zero/core/source/device/device_imp.cpp @@ -271,7 +271,7 @@ ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc, const bool copyOffloadAllowed = cmdList->isInOrderExecutionEnabled() && !getProductHelper().isDcFlushAllowed() && (getL0GfxCoreHelper().getDefaultCopyOffloadMode() != CopyOffloadModes::dualStream); if (copyOffloadHint && copyOffloadAllowed) { - cmdList->enableCopyOperationOffload(productFamily, this, nullptr); + cmdList->enableCopyOperationOffload(); } if (returnValue != ZE_RESULT_SUCCESS) { diff --git a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h index c2ef16fe1b..b74d6257b9 100644 --- a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h +++ b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h @@ -114,6 +114,7 @@ class L0GfxCoreHelper : public NEO::ApiGfxCoreHelper { NEO::DeviceBitfield deviceBitfield) const = 0; virtual uint64_t getOaTimestampValidBits() const = 0; virtual CopyOffloadMode getDefaultCopyOffloadMode() const = 0; + virtual bool isDefaultCmdListWithCopyOffloadSupported() const = 0; protected: L0GfxCoreHelper() = default; @@ -170,6 +171,7 @@ class L0GfxCoreHelperHw : public L0GfxCoreHelper { NEO::DeviceBitfield deviceBitfield) const override; uint64_t getOaTimestampValidBits() const override; CopyOffloadMode getDefaultCopyOffloadMode() const override; + bool isDefaultCmdListWithCopyOffloadSupported() const override; protected: L0GfxCoreHelperHw() = default; diff --git a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl index 07a14055a0..c5b7147152 100644 --- a/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl +++ 
b/level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper_base.inl @@ -104,4 +104,9 @@ CopyOffloadMode L0GfxCoreHelperHw::getDefaultCopyOffloadMode() const { return CopyOffloadModes::dualStream; } +template +bool L0GfxCoreHelperHw::isDefaultCmdListWithCopyOffloadSupported() const { + return false; +} + } // namespace L0 diff --git a/level_zero/core/test/unit_tests/fixtures/in_order_cmd_list_fixture.h b/level_zero/core/test/unit_tests/fixtures/in_order_cmd_list_fixture.h index 1c1fdcedee..3f8bcaccd8 100644 --- a/level_zero/core/test/unit_tests/fixtures/in_order_cmd_list_fixture.h +++ b/level_zero/core/test/unit_tests/fixtures/in_order_cmd_list_fixture.h @@ -194,7 +194,7 @@ struct InOrderCmdListFixture : public ::Test { cmdList->enableInOrderExecution(); if (copyOffloadEnabled) { - cmdList->enableCopyOperationOffload(device->getHwInfo().platform.eProductFamily, device, &desc); + cmdList->enableCopyOperationOffload(); cmdList->copyOperationFenceSupported = device->getProductHelper().isDeviceToHostCopySignalingFenceRequired(); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp index e2165cebc1..82b6ee157f 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_in_order_cmdlist_2.cpp @@ -190,6 +190,22 @@ HWTEST2_F(CopyOffloadInOrderTests, givenNonDualStreamModeAndProfilingEventWithRe } } +HWTEST2_F(CopyOffloadInOrderTests, givenDebugFlagSetWhenCreatingRegularCmdListThenEnableCopyOffload, IsAtLeastXeHpCore) { + NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.set(1); + + auto regularCmdList = createRegularCmdList(false); + EXPECT_EQ(CopyOffloadModes::disabled, regularCmdList->copyOffloadMode); + + NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.set(2); + + regularCmdList = createRegularCmdList(false); + 
EXPECT_NE(CopyOffloadModes::disabled, regularCmdList->copyOffloadMode); + EXPECT_EQ(nullptr, regularCmdList->cmdQImmediateCopyOffload); + + regularCmdList = createRegularCmdList(true); + EXPECT_EQ(CopyOffloadModes::disabled, regularCmdList->copyOffloadMode); +} + HWTEST2_F(CopyOffloadInOrderTests, givenDebugFlagSetWhenCreatingCmdListThenEnableCopyOffload, IsAtLeastXeHpCore) { NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.set(1); @@ -213,11 +229,14 @@ HWTEST2_F(CopyOffloadInOrderTests, givenDebugFlagSetWhenCreatingCmdListThenEnabl EXPECT_NE(CopyOffloadModes::disabled, cmdList->copyOffloadMode); EXPECT_NE(nullptr, cmdList->cmdQImmediateCopyOffload); - auto queue = static_cast *>(cmdList->cmdQImmediateCopyOffload); - EXPECT_EQ(cmdQueueDesc.priority, queue->desc.priority); - EXPECT_EQ(cmdQueueDesc.mode, queue->desc.mode); - EXPECT_TRUE(queue->peekIsCopyOnlyCommandQueue()); - EXPECT_TRUE(NEO::EngineHelpers::isBcs(queue->getCsr()->getOsContext().getEngineType())); + auto copyQueue = static_cast *>(cmdList->cmdQImmediateCopyOffload); + auto computeQueue = static_cast *>(cmdList->cmdQImmediate); + + EXPECT_EQ(computeQueue->getCsr()->getOsContext().isHighPriority(), copyQueue->getCsr()->getOsContext().isHighPriority()); + + EXPECT_EQ(cmdQueueDesc.mode, copyQueue->desc.mode); + EXPECT_TRUE(copyQueue->peekIsCopyOnlyCommandQueue()); + EXPECT_TRUE(NEO::EngineHelpers::isBcs(copyQueue->getCsr()->getOsContext().getEngineType())); } zeCommandListDestroy(cmdListHandle); @@ -237,11 +256,13 @@ HWTEST2_F(CopyOffloadInOrderTests, givenDebugFlagSetWhenCreatingCmdListThenEnabl EXPECT_NE(CopyOffloadModes::disabled, cmdList->copyOffloadMode); EXPECT_NE(nullptr, cmdList->cmdQImmediateCopyOffload); - auto queue = static_cast *>(cmdList->cmdQImmediateCopyOffload); - EXPECT_EQ(cmdQueueDesc.priority, queue->desc.priority); - EXPECT_EQ(cmdQueueDesc.mode, queue->desc.mode); - EXPECT_TRUE(queue->peekIsCopyOnlyCommandQueue()); - 
EXPECT_TRUE(NEO::EngineHelpers::isBcs(queue->getCsr()->getOsContext().getEngineType())); + auto copyQueue = static_cast *>(cmdList->cmdQImmediateCopyOffload); + auto computeQueue = static_cast *>(cmdList->cmdQImmediate); + EXPECT_EQ(computeQueue->getCsr()->getOsContext().isHighPriority(), copyQueue->getCsr()->getOsContext().isHighPriority()); + + EXPECT_EQ(cmdQueueDesc.mode, copyQueue->desc.mode); + EXPECT_TRUE(copyQueue->peekIsCopyOnlyCommandQueue()); + EXPECT_TRUE(NEO::EngineHelpers::isBcs(copyQueue->getCsr()->getOsContext().getEngineType())); } zeCommandListDestroy(cmdListHandle); diff --git a/shared/source/debug_settings/debug_variables_base.inl b/shared/source/debug_settings/debug_variables_base.inl index 0e1d5a3852..b5b6ec0c0c 100644 --- a/shared/source/debug_settings/debug_variables_base.inl +++ b/shared/source/debug_settings/debug_variables_base.inl @@ -266,7 +266,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ExitOnSubmissionNumber, -1, "Call exit(0) on X s DECLARE_DEBUG_VARIABLE(int32_t, ExitOnSubmissionMode, 0, "Exit on X submission mode. 0: Any context type, 1: Compute context only, 2: Copy context only ") DECLARE_DEBUG_VARIABLE(int32_t, ForceInOrderImmediateCmdListExecution, -1, "-1: default, 0: disabled, 1: all Immediate Command Lists are switched to in-order execution") DECLARE_DEBUG_VARIABLE(int32_t, ForceInOrderEvents, -1, "-1: default, 0: disabled, 1: Enable all Events as in-order, to rely on command list counter value") -DECLARE_DEBUG_VARIABLE(int32_t, ForceCopyOperationOffloadForComputeCmdList, -1, "-1: default, 0: disabled, 1: Enabled. If set, all immediate compute in-order cmdlist will try to offload copy operations to copy engine") +DECLARE_DEBUG_VARIABLE(int32_t, ForceCopyOperationOffloadForComputeCmdList, -1, "-1: default, 0: disabled, 1: Enabled for immediate in-order cmd lists, 2: Enabled for all types. 
If enabled, all compute cmdlist will try to offload copy operations to copy engine") DECLARE_DEBUG_VARIABLE(int32_t, EnableImplicitConvertionToCounterBasedEvents, -1, "-1: default, 0: Disable, 1: Enable. If enabled, try to convert Regular Events used on Immediate CL to CounterBased") DECLARE_DEBUG_VARIABLE(int32_t, ForceTlbFlush, -1, "-1: default, 0: Tlb flush disabled, 1: Tlb Flush enabled") DECLARE_DEBUG_VARIABLE(int32_t, AllowDcFlush, -1, "-1: default, 0: DC flush disabled, 1: DC flush enabled")