feature: allow enabling copy offload on default cmd list

Related-To: NEO-7067

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski 2025-05-15 15:10:38 +00:00 committed by Compute-Runtime-Automation
parent c5c87ab6f9
commit 7f7aa36c52
9 changed files with 80 additions and 27 deletions

View File

@ -336,6 +336,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
enableSynchronizedDispatch((NEO::debugManager.flags.ForceSynchronizedDispatchMode.get() == 1) ? NEO::SynchronizedDispatchMode::full : NEO::SynchronizedDispatchMode::disabled);
}
const bool copyOffloadSupported = l0GfxCoreHelper.isDefaultCmdListWithCopyOffloadSupported() || (NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.get() == 2);
if (copyOffloadSupported && !this->internalUsage && !isCopyOffloadEnabled()) {
enableCopyOperationOffload();
}
return returnType;
}

View File

@ -258,38 +258,57 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
commandList->copyThroughLockedPtrEnabled = gfxCoreHelper.copyThroughLockedPtrEnabled(hwInfo, productHelper);
const bool cmdListSupportsCopyOffload = !commandList->isCopyOnly(false) && commandList->isInOrderExecutionEnabled() && !productHelper.isDcFlushAllowed() && deviceImp->tryGetCopyEngineOrdinal().has_value();
const bool cmdListSupportsCopyOffload = commandList->isInOrderExecutionEnabled() && !productHelper.isDcFlushAllowed();
if ((NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.get() == 1 || queueProperties.copyOffloadHint) && cmdListSupportsCopyOffload) {
commandList->enableCopyOperationOffload(productFamily, device, desc);
commandList->enableCopyOperationOffload();
}
return commandList;
}
return commandList;
}
void CommandListImp::enableCopyOperationOffload(uint32_t productFamily, Device *device, const ze_command_queue_desc_t *desc) {
this->copyOffloadMode = device->getL0GfxCoreHelper().getDefaultCopyOffloadMode();
if (this->copyOffloadMode != CopyOffloadModes::dualStream) {
void CommandListImp::enableCopyOperationOffload() {
if (isCopyOnly(false)) {
return;
}
this->copyOffloadMode = device->getL0GfxCoreHelper().getDefaultCopyOffloadMode();
if (this->copyOffloadMode != CopyOffloadModes::dualStream || !isImmediateType()) {
// No need to create internal bcs queue
return;
}
if (!static_cast<DeviceImp *>(device)->tryGetCopyEngineOrdinal().has_value()) {
this->copyOffloadMode = CopyOffloadModes::disabled;
return;
}
auto &computeOsContext = getCsr(false)->getOsContext();
ze_command_queue_priority_t immediateQueuePriority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
if (computeOsContext.isHighPriority()) {
immediateQueuePriority = ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_HIGH;
} else if (computeOsContext.isLowPriority()) {
immediateQueuePriority = ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW;
}
ze_command_queue_mode_t immediateQueueMode = this->isSyncModeQueue ? ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS : ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
NEO::CommandStreamReceiver *copyCsr = nullptr;
uint32_t ordinal = static_cast<DeviceImp *>(device)->getCopyEngineOrdinal();
device->getCsrForOrdinalAndIndex(&copyCsr, ordinal, 0, desc->priority, false);
device->getCsrForOrdinalAndIndex(&copyCsr, ordinal, 0, immediateQueuePriority, false);
UNRECOVERABLE_IF(!copyCsr);
ze_command_queue_desc_t copyQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
copyQueueDesc.ordinal = ordinal;
copyQueueDesc.mode = desc->mode;
copyQueueDesc.priority = desc->priority;
copyQueueDesc.mode = immediateQueueMode;
copyQueueDesc.priority = immediateQueuePriority;
ze_result_t returnValue = ZE_RESULT_SUCCESS;
auto offloadCommandQueue = CommandQueue::create(productFamily, device, copyCsr, &copyQueueDesc, true, false, true, returnValue);
auto offloadCommandQueue = CommandQueue::create(device->getHwInfo().platform.eProductFamily, device, copyCsr, &copyQueueDesc, true, false, true, returnValue);
UNRECOVERABLE_IF(!offloadCommandQueue);
this->cmdQImmediateCopyOffload = offloadCommandQueue;

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2024 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -46,7 +46,7 @@ struct CommandListImp : public CommandList {
virtual void patchInOrderCmds() = 0;
void enableSynchronizedDispatch(NEO::SynchronizedDispatchMode mode);
NEO::SynchronizedDispatchMode getSynchronizedDispatchMode() const { return synchronizedDispatchMode; }
void enableCopyOperationOffload(uint32_t productFamily, Device *device, const ze_command_queue_desc_t *desc);
void enableCopyOperationOffload();
void setInterruptEventsCsr(NEO::CommandStreamReceiver &csr);
protected:

View File

@ -271,7 +271,7 @@ ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc,
const bool copyOffloadAllowed = cmdList->isInOrderExecutionEnabled() && !getProductHelper().isDcFlushAllowed() && (getL0GfxCoreHelper().getDefaultCopyOffloadMode() != CopyOffloadModes::dualStream);
if (copyOffloadHint && copyOffloadAllowed) {
cmdList->enableCopyOperationOffload(productFamily, this, nullptr);
cmdList->enableCopyOperationOffload();
}
if (returnValue != ZE_RESULT_SUCCESS) {

View File

@ -114,6 +114,7 @@ class L0GfxCoreHelper : public NEO::ApiGfxCoreHelper {
NEO::DeviceBitfield deviceBitfield) const = 0;
virtual uint64_t getOaTimestampValidBits() const = 0;
virtual CopyOffloadMode getDefaultCopyOffloadMode() const = 0;
virtual bool isDefaultCmdListWithCopyOffloadSupported() const = 0;
protected:
L0GfxCoreHelper() = default;
@ -170,6 +171,7 @@ class L0GfxCoreHelperHw : public L0GfxCoreHelper {
NEO::DeviceBitfield deviceBitfield) const override;
uint64_t getOaTimestampValidBits() const override;
CopyOffloadMode getDefaultCopyOffloadMode() const override;
bool isDefaultCmdListWithCopyOffloadSupported() const override;
protected:
L0GfxCoreHelperHw() = default;

View File

@ -104,4 +104,9 @@ CopyOffloadMode L0GfxCoreHelperHw<Family>::getDefaultCopyOffloadMode() const {
return CopyOffloadModes::dualStream;
}
// Template-generic answer to "is copy offload supported for default command lists?".
// This implementation unconditionally reports false.
template <typename Family>
bool L0GfxCoreHelperHw<Family>::isDefaultCmdListWithCopyOffloadSupported() const {
    constexpr bool supportedByDefault = false;
    return supportedByDefault;
}
} // namespace L0

View File

@ -194,7 +194,7 @@ struct InOrderCmdListFixture : public ::Test<ModuleFixture> {
cmdList->enableInOrderExecution();
if (copyOffloadEnabled) {
cmdList->enableCopyOperationOffload(device->getHwInfo().platform.eProductFamily, device, &desc);
cmdList->enableCopyOperationOffload();
cmdList->copyOperationFenceSupported = device->getProductHelper().isDeviceToHostCopySignalingFenceRequired();
}

View File

@ -190,6 +190,22 @@ HWTEST2_F(CopyOffloadInOrderTests, givenNonDualStreamModeAndProfilingEventWithRe
}
}
// Verifies how the ForceCopyOperationOffloadForComputeCmdList debug flag affects command lists
// produced by createRegularCmdList: with value 1 copy offload stays disabled, with value 2 the
// copy offload mode is set but no copy offload queue is attached, and a list created with the
// `true` argument stays disabled even with value 2.
HWTEST2_F(CopyOffloadInOrderTests, givenDebugFlagSetWhenCreatingRegularCmdListThenEnableCopyOffload, IsAtLeastXeHpCore) {
// Flag value 1: the regular command list is expected to keep copy offload disabled.
NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.set(1);
auto regularCmdList = createRegularCmdList<FamilyType::gfxCoreFamily>(false);
EXPECT_EQ(CopyOffloadModes::disabled, regularCmdList->copyOffloadMode);
// Flag value 2: the mode must no longer be disabled, yet the list must not own a copy offload queue.
NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.set(2);
regularCmdList = createRegularCmdList<FamilyType::gfxCoreFamily>(false);
EXPECT_NE(CopyOffloadModes::disabled, regularCmdList->copyOffloadMode);
EXPECT_EQ(nullptr, regularCmdList->cmdQImmediateCopyOffload);
// NOTE(review): the `true` argument presumably requests a copy-only list — confirm against
// createRegularCmdList. Copy offload is expected to remain disabled for it.
regularCmdList = createRegularCmdList<FamilyType::gfxCoreFamily>(true);
EXPECT_EQ(CopyOffloadModes::disabled, regularCmdList->copyOffloadMode);
}
HWTEST2_F(CopyOffloadInOrderTests, givenDebugFlagSetWhenCreatingCmdListThenEnableCopyOffload, IsAtLeastXeHpCore) {
NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.set(1);
@ -213,11 +229,14 @@ HWTEST2_F(CopyOffloadInOrderTests, givenDebugFlagSetWhenCreatingCmdListThenEnabl
EXPECT_NE(CopyOffloadModes::disabled, cmdList->copyOffloadMode);
EXPECT_NE(nullptr, cmdList->cmdQImmediateCopyOffload);
auto queue = static_cast<WhiteBox<L0::CommandQueue> *>(cmdList->cmdQImmediateCopyOffload);
EXPECT_EQ(cmdQueueDesc.priority, queue->desc.priority);
EXPECT_EQ(cmdQueueDesc.mode, queue->desc.mode);
EXPECT_TRUE(queue->peekIsCopyOnlyCommandQueue());
EXPECT_TRUE(NEO::EngineHelpers::isBcs(queue->getCsr()->getOsContext().getEngineType()));
auto copyQueue = static_cast<WhiteBox<L0::CommandQueue> *>(cmdList->cmdQImmediateCopyOffload);
auto computeQueue = static_cast<WhiteBox<L0::CommandQueue> *>(cmdList->cmdQImmediate);
EXPECT_EQ(computeQueue->getCsr()->getOsContext().isHighPriority(), copyQueue->getCsr()->getOsContext().isHighPriority());
EXPECT_EQ(cmdQueueDesc.mode, copyQueue->desc.mode);
EXPECT_TRUE(copyQueue->peekIsCopyOnlyCommandQueue());
EXPECT_TRUE(NEO::EngineHelpers::isBcs(copyQueue->getCsr()->getOsContext().getEngineType()));
}
zeCommandListDestroy(cmdListHandle);
@ -237,11 +256,13 @@ HWTEST2_F(CopyOffloadInOrderTests, givenDebugFlagSetWhenCreatingCmdListThenEnabl
EXPECT_NE(CopyOffloadModes::disabled, cmdList->copyOffloadMode);
EXPECT_NE(nullptr, cmdList->cmdQImmediateCopyOffload);
auto queue = static_cast<WhiteBox<L0::CommandQueue> *>(cmdList->cmdQImmediateCopyOffload);
EXPECT_EQ(cmdQueueDesc.priority, queue->desc.priority);
EXPECT_EQ(cmdQueueDesc.mode, queue->desc.mode);
EXPECT_TRUE(queue->peekIsCopyOnlyCommandQueue());
EXPECT_TRUE(NEO::EngineHelpers::isBcs(queue->getCsr()->getOsContext().getEngineType()));
auto copyQueue = static_cast<WhiteBox<L0::CommandQueue> *>(cmdList->cmdQImmediateCopyOffload);
auto computeQueue = static_cast<WhiteBox<L0::CommandQueue> *>(cmdList->cmdQImmediate);
EXPECT_EQ(computeQueue->getCsr()->getOsContext().isHighPriority(), copyQueue->getCsr()->getOsContext().isHighPriority());
EXPECT_EQ(cmdQueueDesc.mode, copyQueue->desc.mode);
EXPECT_TRUE(copyQueue->peekIsCopyOnlyCommandQueue());
EXPECT_TRUE(NEO::EngineHelpers::isBcs(copyQueue->getCsr()->getOsContext().getEngineType()));
}
zeCommandListDestroy(cmdListHandle);

View File

@ -266,7 +266,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ExitOnSubmissionNumber, -1, "Call exit(0) on X s
// Debug variable declarations. Each entry passes a type, a variable name, a numeric value and a
// description string listing the accepted values.
// NOTE(review): the numeric argument is presumably the default value — confirm against the
// DECLARE_DEBUG_VARIABLE macro definition.
DECLARE_DEBUG_VARIABLE(int32_t, ExitOnSubmissionMode, 0, "Exit on X submission mode. 0: Any context type, 1: Compute context only, 2: Copy context only ")
DECLARE_DEBUG_VARIABLE(int32_t, ForceInOrderImmediateCmdListExecution, -1, "-1: default, 0: disabled, 1: all Immediate Command Lists are switched to in-order execution")
DECLARE_DEBUG_VARIABLE(int32_t, ForceInOrderEvents, -1, "-1: default, 0: disabled, 1: Enable all Events as in-order, to rely on command list counter value")
// NOTE(review): the next two entries declare the same variable name with different descriptions.
// This looks like the pre-change and post-change versions of a single line; confirm that only the
// second one (which documents value 2) exists in the real file.
DECLARE_DEBUG_VARIABLE(int32_t, ForceCopyOperationOffloadForComputeCmdList, -1, "-1: default, 0: disabled, 1: Enabled. If set, all immediate compute in-order cmdlist will try to offload copy operations to copy engine")
DECLARE_DEBUG_VARIABLE(int32_t, ForceCopyOperationOffloadForComputeCmdList, -1, "-1: default, 0: disabled, 1: Enabled for immediate in-order cmd lists, 2: Enabled for all types. If enabled, all compute cmdlist will try to offload copy operations to copy engine")
DECLARE_DEBUG_VARIABLE(int32_t, EnableImplicitConvertionToCounterBasedEvents, -1, "-1: default, 0: Disable, 1: Enable. If enabled, try to convert Regular Events used on Immediate CL to CounterBased")
DECLARE_DEBUG_VARIABLE(int32_t, ForceTlbFlush, -1, "-1: default, 0: Tlb flush disabled, 1: Tlb Flush enabled")
DECLARE_DEBUG_VARIABLE(int32_t, AllowDcFlush, -1, "-1: default, 0: DC flush disabled, 1: DC flush enabled")