feature: allow enabling copy offload on default cmd list
Related-To: NEO-7067
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
parent
c5c87ab6f9
commit
7f7aa36c52
|
@ -336,6 +336,12 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
|
|||
enableSynchronizedDispatch((NEO::debugManager.flags.ForceSynchronizedDispatchMode.get() == 1) ? NEO::SynchronizedDispatchMode::full : NEO::SynchronizedDispatchMode::disabled);
|
||||
}
|
||||
|
||||
const bool copyOffloadSupported = l0GfxCoreHelper.isDefaultCmdListWithCopyOffloadSupported() || (NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.get() == 2);
|
||||
|
||||
if (copyOffloadSupported && !this->internalUsage && !isCopyOffloadEnabled()) {
|
||||
enableCopyOperationOffload();
|
||||
}
|
||||
|
||||
return returnType;
|
||||
}
|
||||
|
||||
|
|
|
@ -258,38 +258,57 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
|
|||
|
||||
commandList->copyThroughLockedPtrEnabled = gfxCoreHelper.copyThroughLockedPtrEnabled(hwInfo, productHelper);
|
||||
|
||||
const bool cmdListSupportsCopyOffload = !commandList->isCopyOnly(false) && commandList->isInOrderExecutionEnabled() && !productHelper.isDcFlushAllowed() && deviceImp->tryGetCopyEngineOrdinal().has_value();
|
||||
const bool cmdListSupportsCopyOffload = commandList->isInOrderExecutionEnabled() && !productHelper.isDcFlushAllowed();
|
||||
|
||||
if ((NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.get() == 1 || queueProperties.copyOffloadHint) && cmdListSupportsCopyOffload) {
|
||||
commandList->enableCopyOperationOffload(productFamily, device, desc);
|
||||
commandList->enableCopyOperationOffload();
|
||||
}
|
||||
|
||||
return commandList;
|
||||
}
|
||||
|
||||
return commandList;
|
||||
}
|
||||
|
||||
void CommandListImp::enableCopyOperationOffload(uint32_t productFamily, Device *device, const ze_command_queue_desc_t *desc) {
|
||||
this->copyOffloadMode = device->getL0GfxCoreHelper().getDefaultCopyOffloadMode();
|
||||
|
||||
if (this->copyOffloadMode != CopyOffloadModes::dualStream) {
|
||||
void CommandListImp::enableCopyOperationOffload() {
|
||||
if (isCopyOnly(false)) {
|
||||
return;
|
||||
}
|
||||
|
||||
this->copyOffloadMode = device->getL0GfxCoreHelper().getDefaultCopyOffloadMode();
|
||||
|
||||
if (this->copyOffloadMode != CopyOffloadModes::dualStream || !isImmediateType()) {
|
||||
// No need to create internal bcs queue
|
||||
return;
|
||||
}
|
||||
|
||||
if (!static_cast<DeviceImp *>(device)->tryGetCopyEngineOrdinal().has_value()) {
|
||||
this->copyOffloadMode = CopyOffloadModes::disabled;
|
||||
return;
|
||||
}
|
||||
|
||||
auto &computeOsContext = getCsr(false)->getOsContext();
|
||||
|
||||
ze_command_queue_priority_t immediateQueuePriority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
|
||||
if (computeOsContext.isHighPriority()) {
|
||||
immediateQueuePriority = ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_HIGH;
|
||||
} else if (computeOsContext.isLowPriority()) {
|
||||
immediateQueuePriority = ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW;
|
||||
}
|
||||
|
||||
ze_command_queue_mode_t immediateQueueMode = this->isSyncModeQueue ? ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS : ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
|
||||
|
||||
NEO::CommandStreamReceiver *copyCsr = nullptr;
|
||||
uint32_t ordinal = static_cast<DeviceImp *>(device)->getCopyEngineOrdinal();
|
||||
|
||||
device->getCsrForOrdinalAndIndex(&copyCsr, ordinal, 0, desc->priority, false);
|
||||
device->getCsrForOrdinalAndIndex(&copyCsr, ordinal, 0, immediateQueuePriority, false);
|
||||
UNRECOVERABLE_IF(!copyCsr);
|
||||
|
||||
ze_command_queue_desc_t copyQueueDesc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
|
||||
copyQueueDesc.ordinal = ordinal;
|
||||
copyQueueDesc.mode = desc->mode;
|
||||
copyQueueDesc.priority = desc->priority;
|
||||
copyQueueDesc.mode = immediateQueueMode;
|
||||
copyQueueDesc.priority = immediateQueuePriority;
|
||||
|
||||
ze_result_t returnValue = ZE_RESULT_SUCCESS;
|
||||
auto offloadCommandQueue = CommandQueue::create(productFamily, device, copyCsr, &copyQueueDesc, true, false, true, returnValue);
|
||||
auto offloadCommandQueue = CommandQueue::create(device->getHwInfo().platform.eProductFamily, device, copyCsr, &copyQueueDesc, true, false, true, returnValue);
|
||||
UNRECOVERABLE_IF(!offloadCommandQueue);
|
||||
|
||||
this->cmdQImmediateCopyOffload = offloadCommandQueue;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
* Copyright (C) 2020-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
|
@ -46,7 +46,7 @@ struct CommandListImp : public CommandList {
|
|||
virtual void patchInOrderCmds() = 0;
|
||||
void enableSynchronizedDispatch(NEO::SynchronizedDispatchMode mode);
|
||||
NEO::SynchronizedDispatchMode getSynchronizedDispatchMode() const { return synchronizedDispatchMode; }
|
||||
void enableCopyOperationOffload(uint32_t productFamily, Device *device, const ze_command_queue_desc_t *desc);
|
||||
void enableCopyOperationOffload();
|
||||
void setInterruptEventsCsr(NEO::CommandStreamReceiver &csr);
|
||||
|
||||
protected:
|
||||
|
|
|
@ -271,7 +271,7 @@ ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc,
|
|||
const bool copyOffloadAllowed = cmdList->isInOrderExecutionEnabled() && !getProductHelper().isDcFlushAllowed() && (getL0GfxCoreHelper().getDefaultCopyOffloadMode() != CopyOffloadModes::dualStream);
|
||||
|
||||
if (copyOffloadHint && copyOffloadAllowed) {
|
||||
cmdList->enableCopyOperationOffload(productFamily, this, nullptr);
|
||||
cmdList->enableCopyOperationOffload();
|
||||
}
|
||||
|
||||
if (returnValue != ZE_RESULT_SUCCESS) {
|
||||
|
|
|
@ -114,6 +114,7 @@ class L0GfxCoreHelper : public NEO::ApiGfxCoreHelper {
|
|||
NEO::DeviceBitfield deviceBitfield) const = 0;
|
||||
virtual uint64_t getOaTimestampValidBits() const = 0;
|
||||
virtual CopyOffloadMode getDefaultCopyOffloadMode() const = 0;
|
||||
virtual bool isDefaultCmdListWithCopyOffloadSupported() const = 0;
|
||||
|
||||
protected:
|
||||
L0GfxCoreHelper() = default;
|
||||
|
@ -170,6 +171,7 @@ class L0GfxCoreHelperHw : public L0GfxCoreHelper {
|
|||
NEO::DeviceBitfield deviceBitfield) const override;
|
||||
uint64_t getOaTimestampValidBits() const override;
|
||||
CopyOffloadMode getDefaultCopyOffloadMode() const override;
|
||||
bool isDefaultCmdListWithCopyOffloadSupported() const override;
|
||||
|
||||
protected:
|
||||
L0GfxCoreHelperHw() = default;
|
||||
|
|
|
@ -104,4 +104,9 @@ CopyOffloadMode L0GfxCoreHelperHw<Family>::getDefaultCopyOffloadMode() const {
|
|||
return CopyOffloadModes::dualStream;
|
||||
}
|
||||
|
||||
// Reports whether command lists should get copy offload enabled by default.
// This generic template definition returns false unconditionally, so the
// initialize() path shown in this commit enables offload only when the
// ForceCopyOperationOffloadForComputeCmdList debug flag is set to 2.
// NOTE(review): per-family specializations may exist elsewhere - confirm.
template <typename Family>
|
||||
bool L0GfxCoreHelperHw<Family>::isDefaultCmdListWithCopyOffloadSupported() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
|
|
@ -194,7 +194,7 @@ struct InOrderCmdListFixture : public ::Test<ModuleFixture> {
|
|||
cmdList->enableInOrderExecution();
|
||||
|
||||
if (copyOffloadEnabled) {
|
||||
cmdList->enableCopyOperationOffload(device->getHwInfo().platform.eProductFamily, device, &desc);
|
||||
cmdList->enableCopyOperationOffload();
|
||||
cmdList->copyOperationFenceSupported = device->getProductHelper().isDeviceToHostCopySignalingFenceRequired();
|
||||
}
|
||||
|
||||
|
|
|
@ -190,6 +190,22 @@ HWTEST2_F(CopyOffloadInOrderTests, givenNonDualStreamModeAndProfilingEventWithRe
|
|||
}
|
||||
}
|
||||
|
||||
// Checks how the ForceCopyOperationOffloadForComputeCmdList debug flag affects
// command lists created through createRegularCmdList():
//  - value 1 leaves copy offload disabled for such a list,
//  - value 2 enables copy offload but no internal copy offload queue is created,
//  - a list created with `true` keeps copy offload disabled.
HWTEST2_F(CopyOffloadInOrderTests, givenDebugFlagSetWhenCreatingRegularCmdListThenEnableCopyOffload, IsAtLeastXeHpCore) {
|
||||
// Flag value 1: the regular list is expected to stay with offload disabled.
NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.set(1);
|
||||
|
||||
auto regularCmdList = createRegularCmdList<FamilyType::gfxCoreFamily>(false);
|
||||
EXPECT_EQ(CopyOffloadModes::disabled, regularCmdList->copyOffloadMode);
|
||||
|
||||
// Flag value 2: offload is expected to be enabled for the regular list as well.
NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.set(2);
|
||||
|
||||
regularCmdList = createRegularCmdList<FamilyType::gfxCoreFamily>(false);
|
||||
EXPECT_NE(CopyOffloadModes::disabled, regularCmdList->copyOffloadMode);
|
||||
// The offload mode is set, yet no dedicated copy offload queue must exist.
EXPECT_EQ(nullptr, regularCmdList->cmdQImmediateCopyOffload);
|
||||
|
||||
// NOTE(review): the `true` argument presumably requests a copy-only list - confirm against the fixture.
regularCmdList = createRegularCmdList<FamilyType::gfxCoreFamily>(true);
|
||||
EXPECT_EQ(CopyOffloadModes::disabled, regularCmdList->copyOffloadMode);
|
||||
}
|
||||
|
||||
HWTEST2_F(CopyOffloadInOrderTests, givenDebugFlagSetWhenCreatingCmdListThenEnableCopyOffload, IsAtLeastXeHpCore) {
|
||||
NEO::debugManager.flags.ForceCopyOperationOffloadForComputeCmdList.set(1);
|
||||
|
||||
|
@ -213,11 +229,14 @@ HWTEST2_F(CopyOffloadInOrderTests, givenDebugFlagSetWhenCreatingCmdListThenEnabl
|
|||
EXPECT_NE(CopyOffloadModes::disabled, cmdList->copyOffloadMode);
|
||||
EXPECT_NE(nullptr, cmdList->cmdQImmediateCopyOffload);
|
||||
|
||||
auto queue = static_cast<WhiteBox<L0::CommandQueue> *>(cmdList->cmdQImmediateCopyOffload);
|
||||
EXPECT_EQ(cmdQueueDesc.priority, queue->desc.priority);
|
||||
EXPECT_EQ(cmdQueueDesc.mode, queue->desc.mode);
|
||||
EXPECT_TRUE(queue->peekIsCopyOnlyCommandQueue());
|
||||
EXPECT_TRUE(NEO::EngineHelpers::isBcs(queue->getCsr()->getOsContext().getEngineType()));
|
||||
auto copyQueue = static_cast<WhiteBox<L0::CommandQueue> *>(cmdList->cmdQImmediateCopyOffload);
|
||||
auto computeQueue = static_cast<WhiteBox<L0::CommandQueue> *>(cmdList->cmdQImmediate);
|
||||
|
||||
EXPECT_EQ(computeQueue->getCsr()->getOsContext().isHighPriority(), copyQueue->getCsr()->getOsContext().isHighPriority());
|
||||
|
||||
EXPECT_EQ(cmdQueueDesc.mode, copyQueue->desc.mode);
|
||||
EXPECT_TRUE(copyQueue->peekIsCopyOnlyCommandQueue());
|
||||
EXPECT_TRUE(NEO::EngineHelpers::isBcs(copyQueue->getCsr()->getOsContext().getEngineType()));
|
||||
}
|
||||
|
||||
zeCommandListDestroy(cmdListHandle);
|
||||
|
@ -237,11 +256,13 @@ HWTEST2_F(CopyOffloadInOrderTests, givenDebugFlagSetWhenCreatingCmdListThenEnabl
|
|||
EXPECT_NE(CopyOffloadModes::disabled, cmdList->copyOffloadMode);
|
||||
EXPECT_NE(nullptr, cmdList->cmdQImmediateCopyOffload);
|
||||
|
||||
auto queue = static_cast<WhiteBox<L0::CommandQueue> *>(cmdList->cmdQImmediateCopyOffload);
|
||||
EXPECT_EQ(cmdQueueDesc.priority, queue->desc.priority);
|
||||
EXPECT_EQ(cmdQueueDesc.mode, queue->desc.mode);
|
||||
EXPECT_TRUE(queue->peekIsCopyOnlyCommandQueue());
|
||||
EXPECT_TRUE(NEO::EngineHelpers::isBcs(queue->getCsr()->getOsContext().getEngineType()));
|
||||
auto copyQueue = static_cast<WhiteBox<L0::CommandQueue> *>(cmdList->cmdQImmediateCopyOffload);
|
||||
auto computeQueue = static_cast<WhiteBox<L0::CommandQueue> *>(cmdList->cmdQImmediate);
|
||||
EXPECT_EQ(computeQueue->getCsr()->getOsContext().isHighPriority(), copyQueue->getCsr()->getOsContext().isHighPriority());
|
||||
|
||||
EXPECT_EQ(cmdQueueDesc.mode, copyQueue->desc.mode);
|
||||
EXPECT_TRUE(copyQueue->peekIsCopyOnlyCommandQueue());
|
||||
EXPECT_TRUE(NEO::EngineHelpers::isBcs(copyQueue->getCsr()->getOsContext().getEngineType()));
|
||||
}
|
||||
|
||||
zeCommandListDestroy(cmdListHandle);
|
||||
|
|
|
@ -266,7 +266,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ExitOnSubmissionNumber, -1, "Call exit(0) on X s
|
|||
DECLARE_DEBUG_VARIABLE(int32_t, ExitOnSubmissionMode, 0, "Exit on X submission mode. 0: Any context type, 1: Compute context only, 2: Copy context only ")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceInOrderImmediateCmdListExecution, -1, "-1: default, 0: disabled, 1: all Immediate Command Lists are switched to in-order execution")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceInOrderEvents, -1, "-1: default, 0: disabled, 1: Enable all Events as in-order, to rely on command list counter value")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceCopyOperationOffloadForComputeCmdList, -1, "-1: default, 0: disabled, 1: Enabled. If set, all immediate compute in-order cmdlist will try to offload copy operations to copy engine")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceCopyOperationOffloadForComputeCmdList, -1, "-1: default, 0: disabled, 1: Enabled for immediate in-order cmd lists, 2: Enabled for all types. If enabled, all compute cmdlist will try to offload copy operations to copy engine")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableImplicitConvertionToCounterBasedEvents, -1, "-1: default, 0: Disable, 1: Enable. If enabled, try to convert Regular Events used on Immediate CL to CounterBased")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceTlbFlush, -1, "-1: default, 0: Tlb flush disabled, 1: Tlb Flush enabled")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, AllowDcFlush, -1, "-1: default, 0: DC flush disabled, 1: DC flush enabled")
|
||||
|
|
Loading…
Reference in New Issue