feature: allow enabling copy offload for regular cmd lists

Related-To: NEO-7067

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski 2025-05-07 14:53:40 +00:00 committed by Compute-Runtime-Automation
parent 6ae43123f6
commit 2782577f1d
5 changed files with 72 additions and 5 deletions

View File

@ -273,11 +273,7 @@ CommandList *CommandList::createImmediate(uint32_t productFamily, Device *device
}
void CommandListImp::enableCopyOperationOffload(uint32_t productFamily, Device *device, const ze_command_queue_desc_t *desc) {
this->copyOffloadMode = CopyOffloadModes::dualStream;
if (NEO::debugManager.flags.OverrideCopyOffloadMode.get() != -1) {
this->copyOffloadMode = static_cast<CopyOffloadMode>(NEO::debugManager.flags.OverrideCopyOffloadMode.get());
}
this->copyOffloadMode = device->getL0GfxCoreHelper().getDefaultCopyOffloadMode();
if (this->copyOffloadMode != CopyOffloadModes::dualStream) {
return;

View File

@ -219,6 +219,7 @@ ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc,
uint32_t index = 0;
uint32_t commandQueueGroupOrdinal = desc->commandQueueGroupOrdinal;
NEO::SynchronizedDispatchMode syncDispatchMode = NEO::SynchronizedDispatchMode::disabled;
bool copyOffloadHint = false;
adjustCommandQueueDesc(commandQueueGroupOrdinal, index);
NEO::EngineGroupType engineGroupType = getEngineGroupTypeForOrdinal(commandQueueGroupOrdinal);
@ -241,6 +242,10 @@ ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc,
createCommandList = newCreateFunc;
}
if (static_cast<uint32_t>(pNext->stype) == ZEX_INTEL_STRUCTURE_TYPE_QUEUE_COPY_OPERATIONS_OFFLOAD_HINT_EXP_PROPERTIES) {
copyOffloadHint = reinterpret_cast<const zex_intel_queue_copy_operations_offload_hint_exp_desc_t *>(pNext)->copyOffloadEnabled;
}
pNext = reinterpret_cast<const ze_base_desc_t *>(pNext->pNext);
}
@ -263,6 +268,12 @@ ze_result_t DeviceImp::createCommandList(const ze_command_list_desc_t *desc,
}
}
const bool copyOffloadAllowed = cmdList->isInOrderExecutionEnabled() && !getProductHelper().isDcFlushAllowed() && (getL0GfxCoreHelper().getDefaultCopyOffloadMode() != CopyOffloadModes::dualStream);
if (copyOffloadHint && copyOffloadAllowed) {
cmdList->enableCopyOperationOffload(productFamily, this, nullptr);
}
if (returnValue != ZE_RESULT_SUCCESS) {
cmdList->destroy();
cmdList = nullptr;

View File

@ -42,6 +42,7 @@ enum class RTASDeviceFormatInternal {
version2 = 2,
};
struct CopyOffloadMode;
struct Event;
struct Device;
struct EventPool;
@ -112,6 +113,7 @@ class L0GfxCoreHelper : public NEO::ApiGfxCoreHelper {
virtual std::unique_ptr<NEO::TagAllocatorBase> getInOrderTimestampAllocator(const RootDeviceIndicesContainer &rootDeviceIndices, NEO::MemoryManager *memoryManager, size_t initialTagCount, size_t packetsCountPerElement, size_t tagAlignment,
NEO::DeviceBitfield deviceBitfield) const = 0;
virtual uint64_t getOaTimestampValidBits() const = 0;
virtual CopyOffloadMode getDefaultCopyOffloadMode() const = 0;
protected:
L0GfxCoreHelper() = default;
@ -167,6 +169,7 @@ class L0GfxCoreHelperHw : public L0GfxCoreHelper {
std::unique_ptr<NEO::TagAllocatorBase> getInOrderTimestampAllocator(const RootDeviceIndicesContainer &rootDeviceIndices, NEO::MemoryManager *memoryManager, size_t initialTagCount, size_t packetsCountPerElement, size_t tagAlignment,
NEO::DeviceBitfield deviceBitfield) const override;
uint64_t getOaTimestampValidBits() const override;
CopyOffloadMode getDefaultCopyOffloadMode() const override;
protected:
L0GfxCoreHelperHw() = default;

View File

@ -13,6 +13,7 @@
#include "shared/source/utilities/stackvec.h"
#include "shared/source/utilities/tag_allocator.h"
#include "level_zero/core/source/cmdlist/cmdlist_launch_params.h"
#include "level_zero/core/source/device/device.h"
#include "level_zero/core/source/event/event.h"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
@ -94,4 +95,13 @@ bool L0GfxCoreHelperHw<Family>::threadResumeRequiresUnlock() const {
return false;
}
// Returns the copy offload mode to use by default for this core family.
// The OverrideCopyOffloadMode debug flag wins whenever it holds anything
// other than -1; otherwise the dual-stream mode is reported.
template <typename Family>
CopyOffloadMode L0GfxCoreHelperHw<Family>::getDefaultCopyOffloadMode() const {
    const auto overrideMode = NEO::debugManager.flags.OverrideCopyOffloadMode.get();
    if (overrideMode == -1) {
        return CopyOffloadModes::dualStream;
    }
    return static_cast<CopyOffloadMode>(overrideMode);
}
} // namespace L0

View File

@ -331,6 +331,53 @@ HWTEST2_F(CopyOffloadInOrderTests, givenQueueDescriptorWhenCreatingCmdListThenEn
}
}
// Checks how a command list created through zeCommandListCreate reacts to the
// copy-offload hint descriptor chained via pNext. Three configurations are
// exercised in order; the OverrideCopyOffloadMode debug flag is only changed
// where shown, so each scope inherits the value set by the previous one.
HWTEST_F(CopyOffloadInOrderTests, givenNonDualStreamOffloadWhenCreatingCmdListThenAcceptOffloadHint) {
// Hint descriptor requesting copy offload, attached to the command list descriptor.
zex_intel_queue_copy_operations_offload_hint_exp_desc_t copyOffloadDesc = {ZEX_INTEL_STRUCTURE_TYPE_QUEUE_COPY_OPERATIONS_OFFLOAD_HINT_EXP_PROPERTIES};
copyOffloadDesc.copyOffloadEnabled = true;
ze_command_list_desc_t cmdListDesc = {ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC};
cmdListDesc.pNext = &copyOffloadDesc;
ze_command_list_handle_t hCmdList;
// Case 1: non-dual-stream override + in-order list.
// The hint is expected to be honored unless the product allows DC flush,
// in which case offload is expected to stay disabled.
{
NEO::debugManager.flags.OverrideCopyOffloadMode.set(nonDualStreamMode);
cmdListDesc.flags = ZE_COMMAND_LIST_FLAG_IN_ORDER;
ASSERT_EQ(ZE_RESULT_SUCCESS, zeCommandListCreate(context->toHandle(), device->toHandle(), &cmdListDesc, &hCmdList));
if (device->getProductHelper().isDcFlushAllowed()) {
EXPECT_EQ(CopyOffloadModes::disabled, static_cast<CommandListImp *>(CommandList::fromHandle(hCmdList))->getCopyOffloadModeForOperation(true));
} else {
EXPECT_EQ(nonDualStreamMode, static_cast<CommandListImp *>(CommandList::fromHandle(hCmdList))->getCopyOffloadModeForOperation(true));
}
zeCommandListDestroy(hCmdList);
}
// Case 2: same override as case 1 (not modified here), but the in-order flag
// is cleared; offload is expected to be disabled.
{
cmdListDesc.flags = 0;
ASSERT_EQ(ZE_RESULT_SUCCESS, zeCommandListCreate(context->toHandle(), device->toHandle(), &cmdListDesc, &hCmdList));
EXPECT_EQ(CopyOffloadModes::disabled, static_cast<CommandListImp *>(CommandList::fromHandle(hCmdList))->getCopyOffloadModeForOperation(true));
zeCommandListDestroy(hCmdList);
}
// Case 3: in-order list again, but the override now selects dual-stream mode;
// offload is expected to be disabled for the created command list.
{
NEO::debugManager.flags.OverrideCopyOffloadMode.set(CopyOffloadModes::dualStream);
cmdListDesc.flags = ZE_COMMAND_LIST_FLAG_IN_ORDER;
ASSERT_EQ(ZE_RESULT_SUCCESS, zeCommandListCreate(context->toHandle(), device->toHandle(), &cmdListDesc, &hCmdList));
EXPECT_EQ(CopyOffloadModes::disabled, static_cast<CommandListImp *>(CommandList::fromHandle(hCmdList))->getCopyOffloadModeForOperation(true));
zeCommandListDestroy(hCmdList);
}
}
HWTEST2_F(CopyOffloadInOrderTests, givenCopyOffloadEnabledWhenProgrammingHwCmdsThenUseCopyCommands, IsAtLeastXeHpCore) {
using XY_COPY_BLT = typename std::remove_const<decltype(FamilyType::cmdInitXyCopyBlt)>::type;