feature: add extensions to L0 copy API

Related-To: NEO-15440

Signed-off-by: Radoslaw Jablonski <radoslaw.jablonski@intel.com>
This commit is contained in:
Radoslaw Jablonski
2025-09-10 13:17:27 +00:00
committed by Compute-Runtime-Automation
parent 079cf7d04a
commit 024ee558d4
16 changed files with 142 additions and 10 deletions

View File

@@ -20,6 +20,7 @@ target_sources(${L0_STATIC_LIB_NAME}
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_memory_copy_params.h
${CMAKE_CURRENT_SOURCE_DIR}/command_to_patch.h
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}cmdlist_launch_params_ext.h
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}cmdlist_memory_copy_params_ext.h
${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}copy_offload_mode.h
${CMAKE_CURRENT_SOURCE_DIR}${BRANCH_DIR_SUFFIX}cmdlist_additional_args.cpp
)

View File

@@ -11,6 +11,7 @@
#include "shared/source/command_stream/preemption_mode.h"
#include "shared/source/command_stream/stream_properties.h"
#include "shared/source/command_stream/thread_arbitration_policy.h"
#include "shared/source/helpers/blit_properties.h"
#include "shared/source/helpers/cache_policy.h"
#include "shared/source/helpers/common_types.h"
#include "shared/source/helpers/definitions/command_encoder_args.h"
@@ -154,6 +155,10 @@ struct CommandList : _ze_command_list_handle_t {
virtual ze_result_t appendMemoryCopy(void *dstptr, const void *srcptr, size_t size,
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) = 0;
// Memory-copy entry point that accepts an untyped extension chain (pNext)
// in place of an explicit CmdListMemoryCopyParams argument.
// dstptr / srcptr / size describe the copy; hSignalEvent, numWaitEvents and
// phWaitEvents carry the same event arguments as appendMemoryCopy.
// NOTE(review): pNext is presumably a ze_base_desc_t-style chain and may be
// null when no extensions are supplied - confirm against the API entry point.
virtual ze_result_t appendMemoryCopyWithParameters(void *dstptr, const void *srcptr, size_t size,
const void *pNext,
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) = 0;
virtual ze_result_t appendPageFaultCopy(NEO::GraphicsAllocation *dstptr, NEO::GraphicsAllocation *srcptr, size_t size, bool flushHost) = 0;
virtual ze_result_t appendMemoryCopyRegion(void *dstPtr,
const ze_copy_region_t *dstRegion,
@@ -169,6 +174,9 @@ struct CommandList : _ze_command_list_handle_t {
virtual ze_result_t appendMemoryFill(void *ptr, const void *pattern,
size_t patternSize, size_t size, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) = 0;
// Memory-fill entry point that accepts an untyped extension chain (pNext)
// in place of an explicit CmdListMemoryCopyParams argument.
// ptr / pattern / patternSize / size describe the fill; the event arguments
// match those of appendMemoryFill.
// NOTE(review): pNext is presumably a ze_base_desc_t-style chain and may be
// null when no extensions are supplied - confirm against the API entry point.
virtual ze_result_t appendMemoryFillWithParameters(void *ptr, const void *pattern,
size_t patternSize, size_t size, const void *pNext, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
virtual ze_result_t appendMemoryPrefetch(const void *ptr, size_t count) = 0;
virtual ze_result_t appendSignalEvent(ze_event_handle_t hEvent, bool relaxedOrderingDispatch) = 0;
virtual ze_result_t appendWaitOnEvents(uint32_t numEvents, ze_event_handle_t *phEvent, CommandToPatchContainer *outWaitCmds,
@@ -256,6 +264,8 @@ struct CommandList : _ze_command_list_handle_t {
ze_result_t validateLaunchParams(const Kernel &kernel, const CmdListKernelLaunchParams &launchParams) const;
// Hook that receives the blit properties being prepared for a blitter copy/fill
// together with the per-call memory copy parameters; blitProperties is the
// only mutable argument, so any adjustment is made there.
void setAdditionalBlitPropertiesFromMemoryCopyParams(NEO::BlitProperties &blitProperties, const CmdListMemoryCopyParams &memoryCopyParams) const;
void setOrdinal(uint32_t ord) { ordinal = ord; }
void setCommandListPerThreadScratchSize(uint32_t slotId, uint32_t size) {
UNRECOVERABLE_IF(slotId > 1);
@@ -457,6 +467,7 @@ struct CommandList : _ze_command_list_handle_t {
return closedCmdList;
}
ze_result_t obtainLaunchParamsFromExtensions(const ze_base_desc_t *desc, CmdListKernelLaunchParams &launchParams, ze_kernel_handle_t kernelHandle) const;
// Fills memoryCopyParams from the extension descriptor chain starting at desc
// and reports the outcome as a ze_result_t.
// NOTE(review): handling of a null desc is not visible here - confirm before
// relying on it.
ze_result_t obtainMemoryCopyParamsFromExtensions(const ze_base_desc_t *desc, CmdListMemoryCopyParams &memoryCopyParams) const;
void setCaptureTarget(Graph *graph) {
this->captureTarget = graph;

View File

@@ -62,4 +62,11 @@ ze_result_t CommandList::obtainLaunchParamsFromExtensions(const ze_base_desc_t *
return ZE_RESULT_SUCCESS;
}
// Empty in this implementation: blitProperties is left exactly as the caller
// prepared it and memoryCopyParams is not read.
void CommandList::setAdditionalBlitPropertiesFromMemoryCopyParams(NEO::BlitProperties &blitProperties, const CmdListMemoryCopyParams &memoryCopyParams) const {
}
// This implementation does not inspect the descriptor chain: desc is never
// dereferenced, memoryCopyParams is not modified, and ZE_RESULT_SUCCESS is
// returned unconditionally.
ze_result_t CommandList::obtainMemoryCopyParamsFromExtensions(const ze_base_desc_t *desc, CmdListMemoryCopyParams &memoryCopyParams) const {
return ZE_RESULT_SUCCESS;
}
} // namespace L0

View File

@@ -156,6 +156,10 @@ struct CommandListCoreFamily : public CommandListImp {
ze_result_t appendMemoryCopy(void *dstptr, const void *srcptr, size_t size,
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) override;
// Translates the pNext extension chain into CmdListMemoryCopyParams via
// obtainMemoryCopyParamsFromExtensions and forwards the request to
// appendMemoryCopy with those parameters.
ze_result_t appendMemoryCopyWithParameters(void *dstptr, const void *srcptr, size_t size,
const void *pNext,
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents) override;
ze_result_t appendPageFaultCopy(NEO::GraphicsAllocation *dstAllocation,
NEO::GraphicsAllocation *srcAllocation,
size_t size,
@@ -177,6 +181,11 @@ struct CommandListCoreFamily : public CommandListImp {
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) override;
// Translates the pNext extension chain into CmdListMemoryCopyParams via
// obtainMemoryCopyParamsFromExtensions and forwards the request to
// appendMemoryFill with those parameters.
ze_result_t appendMemoryFillWithParameters(void *ptr, const void *pattern,
size_t patternSize, size_t size,
const void *pNext,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) override;
ze_result_t appendMIBBStart(uint64_t address, size_t predication, bool secondLevel) override;
ze_result_t appendMIBBEnd() override;
@@ -254,7 +263,8 @@ struct CommandListCoreFamily : public CommandListImp {
NEO::GraphicsAllocation *srcPtrAlloc,
uint64_t srcOffset,
uint64_t size,
Event *signalEvent);
Event *signalEvent,
CmdListMemoryCopyParams &memoryCopyParams);
MOCKABLE_VIRTUAL ze_result_t appendMemoryCopyBlitRegion(AlignedAllocationData *srcAllocationData,
AlignedAllocationData *dstAllocationData,

View File

@@ -1551,7 +1551,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlit(uintptr_t
NEO::GraphicsAllocation *srcPtrAlloc,
uint64_t srcOffset,
uint64_t size,
Event *signalEvent) {
Event *signalEvent,
CmdListMemoryCopyParams &memoryCopyParams) {
if (dstPtrAlloc) {
dstOffset += ptrDiff<uintptr_t>(dstPtr, dstPtrAlloc->getGpuAddress());
}
@@ -1568,6 +1569,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyBlit(uintptr_t
blitProperties.computeStreamPartitionCount = this->partitionCount;
blitProperties.highPriority = isHighPriorityImmediateCmdList();
setAdditionalBlitPropertiesFromMemoryCopyParams(blitProperties, memoryCopyParams);
addResidency(dstPtrAlloc, srcPtrAlloc, clearColorAllocation);
size_t nBlitsPerRow = NEO::BlitCommandsHelper<GfxFamily>::getNumberOfBlitsForCopyPerRow(blitProperties.copySize, device->getNEODevice()->getRootDeviceEnvironmentRef(), blitProperties.isSystemMemoryPoolUsed);
@@ -1737,9 +1740,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendPageFaultCopy(NEO::Graph
uintptr_t srcAddress = static_cast<uintptr_t>(srcAllocation->getGpuAddress());
ze_result_t ret = ZE_RESULT_ERROR_UNKNOWN;
if (isCopyOnly(false)) {
CmdListMemoryCopyParams memoryCopyParams{};
return appendMemoryCopyBlit(dstAddress, dstAllocation, 0u,
srcAddress, srcAllocation, 0u,
size, nullptr);
size, nullptr, memoryCopyParams);
} else {
CmdListKernelLaunchParams launchParams = {};
launchParams.isKernelSplitOperation = rightSize > 0;
@@ -1931,7 +1935,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
ret = appendMemoryCopyBlit(dstAllocationStruct.alignedAllocationPtr,
dstAllocationStruct.alloc, dstAllocationStruct.offset,
srcAllocationStruct.alignedAllocationPtr,
srcAllocationStruct.alloc, srcAllocationStruct.offset, size, signalEvent);
srcAllocationStruct.alloc, srcAllocationStruct.offset, size, signalEvent, memoryCopyParams);
} else {
if (NEO::debugManager.flags.FlushTlbBeforeCopy.get() == 1) {
NEO::PipeControlArgs args;
@@ -2029,6 +2033,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopy(void *dstptr,
return ret;
}
// Appends a memory copy whose optional behaviour is described by an extension chain.
//
// dstptr / srcptr / size   - destination, source and byte count, forwarded unchanged to appendMemoryCopy.
// pNext                    - head of the extension descriptor chain; it is reinterpreted as
//                            ze_base_desc_t and handed to obtainMemoryCopyParamsFromExtensions.
// hSignalEvent, numWaitEvents, phWaitEvents - event arguments, forwarded unchanged.
//
// Returns the error reported by obtainMemoryCopyParamsFromExtensions when the chain cannot be
// translated; otherwise the result of appendMemoryCopy.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyWithParameters(void *dstptr,
                                                                                 const void *srcptr,
                                                                                 size_t size,
                                                                                 const void *pNext,
                                                                                 ze_event_handle_t hSignalEvent,
                                                                                 uint32_t numWaitEvents,
                                                                                 ze_event_handle_t *phWaitEvents) {
    CmdListMemoryCopyParams memoryCopyParams{};

    // Do not record the copy with default parameters when the caller's extension chain was
    // rejected - surface the failure instead of silently ignoring the request.
    const ze_result_t extensionsResult = obtainMemoryCopyParamsFromExtensions(static_cast<const ze_base_desc_t *>(pNext), memoryCopyParams);
    if (extensionsResult != ZE_RESULT_SUCCESS) {
        return extensionsResult;
    }

    return appendMemoryCopy(dstptr, srcptr, size, hSignalEvent, numWaitEvents, phWaitEvents, memoryCopyParams);
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyRegion(void *dstPtr,
const ze_copy_region_t *dstRegion,
@@ -2659,6 +2677,21 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
return res;
}
// Appends a memory fill whose optional behaviour is described by an extension chain.
//
// ptr / pattern / patternSize / size - fill destination, pattern and sizes, forwarded unchanged
//                                      to appendMemoryFill.
// pNext                              - head of the extension descriptor chain; it is reinterpreted
//                                      as ze_base_desc_t and handed to
//                                      obtainMemoryCopyParamsFromExtensions.
// hSignalEvent, numWaitEvents, phWaitEvents - event arguments, forwarded unchanged.
//
// Returns the error reported by obtainMemoryCopyParamsFromExtensions when the chain cannot be
// translated; otherwise the result of appendMemoryFill.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFillWithParameters(void *ptr,
                                                                                 const void *pattern,
                                                                                 size_t patternSize,
                                                                                 size_t size,
                                                                                 const void *pNext,
                                                                                 ze_event_handle_t hSignalEvent,
                                                                                 uint32_t numWaitEvents,
                                                                                 ze_event_handle_t *phWaitEvents) {
    CmdListMemoryCopyParams memoryCopyParams{};

    // Do not record the fill with default parameters when the caller's extension chain was
    // rejected - surface the failure instead of silently ignoring the request.
    const ze_result_t extensionsResult = obtainMemoryCopyParamsFromExtensions(static_cast<const ze_base_desc_t *>(pNext), memoryCopyParams);
    if (extensionsResult != ZE_RESULT_SUCCESS) {
        return extensionsResult;
    }

    return appendMemoryFill(ptr, pattern, patternSize, size, hSignalEvent, numWaitEvents, phWaitEvents, memoryCopyParams);
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr, const void *pattern, size_t patternSize, size_t size, Event *signalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) {
@@ -2723,6 +2756,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr, cons
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
setAdditionalBlitPropertiesFromMemoryCopyParams(blitProperties, memoryCopyParams);
if (useAdditionalBlitProperties) {
setAdditionalBlitProperties(blitProperties, signalEvent, useAdditionalTimestamp);
}

View File

@@ -827,9 +827,10 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendPageFaultCopy(N
uintptr_t srcAddress = static_cast<uintptr_t>(srcAllocation->getGpuAddress());
auto splitCall = [&](CommandListCoreFamilyImmediate<gfxCoreFamily> *subCmdList, uintptr_t dstAddressParam, uintptr_t srcAddressParam, size_t sizeParam, ze_event_handle_t hSignalEventParam) {
CmdListMemoryCopyParams memoryCopyParams{};
subCmdList->appendMemoryCopyBlit(dstAddressParam, dstAllocation, 0u,
srcAddressParam, srcAllocation, 0u,
sizeParam, nullptr);
sizeParam, nullptr, memoryCopyParams);
return subCmdList->CommandListCoreFamily<gfxCoreFamily>::appendSignalEvent(hSignalEventParam, false);
};

View File

@@ -7,12 +7,15 @@
#pragma once
#include "cmdlist_memory_copy_params_ext.h"
namespace L0 {
// Per-call options handed to the command list memory copy / fill append
// methods. Every flag starts out false and the extension block is
// value-initialized, so a default-constructed instance carries no options.
struct CmdListMemoryCopyParams {
    bool relaxedOrderingDispatch{false};
    bool forceDisableCopyOnlyInOrderSignaling{false};
    bool copyOffloadAllowed{false};
    bool taskCountUpdateRequired{false};
    // Additional parameters whose layout comes from cmdlist_memory_copy_params_ext.h.
    CmdListMemoryCopyParamsExt paramsExt{};
};
} // namespace L0

View File

@@ -0,0 +1,15 @@
/*
* Copyright (C) 2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
namespace L0 {
// Extension block embedded in the memory copy parameters; this definition
// declares no members.
struct CmdListMemoryCopyParamsExt {};
} // namespace L0

View File

@@ -493,6 +493,15 @@ struct Mock<CommandList> : public CommandList {
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams));
// Mock of appendMemoryCopyWithParameters, generated through ADDMETHOD_NOBASE
// with ZE_RESULT_SUCCESS passed as the macro's result argument.
// NOTE(review): presumably that value is the default the mock returns -
// confirm against the ADDMETHOD_NOBASE definition.
ADDMETHOD_NOBASE(appendMemoryCopyWithParameters, ze_result_t, ZE_RESULT_SUCCESS,
(void *dstptr,
const void *srcptr,
size_t size,
const void *pNext,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents));
ADDMETHOD_NOBASE(appendPageFaultCopy, ze_result_t, ZE_RESULT_SUCCESS,
(NEO::GraphicsAllocation * dstptr,
NEO::GraphicsAllocation *srcptr,
@@ -525,6 +534,16 @@ struct Mock<CommandList> : public CommandList {
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams));
// Mock of appendMemoryFillWithParameters, generated through ADDMETHOD_NOBASE
// with ZE_RESULT_SUCCESS passed as the macro's result argument.
// NOTE(review): presumably that value is the default the mock returns -
// confirm against the ADDMETHOD_NOBASE definition.
ADDMETHOD_NOBASE(appendMemoryFillWithParameters, ze_result_t, ZE_RESULT_SUCCESS,
(void *ptr,
const void *pattern,
size_t pattern_size,
size_t size,
const void *pNext,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents));
ADDMETHOD_NOBASE(appendSignalEvent, ze_result_t, ZE_RESULT_SUCCESS,
(ze_event_handle_t hEvent, bool relaxedOrderingDispatch));
@@ -670,7 +689,7 @@ class MockCommandListCoreFamily : public CommandListCoreFamily<gfxCoreFamily> {
uint64_t dstOffset, uintptr_t srcPtr,
NEO::GraphicsAllocation *srcPtrAlloc,
uint64_t srcOffset,
uint64_t size, Event *signalEvent));
uint64_t size, Event *signalEvent, CmdListMemoryCopyParams &memoryCopyParams));
ADDMETHOD_VOIDRETURN(allocateOrReuseKernelPrivateMemory,
false,

View File

@@ -82,7 +82,7 @@ class MockCommandListHw : public WhiteBox<::L0::CommandListCoreFamily<gfxCoreFam
uint64_t dstOffset, uintptr_t srcPtr,
NEO::GraphicsAllocation *srcPtrAlloc,
uint64_t srcOffset,
uint64_t size, Event *signalEvent) override {
uint64_t size, Event *signalEvent, CmdListMemoryCopyParams &memoryCopyParams) override {
appendMemoryCopyBlitCalledTimes++;
if (failOnFirstCopy && appendMemoryCopyBlitCalledTimes == 1) {
return ZE_RESULT_ERROR_UNKNOWN;

View File

@@ -484,6 +484,17 @@ HWTEST_F(CommandListTest, givenUnrecognizedDescriptorWhenObtainLaunchParamsFromE
EXPECT_EQ(std::string("Could not recognize provided extension, stype: 0x12.\n"), output);
}
// A value-initialized base descriptor passed to obtainMemoryCopyParamsFromExtensions
// on an initialized render/compute command list must yield ZE_RESULT_SUCCESS.
HWTEST_F(CommandListTest, WhenObtainMemoryCopyParamsFromExtensionsIsCalledThenSuccessIsReturned) {
    using CommandListType = WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>;

    ze_base_desc_t baseDesc{};
    CmdListMemoryCopyParams copyParams{};

    auto cmdList = std::make_unique<CommandListType>();
    cmdList->initialize(device, NEO::EngineGroupType::renderCompute, 0u);

    const ze_result_t status = cmdList->obtainMemoryCopyParamsFromExtensions(&baseDesc, copyParams);
    EXPECT_EQ(ZE_RESULT_SUCCESS, status);
}
HWTEST_F(CommandListTest, givenComputeCommandListAnd2dRegionWhenMemoryCopyRegionInUsmHostAllocationCalledThenBuiltinFlagAndDestinationAllocSystemIsSet) {
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u);

View File

@@ -45,7 +45,7 @@ class MockCommandListForMemFill : public WhiteBox<::L0::CommandListCoreFamily<gf
uint64_t dstOffset, uintptr_t srcPtr,
NEO::GraphicsAllocation *srcPtrAlloc,
uint64_t srcOffset,
uint64_t size, Event *signalEvent) override {
uint64_t size, Event *signalEvent, CmdListMemoryCopyParams &memoryCopyParams) override {
appendMemoryCopyBlitCalledTimes++;
return ZE_RESULT_SUCCESS;
}
@@ -234,7 +234,8 @@ HWTEST_F(AppendMemoryCopyTests, givenCopyOnlyCommandListThenDcFlushIsNotAddedAft
NEO::MockGraphicsAllocation mockAllocationDst(0, 1u /*num gmms*/, NEO::AllocationType::internalHostMemory,
reinterpret_cast<void *>(dstPtr), 0x1000, 0, sizeof(uint32_t),
MemoryPool::system4KBPages, MemoryManager::maxOsContextCount);
commandList->appendMemoryCopyBlit(ptrOffset(dstPtr, dstOffset), &mockAllocationDst, 0, ptrOffset(srcPtr, srcOffset), &mockAllocationSrc, 0, copySize, nullptr);
CmdListMemoryCopyParams memoryCopyParams{};
commandList->appendMemoryCopyBlit(ptrOffset(dstPtr, dstOffset), &mockAllocationDst, 0, ptrOffset(srcPtr, srcOffset), &mockAllocationSrc, 0, copySize, nullptr, memoryCopyParams);
auto &commandContainer = commandList->getCmdContainer();
GenCmdList genCmdList;

View File

@@ -123,7 +123,7 @@ class MockCommandListExtensionHw : public WhiteBox<::L0::CommandListCoreFamily<g
uint64_t dstOffset, uintptr_t srcPtr,
NEO::GraphicsAllocation *srcPtrAlloc,
uint64_t srcOffset,
uint64_t size, Event *signalEvent) override {
uint64_t size, Event *signalEvent, CmdListMemoryCopyParams &memoryCopyParams) override {
appendMemoryCopyBlitCalledTimes++;
if (failOnFirstCopy && appendMemoryCopyBlitCalledTimes == 1) {
return ZE_RESULT_ERROR_UNKNOWN;