feature: copy offload support for images

Related-To: NEO-7067

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2025-05-14 16:05:56 +00:00
committed by Compute-Runtime-Automation
parent 679b9949ba
commit ef0c525847
18 changed files with 397 additions and 192 deletions

View File

@@ -88,28 +88,28 @@ struct CommandList : _ze_command_list_handle_t {
virtual ze_result_t appendImageCopyFromMemory(ze_image_handle_t hDstImage, const void *srcptr,
const ze_image_region_t *pDstRegion,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) = 0;
virtual ze_result_t appendImageCopyToMemory(void *dstptr, ze_image_handle_t hSrcImage,
const ze_image_region_t *pSrcRegion,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) = 0;
virtual ze_result_t appendImageCopyFromMemoryExt(ze_image_handle_t hDstImage, const void *srcptr,
const ze_image_region_t *pDstRegion,
uint32_t srcRowPitch, uint32_t srcSlicePitch,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) = 0;
virtual ze_result_t appendImageCopyToMemoryExt(void *dstptr, ze_image_handle_t hSrcImage,
const ze_image_region_t *pSrcRegion,
uint32_t destRowPitch, uint32_t destSlicePitch,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) = 0;
virtual ze_result_t appendImageCopyRegion(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage,
const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion,
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) = 0;
virtual ze_result_t appendImageCopy(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) = 0;
virtual ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle, const ze_group_count_t &threadGroupDimensions,
ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents,
CmdListKernelLaunchParams &launchParams) = 0;

View File

@@ -94,29 +94,29 @@ struct CommandListCoreFamily : public CommandListImp {
const ze_image_region_t *pDstRegion,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) override;
ze_result_t appendImageCopyToMemory(void *dstptr, ze_image_handle_t hSrcImage,
const ze_image_region_t *pSrcRegion, ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) override;
ze_result_t appendImageCopyFromMemoryExt(ze_image_handle_t hDstImage, const void *srcptr,
const ze_image_region_t *pDstRegion,
uint32_t srcRowPitch, uint32_t srcSlicePitch,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) override;
ze_result_t appendImageCopyToMemoryExt(void *dstptr, ze_image_handle_t hSrcImage,
const ze_image_region_t *pSrcRegion,
uint32_t destRowPitch, uint32_t destSlicePitch,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) override;
ze_result_t appendImageCopyRegion(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage,
const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion,
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) override;
ze_result_t appendImageCopy(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) override;
ze_result_t appendLaunchKernel(ze_kernel_handle_t kernelHandle,
const ze_group_count_t &threadGroupDimensions,
ze_event_handle_t hEvent, uint32_t numWaitEvents,
@@ -290,7 +290,8 @@ struct CommandListCoreFamily : public CommandListImp {
size_t dstRowPitch, size_t dstSlicePitch,
size_t bytesPerPixel, const Vec3<size_t> &copySize,
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize,
Event *signalEvent);
Event *signalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams);
virtual ze_result_t appendLaunchKernelWithParams(Kernel *kernel,
const ze_group_count_t &threadGroupDimensions,

View File

@@ -721,14 +721,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryRangesBarrier(uint
}
// Thin forwarding wrapper around appendImageCopyFromMemoryExt.
// Passes 0 for both srcRowPitch and srcSlicePitch and hands every other argument
// (destination image, source pointer, region, signal event, wait-event list and the
// trailing relaxedOrderingDispatch / memoryCopyParams argument) through unchanged,
// returning the callee's ze_result_t as-is.
// NOTE(review): presumably a pitch of 0 lets the Ext variant derive the pitches itself;
// that handling is not visible in this excerpt - confirm against appendImageCopyFromMemoryExt.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_image_handle_t hDstImage,
const void *srcPtr,
const ze_image_region_t *pDstRegion,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_image_handle_t hDstImage, const void *srcPtr, const ze_image_region_t *pDstRegion, ze_event_handle_t hEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) {
return appendImageCopyFromMemoryExt(hDstImage, srcPtr, pDstRegion, 0, 0,
hEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
hEvent, numWaitEvents, phWaitEvents, memoryCopyParams);
}
static ze_image_region_t getRegionFromImageDesc(ze_image_desc_t imgDesc) {
@@ -754,14 +750,8 @@ static ze_image_region_t getRegionFromImageDesc(ze_image_desc_t imgDesc) {
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemoryExt(ze_image_handle_t hDstImage,
const void *srcPtr,
const ze_image_region_t *pDstRegion,
uint32_t srcRowPitch,
uint32_t srcSlicePitch,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemoryExt(ze_image_handle_t hDstImage, const void *srcPtr, const ze_image_region_t *pDstRegion, uint32_t srcRowPitch, uint32_t srcSlicePitch,
ze_event_handle_t hEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) {
if (!hDstImage) {
return ZE_RESULT_ERROR_INVALID_NULL_HANDLE;
}
@@ -828,7 +818,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemoryExt(z
image = peerImage;
}
if (isCopyOnly(false)) {
memoryCopyParams.copyOffloadAllowed = isCopyOffloadAllowed(*allocationStruct.alloc, *image->getAllocation());
if (isCopyOnly(memoryCopyParams.copyOffloadAllowed)) {
if ((bytesPerPixel == 3) || (bytesPerPixel == 6) || image->isMimickedImage()) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
@@ -836,7 +828,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemoryExt(z
size_t imgSlicePitch = image->getImageInfo().slicePitch;
auto status = appendCopyImageBlit(allocationStruct.alloc, image->getAllocation(),
{0, 0, 0}, {pDstRegion->originX, pDstRegion->originY, pDstRegion->originZ}, srcRowPitch, srcSlicePitch,
imgRowPitch, imgSlicePitch, bytesPerPixel, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, imgSize, event);
imgRowPitch, imgSlicePitch, bytesPerPixel, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, imgSize,
event, numWaitEvents, phWaitEvents, memoryCopyParams);
addToMappedEventList(Event::fromHandle(hEvent));
return status;
}
@@ -927,7 +920,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemoryExt(z
CmdListKernelLaunchParams launchParams = {};
launchParams.isBuiltInKernel = true;
launchParams.relaxedOrderingDispatch = relaxedOrderingDispatch;
launchParams.relaxedOrderingDispatch = memoryCopyParams.relaxedOrderingDispatch;
auto status = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), kernelArgs,
event, numWaitEvents, phWaitEvents,
@@ -943,9 +936,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
const ze_image_region_t *pSrcRegion,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) {
return appendImageCopyToMemoryExt(dstPtr, hSrcImage, pSrcRegion, 0, 0,
hEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
hEvent, numWaitEvents, phWaitEvents, memoryCopyParams);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -956,7 +949,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemoryExt(voi
uint32_t destSlicePitch,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) {
if (!dstPtr) {
return ZE_RESULT_ERROR_INVALID_NULL_POINTER;
}
@@ -1023,7 +1016,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemoryExt(voi
image = peerImage;
}
if (isCopyOnly(false)) {
memoryCopyParams.copyOffloadAllowed = isCopyOffloadAllowed(*image->getAllocation(), *allocationStruct.alloc);
if (isCopyOnly(memoryCopyParams.copyOffloadAllowed)) {
if ((bytesPerPixel == 3) || (bytesPerPixel == 6) || image->isMimickedImage()) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
@@ -1032,7 +1027,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemoryExt(voi
auto status = appendCopyImageBlit(image->getAllocation(), allocationStruct.alloc,
{pSrcRegion->originX, pSrcRegion->originY, pSrcRegion->originZ}, {0, 0, 0}, imgRowPitch, imgSlicePitch,
destRowPitch, destSlicePitch, bytesPerPixel, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth},
imgSize, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, event);
imgSize, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, event, numWaitEvents, phWaitEvents, memoryCopyParams);
addToMappedEventList(event);
return status;
}
@@ -1146,7 +1141,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemoryExt(voi
if constexpr (checkIfAllocationImportedRequired()) {
launchParams.isDestinationAllocationImported = this->isAllocationImported(allocationStruct.alloc, device->getDriverHandle()->getSvmAllocsManager());
}
launchParams.relaxedOrderingDispatch = relaxedOrderingDispatch;
launchParams.relaxedOrderingDispatch = memoryCopyParams.relaxedOrderingDispatch;
ret = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(builtinKernel->toHandle(), kernelArgs,
event, numWaitEvents, phWaitEvents, launchParams);
addToMappedEventList(event);
@@ -1163,7 +1158,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
const ze_image_region_t *pSrcRegion,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) {
auto dstImage = L0::Image::fromHandle(hDstImage);
auto srcImage = L0::Image::fromHandle(hSrcImage);
cl_int4 srcOffset, dstOffset;
@@ -1230,7 +1225,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
srcImage = peerImage;
}
if (isCopyOnly(false)) {
memoryCopyParams.copyOffloadAllowed = isCopyOffloadAllowed(*srcImage->getAllocation(), *dstImage->getAllocation());
if (isCopyOnly(memoryCopyParams.copyOffloadAllowed)) {
auto bytesPerPixel = static_cast<uint32_t>(srcImage->getImageInfo().surfaceFormat->imageElementSizeInBytes);
ze_image_region_t region = getRegionFromImageDesc(srcImage->getImageDesc());
@@ -1249,7 +1246,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
auto status = appendCopyImageBlit(srcImage->getAllocation(), dstImage->getAllocation(),
{srcRegion.originX, srcRegion.originY, srcRegion.originZ}, {dstRegion.originX, dstRegion.originY, dstRegion.originZ}, srcRowPitch, srcSlicePitch,
dstRowPitch, dstSlicePitch, bytesPerPixel, {srcRegion.width, srcRegion.height, srcRegion.depth}, srcImgSize, dstImgSize, event);
dstRowPitch, dstSlicePitch, bytesPerPixel, {srcRegion.width, srcRegion.height, srcRegion.depth}, srcImgSize, dstImgSize,
event, numWaitEvents, phWaitEvents, memoryCopyParams);
addToMappedEventList(event);
return status;
}
@@ -1291,7 +1289,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
CmdListKernelLaunchParams launchParams = {};
launchParams.isBuiltInKernel = true;
launchParams.relaxedOrderingDispatch = relaxedOrderingDispatch;
launchParams.relaxedOrderingDispatch = memoryCopyParams.relaxedOrderingDispatch;
auto status = CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernel(kernel->toHandle(), kernelArgs,
event, numWaitEvents, phWaitEvents,
launchParams);
@@ -1305,10 +1303,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopy(ze_image_handl
ze_image_handle_t hSrcImage,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) {
return this->appendImageCopyRegion(hDstImage, hSrcImage, nullptr, nullptr, hEvent,
numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
numWaitEvents, phWaitEvents, memoryCopyParams);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -1606,7 +1604,16 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(NEO::Graph
size_t dstRowPitch, size_t dstSlicePitch,
size_t bytesPerPixel, const Vec3<size_t> &copySize,
const Vec3<size_t> &srcSize, const Vec3<size_t> &dstSize,
Event *signalEvent) {
Event *signalEvent, uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) {
const bool dualStreamCopyOffloadOperation = isDualStreamCopyOffloadOperation(memoryCopyParams.copyOffloadAllowed);
const bool isCopyOnlySignaling = isCopyOnly(dualStreamCopyOffloadOperation) && !useAdditionalBlitProperties;
auto ret = addEventsToCmdList(numWaitEvents, phWaitEvents, nullptr, memoryCopyParams.relaxedOrderingDispatch, false, true, false, dualStreamCopyOffloadOperation);
if (ret != ZE_RESULT_SUCCESS) {
return ret;
}
if (!handleCounterBasedEventOperations(signalEvent)) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
@@ -1629,7 +1636,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(NEO::Graph
bool useAdditionalTimestamp = blitProperties.copySize.z > 1;
if (useAdditionalBlitProperties) {
setAdditionalBlitProperties(blitProperties, signalEvent, useAdditionalTimestamp);
} else {
}
if (isCopyOnlySignaling) {
appendEventForProfiling(signalEvent, nullptr, true, false, false, true);
}
blitProperties.transform1DArrayTo2DArrayIfNeeded();
@@ -1641,7 +1650,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendCopyImageBlit(NEO::Graph
}
dummyBlitWa.isWaRequired = true;
if (!useAdditionalBlitProperties) {
if (isCopyOnlySignaling) {
appendSignalEventPostWalker(signalEvent, nullptr, nullptr, false, false, true);
if (this->isInOrderExecutionEnabled()) {
appendSignalInOrderDependencyCounter(signalEvent, false, false, false);

View File

@@ -125,14 +125,14 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
const ze_image_region_t *pDstRegion,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) override;
ze_result_t appendImageCopyToMemory(void *dstPtr,
ze_image_handle_t hSrcImage,
const ze_image_region_t *pSrcRegion,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) override;
ze_result_t appendImageCopyFromMemoryExt(ze_image_handle_t hDstImage,
const void *srcPtr,
@@ -141,7 +141,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
uint32_t srcSlicePitch,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) override;
ze_result_t appendImageCopyToMemoryExt(void *dstPtr,
ze_image_handle_t hSrcImage,
@@ -150,13 +150,12 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
uint32_t destSlicePitch,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) override;
ze_result_t appendImageCopy(
ze_image_handle_t dst, ze_image_handle_t src,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_result_t appendImageCopy(ze_image_handle_t dst, ze_image_handle_t src,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) override;
ze_result_t appendImageCopyRegion(ze_image_handle_t hDstImage,
ze_image_handle_t hSrcImage,
@@ -164,7 +163,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
const ze_image_region_t *pSrcRegion,
ze_event_handle_t hEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) override;
ze_result_t appendMemoryRangesBarrier(uint32_t numRanges,
const size_t *pRangeSizes,

View File

@@ -872,10 +872,10 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopy(
ze_image_handle_t dst, ze_image_handle_t src,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) {
return CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyRegion(dst, src, nullptr, nullptr, hSignalEvent,
numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
numWaitEvents, phWaitEvents, memoryCopyParams);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -885,8 +885,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyRegion
const ze_image_region_t *pSrcRegion,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false);
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) {
memoryCopyParams.relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false);
auto estimatedSize = commonImmediateCommandSize;
if (isCopyOnly(false)) {
@@ -895,12 +895,13 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyRegion
auto sizePerBlit = sizeof(typename GfxFamily::XY_BLOCK_COPY_BLT) + NEO::BlitCommandsHelper<GfxFamily>::estimatePostBlitCommandSize();
estimatedSize += nBlits * sizePerBlit;
}
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, estimatedSize, false);
checkAvailableSpace(numWaitEvents, memoryCopyParams.relaxedOrderingDispatch, estimatedSize, false);
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(hDstImage, hSrcImage, pDstRegion, pSrcRegion, hSignalEvent,
numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
numWaitEvents, phWaitEvents, memoryCopyParams);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, NEO::AppendOperations::kernel, false, hSignalEvent, false, nullptr, nullptr);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, memoryCopyParams.relaxedOrderingDispatch), memoryCopyParams.relaxedOrderingDispatch, NEO::AppendOperations::kernel,
memoryCopyParams.copyOffloadAllowed, hSignalEvent, false, nullptr, nullptr);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -910,15 +911,16 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyFromMe
const ze_image_region_t *pDstRegion,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false);
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) {
memoryCopyParams.relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false);
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize, false);
checkAvailableSpace(numWaitEvents, memoryCopyParams.relaxedOrderingDispatch, commonImmediateCommandSize, false);
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(hDstImage, srcPtr, pDstRegion, hSignalEvent,
numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
numWaitEvents, phWaitEvents, memoryCopyParams);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, NEO::AppendOperations::kernel, false, hSignalEvent, false, nullptr, nullptr);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, memoryCopyParams.relaxedOrderingDispatch), memoryCopyParams.relaxedOrderingDispatch, NEO::AppendOperations::kernel,
memoryCopyParams.copyOffloadAllowed, hSignalEvent, false, nullptr, nullptr);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -928,15 +930,16 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyToMemo
const ze_image_region_t *pSrcRegion,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false);
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) {
memoryCopyParams.relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false);
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize, false);
checkAvailableSpace(numWaitEvents, memoryCopyParams.relaxedOrderingDispatch, commonImmediateCommandSize, false);
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(dstPtr, hSrcImage, pSrcRegion, hSignalEvent,
numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
numWaitEvents, phWaitEvents, memoryCopyParams);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, NEO::AppendOperations::kernel, false, hSignalEvent, false, nullptr, nullptr);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, memoryCopyParams.relaxedOrderingDispatch), memoryCopyParams.relaxedOrderingDispatch, NEO::AppendOperations::kernel,
memoryCopyParams.copyOffloadAllowed, hSignalEvent, false, nullptr, nullptr);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -948,15 +951,16 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyFromMe
uint32_t srcSlicePitch,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false);
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) {
memoryCopyParams.relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false);
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize, false);
checkAvailableSpace(numWaitEvents, memoryCopyParams.relaxedOrderingDispatch, commonImmediateCommandSize, false);
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemoryExt(hDstImage, srcPtr, pDstRegion, srcRowPitch, srcSlicePitch,
hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
hSignalEvent, numWaitEvents, phWaitEvents, memoryCopyParams);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, NEO::AppendOperations::kernel, false, hSignalEvent, false, nullptr, nullptr);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, memoryCopyParams.relaxedOrderingDispatch), memoryCopyParams.relaxedOrderingDispatch, NEO::AppendOperations::kernel,
memoryCopyParams.copyOffloadAllowed, hSignalEvent, false, nullptr, nullptr);
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -968,15 +972,16 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyToMemo
uint32_t destSlicePitch,
ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false);
ze_event_handle_t *phWaitEvents, CmdListMemoryCopyParams &memoryCopyParams) {
memoryCopyParams.relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents, false);
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize, false);
checkAvailableSpace(numWaitEvents, memoryCopyParams.relaxedOrderingDispatch, commonImmediateCommandSize, false);
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemoryExt(dstPtr, hSrcImage, pSrcRegion, destRowPitch, destSlicePitch,
hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
hSignalEvent, numWaitEvents, phWaitEvents, memoryCopyParams);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, NEO::AppendOperations::kernel, false, hSignalEvent, false, nullptr, nullptr);
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, memoryCopyParams.relaxedOrderingDispatch), memoryCopyParams.relaxedOrderingDispatch, NEO::AppendOperations::kernel,
memoryCopyParams.copyOffloadAllowed, hSignalEvent, false, nullptr, nullptr);
}
template <GFXCORE_FAMILY gfxCoreFamily>