mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-04 07:14:10 +08:00
feature: implement zeCommandListAppendImageCopyToMemoryExt/FromMemoryExt
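This PR implements the two APIs zeCommandListAppendImageCopyToMemoryExt and zeCommandListAppendImageCopyFromMemoryExt, which are defined in the Level Zero spec. They are required for the SYCL bindless tests read_write_*D_subregion.cpp. Related-To: GSD-4137 Signed-off-by: Wenju He <wenju.he@intel.com>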
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
7e599740ea
commit
8acb4678fb
@@ -75,6 +75,16 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
const ze_image_region_t *pSrcRegion,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
|
||||
// Pure-virtual interface for copying from the memory at srcptr into the
// pDstRegion region of hDstImage, with caller-supplied source pitches.
//   srcRowPitch / srcSlicePitch - pitches of the source buffer in bytes; the
//       core-family implementation substitutes a value derived from the
//       region when a pitch is 0.
//   hEvent                      - event handle forwarded to the implementation.
//   numWaitEvents, phWaitEvents - presumably the count and array of events to
//       wait on before the copy (TODO confirm against the implementation).
//   relaxedOrderingDispatch     - relaxed-ordering flag passed through to the
//       implementation.
// Returns a ze_result_t status code.
virtual ze_result_t appendImageCopyFromMemoryExt(ze_image_handle_t hDstImage, const void *srcptr,
|
||||
const ze_image_region_t *pDstRegion,
|
||||
uint32_t srcRowPitch, uint32_t srcSlicePitch,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
|
||||
// Pure-virtual interface for copying the pSrcRegion region of hSrcImage into
// the memory at dstptr, with caller-supplied destination pitches.
//   destRowPitch / destSlicePitch - pitches of the destination buffer in
//       bytes; the core-family implementation substitutes a value derived
//       from the region when a pitch is 0.
//   hEvent                        - event handle forwarded to the implementation.
//   numWaitEvents, phWaitEvents   - presumably the count and array of events
//       to wait on before the copy (TODO confirm against the implementation).
//   relaxedOrderingDispatch       - relaxed-ordering flag passed through to
//       the implementation.
// Returns a ze_result_t status code.
virtual ze_result_t appendImageCopyToMemoryExt(void *dstptr, ze_image_handle_t hSrcImage,
|
||||
const ze_image_region_t *pSrcRegion,
|
||||
uint32_t destRowPitch, uint32_t destSlicePitch,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) = 0;
|
||||
virtual ze_result_t appendImageCopyRegion(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage,
|
||||
const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion,
|
||||
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
|
||||
|
||||
@@ -95,6 +95,17 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
const ze_image_region_t *pSrcRegion, ze_event_handle_t hEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
|
||||
// Core-family override of the memory-to-image copy with explicit source
// pitches. The non-Ext appendImageCopyFromMemory forwards here with both
// pitches set to 0; a zero srcRowPitch is replaced by region width multiplied
// by the image's bytes per pixel, and a zero srcSlicePitch is derived from the
// row pitch and region height.
// Returns ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY when no aligned allocation can
// be obtained for srcptr.
ze_result_t appendImageCopyFromMemoryExt(ze_image_handle_t hDstImage, const void *srcptr,
|
||||
const ze_image_region_t *pDstRegion,
|
||||
uint32_t srcRowPitch, uint32_t srcSlicePitch,
|
||||
ze_event_handle_t hEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
|
||||
// Core-family override of the image-to-memory copy with explicit destination
// pitches. The non-Ext appendImageCopyToMemory forwards here with both pitches
// set to 0; a zero destRowPitch is replaced by region width multiplied by the
// image's bytes per pixel, and a zero destSlicePitch is derived from the row
// pitch and region height.
// Returns ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY when no aligned allocation can
// be obtained for dstptr.
ze_result_t appendImageCopyToMemoryExt(void *dstptr, ze_image_handle_t hSrcImage,
|
||||
const ze_image_region_t *pSrcRegion,
|
||||
uint32_t destRowPitch, uint32_t destSlicePitch,
|
||||
ze_event_handle_t hEvent, uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
|
||||
ze_result_t appendImageCopyRegion(ze_image_handle_t hDstImage, ze_image_handle_t hSrcImage,
|
||||
const ze_image_region_t *pDstRegion, const ze_image_region_t *pSrcRegion,
|
||||
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents,
|
||||
@@ -302,7 +313,7 @@ struct CommandListCoreFamily : public CommandListImp {
|
||||
void appendMultiTileBarrier(NEO::Device &neoDevice);
|
||||
void appendDispatchOffsetRegister(bool workloadPartitionEvent, bool beforeProfilingCmds);
|
||||
size_t estimateBufferSizeMultiTileBarrier(const NEO::RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
uint64_t getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region);
|
||||
uint64_t getInputBufferSize(NEO::ImageType imageType, uint32_t bufferRowPitch, uint32_t bufferSlicePitch, const ze_image_region_t *region);
|
||||
MOCKABLE_VIRTUAL AlignedAllocationData getAlignedAllocationData(Device *device, const void *buffer, uint64_t bufferSize, bool hostCopyAllowed);
|
||||
size_t getAllocationOffsetForAppendBlitFill(void *ptr, NEO::GraphicsAllocation &gpuAllocation);
|
||||
uint32_t getRegionOffsetForAppendMemoryCopyBlitRegion(AlignedAllocationData *allocationData);
|
||||
|
||||
@@ -630,6 +630,19 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_i
|
||||
ze_event_handle_t hEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
|
||||
return appendImageCopyFromMemoryExt(hDstImage, srcPtr, pDstRegion, 0, 0,
|
||||
hEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemoryExt(ze_image_handle_t hDstImage,
|
||||
const void *srcPtr,
|
||||
const ze_image_region_t *pDstRegion,
|
||||
uint32_t srcRowPitch,
|
||||
uint32_t srcSlicePitch,
|
||||
ze_event_handle_t hEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
|
||||
|
||||
auto image = Image::fromHandle(hDstImage);
|
||||
auto bytesPerPixel = static_cast<uint32_t>(image->getImageInfo().surfaceFormat->imageElementSizeInBytes);
|
||||
@@ -662,17 +675,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_i
|
||||
pDstRegion = &tmpRegion;
|
||||
}
|
||||
|
||||
uint64_t bufferSize = getInputBufferSize(image->getImageInfo().imgDesc.imageType, bytesPerPixel, pDstRegion);
|
||||
if (srcRowPitch == 0) {
|
||||
srcRowPitch = pDstRegion->width * bytesPerPixel;
|
||||
}
|
||||
if (srcSlicePitch == 0) {
|
||||
srcSlicePitch = image->getImageInfo().imgDesc.imageType == NEO::ImageType::image1DArray ? 1 : pDstRegion->height * srcRowPitch;
|
||||
}
|
||||
|
||||
uint64_t bufferSize = getInputBufferSize(image->getImageInfo().imgDesc.imageType, srcRowPitch, srcSlicePitch, pDstRegion);
|
||||
|
||||
auto allocationStruct = getAlignedAllocationData(this->device, srcPtr, bufferSize, true);
|
||||
if (allocationStruct.alloc == nullptr) {
|
||||
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
}
|
||||
|
||||
auto rowPitch = pDstRegion->width * bytesPerPixel;
|
||||
auto slicePitch =
|
||||
image->getImageInfo().imgDesc.imageType == NEO::ImageType::image1DArray ? 1 : pDstRegion->height * rowPitch;
|
||||
|
||||
DriverHandleImp *driverHandle = static_cast<DriverHandleImp *>(device->getDriverHandle());
|
||||
if (driverHandle->isRemoteImageNeeded(image, device)) {
|
||||
L0::Image *peerImage = nullptr;
|
||||
@@ -686,8 +702,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_i
|
||||
|
||||
if (isCopyOnly()) {
|
||||
auto status = appendCopyImageBlit(allocationStruct.alloc, image->getAllocation(),
|
||||
{0, 0, 0}, {pDstRegion->originX, pDstRegion->originY, pDstRegion->originZ}, rowPitch, slicePitch,
|
||||
rowPitch, slicePitch, bytesPerPixel, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, imgSize, event);
|
||||
{0, 0, 0}, {pDstRegion->originX, pDstRegion->originY, pDstRegion->originZ}, srcRowPitch, srcSlicePitch,
|
||||
srcRowPitch, srcSlicePitch, bytesPerPixel, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, {pDstRegion->width, pDstRegion->height, pDstRegion->depth}, imgSize, event);
|
||||
addToMappedEventList(Event::fromHandle(hEvent));
|
||||
return status;
|
||||
}
|
||||
@@ -731,8 +747,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_i
|
||||
builtinKernel->setArgumentValue(3u, sizeof(origin), &origin);
|
||||
|
||||
uint32_t pitch[] = {
|
||||
rowPitch,
|
||||
slicePitch};
|
||||
srcRowPitch,
|
||||
srcSlicePitch};
|
||||
builtinKernel->setArgumentValue(4u, sizeof(pitch), &pitch);
|
||||
|
||||
uint32_t groupSizeX = pDstRegion->width;
|
||||
@@ -782,6 +798,19 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
|
||||
ze_event_handle_t hEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
|
||||
return appendImageCopyToMemoryExt(dstPtr, hSrcImage, pSrcRegion, 0, 0,
|
||||
hEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemoryExt(void *dstPtr,
|
||||
ze_image_handle_t hSrcImage,
|
||||
const ze_image_region_t *pSrcRegion,
|
||||
uint32_t destRowPitch,
|
||||
uint32_t destSlicePitch,
|
||||
ze_event_handle_t hEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
|
||||
|
||||
auto image = Image::fromHandle(hSrcImage);
|
||||
auto bytesPerPixel = static_cast<uint32_t>(image->getImageInfo().surfaceFormat->imageElementSizeInBytes);
|
||||
@@ -814,17 +843,21 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
|
||||
pSrcRegion = &tmpRegion;
|
||||
}
|
||||
|
||||
uint64_t bufferSize = getInputBufferSize(image->getImageInfo().imgDesc.imageType, bytesPerPixel, pSrcRegion);
|
||||
if (destRowPitch == 0) {
|
||||
destRowPitch = pSrcRegion->width * bytesPerPixel;
|
||||
}
|
||||
if (destSlicePitch == 0) {
|
||||
destSlicePitch =
|
||||
(image->getImageInfo().imgDesc.imageType == NEO::ImageType::image1DArray ? 1 : pSrcRegion->height) * destRowPitch;
|
||||
}
|
||||
|
||||
uint64_t bufferSize = getInputBufferSize(image->getImageInfo().imgDesc.imageType, destRowPitch, destSlicePitch, pSrcRegion);
|
||||
|
||||
auto allocationStruct = getAlignedAllocationData(this->device, dstPtr, bufferSize, false);
|
||||
if (allocationStruct.alloc == nullptr) {
|
||||
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
}
|
||||
|
||||
auto rowPitch = pSrcRegion->width * bytesPerPixel;
|
||||
auto slicePitch =
|
||||
(image->getImageInfo().imgDesc.imageType == NEO::ImageType::image1DArray ? 1 : pSrcRegion->height) * rowPitch;
|
||||
|
||||
DriverHandleImp *driverHandle = static_cast<DriverHandleImp *>(device->getDriverHandle());
|
||||
if (driverHandle->isRemoteImageNeeded(image, device)) {
|
||||
L0::Image *peerImage = nullptr;
|
||||
@@ -838,8 +871,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
|
||||
|
||||
if (isCopyOnly()) {
|
||||
auto status = appendCopyImageBlit(image->getAllocation(), allocationStruct.alloc,
|
||||
{pSrcRegion->originX, pSrcRegion->originY, pSrcRegion->originZ}, {0, 0, 0}, rowPitch, slicePitch,
|
||||
rowPitch, slicePitch, bytesPerPixel, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, imgSize, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, event);
|
||||
{pSrcRegion->originX, pSrcRegion->originY, pSrcRegion->originZ}, {0, 0, 0}, destRowPitch, destSlicePitch,
|
||||
destRowPitch, destSlicePitch, bytesPerPixel, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth},
|
||||
imgSize, {pSrcRegion->width, pSrcRegion->height, pSrcRegion->depth}, event);
|
||||
addToMappedEventList(event);
|
||||
return status;
|
||||
}
|
||||
@@ -886,8 +920,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
|
||||
builtinKernel->setArgumentValue(3u, sizeof(size_t), &allocationStruct.offset);
|
||||
|
||||
uint32_t pitch[] = {
|
||||
rowPitch,
|
||||
slicePitch};
|
||||
destRowPitch,
|
||||
destSlicePitch};
|
||||
builtinKernel->setArgumentValue(4u, sizeof(pitch), &pitch);
|
||||
|
||||
uint32_t groupSizeX = pSrcRegion->width;
|
||||
@@ -2202,7 +2236,8 @@ void CommandListCoreFamily<gfxCoreFamily>::appendEventForProfilingCopyCommand(Ev
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
inline uint64_t CommandListCoreFamily<gfxCoreFamily>::getInputBufferSize(NEO::ImageType imageType,
|
||||
uint64_t bytesPerPixel,
|
||||
uint32_t bufferRowPitch,
|
||||
uint32_t bufferSlicePitch,
|
||||
const ze_image_region_t *region) {
|
||||
const auto driverHandle = static_cast<DriverHandleImp *>(device->getDriverHandle());
|
||||
switch (imageType) {
|
||||
@@ -2213,12 +2248,12 @@ inline uint64_t CommandListCoreFamily<gfxCoreFamily>::getInputBufferSize(NEO::Im
|
||||
break;
|
||||
case NEO::ImageType::image1D:
|
||||
case NEO::ImageType::image1DArray:
|
||||
return bytesPerPixel * region->width;
|
||||
return bufferRowPitch;
|
||||
case NEO::ImageType::image2D:
|
||||
case NEO::ImageType::image2DArray:
|
||||
return bytesPerPixel * region->width * region->height;
|
||||
return bufferRowPitch * region->height;
|
||||
case NEO::ImageType::image3D:
|
||||
return bytesPerPixel * region->width * region->height * region->depth;
|
||||
return bufferSlicePitch * region->depth;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -125,6 +125,24 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
|
||||
|
||||
// Immediate command list override of the memory-to-image copy with explicit
// source pitches. Its definition recomputes relaxedOrderingDispatch, delegates
// to the CommandListCoreFamily implementation and then flushes immediately.
ze_result_t appendImageCopyFromMemoryExt(ze_image_handle_t hDstImage,
|
||||
const void *srcPtr,
|
||||
const ze_image_region_t *pDstRegion,
|
||||
uint32_t srcRowPitch,
|
||||
uint32_t srcSlicePitch,
|
||||
ze_event_handle_t hEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
|
||||
|
||||
// Immediate command list override of the image-to-memory copy with explicit
// destination pitches. Its definition recomputes relaxedOrderingDispatch,
// delegates to the CommandListCoreFamily implementation and then flushes
// immediately.
ze_result_t appendImageCopyToMemoryExt(void *dstPtr,
|
||||
ze_image_handle_t hSrcImage,
|
||||
const ze_image_region_t *pSrcRegion,
|
||||
uint32_t destRowPitch,
|
||||
uint32_t destSlicePitch,
|
||||
ze_event_handle_t hEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
|
||||
|
||||
ze_result_t appendImageCopy(
|
||||
ze_image_handle_t dst, ze_image_handle_t src,
|
||||
ze_event_handle_t hEvent,
|
||||
|
||||
@@ -802,6 +802,46 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyToMemo
|
||||
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyFromMemoryExt(
|
||||
ze_image_handle_t hDstImage,
|
||||
const void *srcPtr,
|
||||
const ze_image_region_t *pDstRegion,
|
||||
uint32_t srcRowPitch,
|
||||
uint32_t srcSlicePitch,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
|
||||
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
|
||||
|
||||
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize);
|
||||
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemoryExt(hDstImage, srcPtr, pDstRegion, srcRowPitch, srcSlicePitch,
|
||||
hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
|
||||
|
||||
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendImageCopyToMemoryExt(
|
||||
void *dstPtr,
|
||||
ze_image_handle_t hSrcImage,
|
||||
const ze_image_region_t *pSrcRegion,
|
||||
uint32_t destRowPitch,
|
||||
uint32_t destSlicePitch,
|
||||
ze_event_handle_t hSignalEvent,
|
||||
uint32_t numWaitEvents,
|
||||
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) {
|
||||
relaxedOrderingDispatch = isRelaxedOrderingDispatchAllowed(numWaitEvents);
|
||||
|
||||
checkAvailableSpace(numWaitEvents, relaxedOrderingDispatch, commonImmediateCommandSize);
|
||||
|
||||
auto ret = CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemoryExt(dstPtr, hSrcImage, pSrcRegion, destRowPitch, destSlicePitch,
|
||||
hSignalEvent, numWaitEvents, phWaitEvents, relaxedOrderingDispatch);
|
||||
|
||||
return flushImmediate(ret, true, hasStallingCmdsForRelaxedOrdering(numWaitEvents, relaxedOrderingDispatch), relaxedOrderingDispatch, true, hSignalEvent);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendMemoryRangesBarrier(uint32_t numRanges,
|
||||
const size_t *pRangeSizes,
|
||||
|
||||
Reference in New Issue
Block a user