feature: packed surface format

Resolves: NEO-13669

Signed-off-by: Damian Tomczak <damian.tomczak@intel.com>
This commit is contained in:
Damian Tomczak
2025-06-12 08:16:53 +00:00
committed by Compute-Runtime-Automation
parent 05a9f9b18c
commit 5cd1423bab
29 changed files with 379 additions and 80 deletions

View File

@@ -22,6 +22,7 @@
#include "shared/source/helpers/definitions/command_encoder_args.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/image_helper.h"
#include "shared/source/helpers/in_order_cmd_helpers.h"
#include "shared/source/helpers/kernel_helpers.h"
#include "shared/source/helpers/pipe_control_args.h"
@@ -908,7 +909,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemoryExt(z
builtinKernel->setArgBufferWithAlloc(0u, allocationStruct.alignedAllocationPtr,
allocationStruct.alloc,
nullptr);
builtinKernel->setArgRedescribedImage(1u, image->toHandle());
builtinKernel->setArgRedescribedImage(1u, image->toHandle(), false);
builtinKernel->setArgumentValue(2u, sizeof(size_t), &allocationStruct.offset);
uint32_t origin[] = {pDstRegion->originX,
@@ -1113,7 +1114,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemoryExt(voi
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
Kernel *builtinKernel = device->getBuiltinFunctionsLib()->getImageFunction(builtInType);
builtinKernel->setArgRedescribedImage(0u, image->toHandle());
builtinKernel->setArgRedescribedImage(0u, image->toHandle(), false);
builtinKernel->setArgBufferWithAlloc(1u, allocationStruct.alignedAllocationPtr,
allocationStruct.alloc,
nullptr);
@@ -1315,8 +1316,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
ze_group_count_t kernelArgs{srcRegion.width / groupSizeX, srcRegion.height / groupSizeY,
srcRegion.depth / groupSizeZ};
kernel->setArgRedescribedImage(0, srcImage->toHandle());
kernel->setArgRedescribedImage(1, dstImage->toHandle());
const bool isPackedFormat = NEO::ImageHelper::areImagesCompatibleWithPackedFormat(device->getProductHelper(), srcImage->getImageInfo(), dstImage->getImageInfo(), srcImage->getAllocation(), dstImage->getAllocation(), srcRegion.width);
kernel->setArgRedescribedImage(0, srcImage->toHandle(), isPackedFormat);
kernel->setArgRedescribedImage(1, dstImage->toHandle(), isPackedFormat);
kernel->setArgumentValue(2, sizeof(srcOffset), &srcOffset);
kernel->setArgumentValue(3, sizeof(dstOffset), &dstOffset);

View File

@@ -39,10 +39,9 @@ struct Image : _ze_image_handle_t {
virtual NEO::GraphicsAllocation *getAllocation() = 0;
virtual NEO::GraphicsAllocation *getImplicitArgsAllocation() = 0;
virtual void copySurfaceStateToSSH(void *surfaceStateHeap,
const uint32_t surfaceStateOffset,
uint32_t surfaceStateOffset,
uint32_t bindlessSlot,
bool isMediaBlockArg) = 0;
virtual void copyRedescribedSurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) = 0;
virtual void copyImplicitArgsSurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) = 0;
virtual NEO::ImageInfo getImageInfo() = 0;
virtual ze_image_desc_t getImageDesc() = 0;
virtual ze_result_t getMemoryProperties(ze_image_memory_properties_exp_t *pMemoryProperties) = 0;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2024 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -21,9 +21,10 @@ struct ImageCoreFamily : public ImageImp {
using ImageImp::bindlessImage;
ze_result_t initialize(Device *device, const ze_image_desc_t *desc) override;
void copySurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset, bool isMediaBlockArg) override;
void copyRedescribedSurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) override;
void copyImplicitArgsSurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) override;
void copySurfaceStateToSSH(void *surfaceStateHeap,
uint32_t surfaceStateOffset,
uint32_t bindlessSlot,
bool isMediaBlockArg) override;
bool isMediaFormat(const ze_image_format_layout_t layout) {
if (layout == ze_image_format_layout_t::ZE_IMAGE_FORMAT_LAYOUT_NV12 ||
layout == ze_image_format_layout_t::ZE_IMAGE_FORMAT_LAYOUT_P010 ||
@@ -53,6 +54,7 @@ struct ImageCoreFamily : public ImageImp {
RENDER_SURFACE_STATE surfaceState;
RENDER_SURFACE_STATE implicitArgsSurfaceState;
RENDER_SURFACE_STATE redescribedSurfaceState;
RENDER_SURFACE_STATE packedSurfaceState;
};
template <uint32_t gfxProductFamily>

View File

@@ -13,6 +13,7 @@
#include "shared/source/helpers/basic_math.h"
#include "shared/source/helpers/bindless_heaps_helper.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/image_helper.h"
#include "shared/source/helpers/surface_format_info.h"
#include "shared/source/image/image_surface_state.h"
#include "shared/source/memory_manager/allocation_properties.h"
@@ -244,7 +245,7 @@ ze_result_t ImageCoreFamily<gfxCoreFamily>::initialize(Device *device, const ze_
if (this->bindlessImage) {
auto ssInHeap = getBindlessSlot();
copySurfaceStateToSSH(ssInHeap->ssPtr, 0u, false);
copySurfaceStateToSSH(ssInHeap->ssPtr, 0u, NEO::BindlessImageSlot::image, false);
if (this->sampledImage) {
auto productFamily = this->device->getNEODevice()->getHardwareInfo().platform.eProductFamily;
@@ -261,6 +262,7 @@ ze_result_t ImageCoreFamily<gfxCoreFamily>::initialize(Device *device, const ze_
}
}
const auto &productHelper = rootDeviceEnvironment.getHelper<NEO::ProductHelper>();
if (this->bindlessImage && implicitArgsAllocation) {
implicitArgsSurfaceState = GfxFamily::cmdInitRenderSurfaceState;
@@ -285,7 +287,6 @@ ze_result_t ImageCoreFamily<gfxCoreFamily>::initialize(Device *device, const ze_
imageImplicitArgs.flagHeight = (imgInfo.imgDesc.imageHeight * pixelSize) - 1u;
imageImplicitArgs.flatPitch = imgInfo.imgDesc.imageRowPitch - 1u;
const auto &productHelper = rootDeviceEnvironment.getHelper<NEO::ProductHelper>();
NEO::MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *implicitArgsAllocation), *this->device->getNEODevice(), implicitArgsAllocation, 0u, &imageImplicitArgs, NEO::ImageImplicitArgs::getSize());
{
@@ -362,50 +363,51 @@ ze_result_t ImageCoreFamily<gfxCoreFamily>::initialize(Device *device, const ze_
}
}
if (productHelper.isPackedCopyFormatSupported()) {
packedSurfaceState = redescribedSurfaceState;
NEO::EncodeSurfaceState<GfxFamily>::convertSurfaceStateToPacked(&packedSurfaceState, imgInfo);
}
return ZE_RESULT_SUCCESS;
}
// Copies one of this image's stored surface states into a surface state heap.
//   surfaceStateHeap / surfaceStateOffset: the destination is
//       ptrOffset(surfaceStateHeap, surfaceStateOffset).
//   bindlessSlot: NEO::BindlessImageSlot value that picks which stored state
//       (surfaceState, redescribedSurfaceState, implicitArgsSurfaceState or
//       packedSurfaceState) is used as the copy source.
//   isMediaBlockArg: when true, the copied state is additionally passed to
//       setWidthForMediaBlockSurfaceState together with imgInfo.
// NOTE(review): this span appears to carry both the earlier and the revised lines
// of the change (two surfaceStateOffset parameter lines, destSurfaceState as well
// as dst) - confirm which set is current before relying on it.
template <GFXCORE_FAMILY gfxCoreFamily>
void ImageCoreFamily<gfxCoreFamily>::copySurfaceStateToSSH(void *surfaceStateHeap,
const uint32_t surfaceStateOffset,
uint32_t surfaceStateOffset,
uint32_t bindlessSlot,
bool isMediaBlockArg) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
// Copy the image's surface state into position in the provided surface state heap
auto destSurfaceState = ptrOffset(surfaceStateHeap, surfaceStateOffset);
memcpy_s(destSurfaceState, sizeof(RENDER_SURFACE_STATE),
&surfaceState, sizeof(RENDER_SURFACE_STATE));
// Source state is chosen by slot; it stays nullptr until a case matches.
const RENDER_SURFACE_STATE *src = nullptr;
switch (bindlessSlot) {
case NEO::BindlessImageSlot::image:
src = &surfaceState;
break;
case NEO::BindlessImageSlot::redescribedImage:
src = &redescribedSurfaceState;
break;
case NEO::BindlessImageSlot::implicitArgs:
src = &implicitArgsSurfaceState;
break;
case NEO::BindlessImageSlot::packedImage:
src = &packedSurfaceState;
break;
default:
// Any slot value not listed above is treated as a fatal condition.
UNRECOVERABLE_IF(true);
}
// Exactly one RENDER_SURFACE_STATE worth of bytes is copied to the destination.
auto dst = ptrOffset(surfaceStateHeap, surfaceStateOffset);
memcpy_s(dst, sizeof(RENDER_SURFACE_STATE), src, sizeof(RENDER_SURFACE_STATE));
// Media block arguments get their width adjusted on the copy in the heap,
// not on the state stored in this object.
if (isMediaBlockArg) {
RENDER_SURFACE_STATE *dstRss = static_cast<RENDER_SURFACE_STATE *>(destSurfaceState);
RENDER_SURFACE_STATE *dstRss = static_cast<RENDER_SURFACE_STATE *>(dst);
NEO::ImageSurfaceStateHelper<GfxFamily>::setWidthForMediaBlockSurfaceState(dstRss, imgInfo);
}
}
// Writes this image's redescribed surface state into a surface state heap.
//   surfaceStateHeap:   base pointer of the destination heap.
//   surfaceStateOffset: offset applied to the base via ptrOffset to locate the slot.
template <GFXCORE_FAMILY gfxCoreFamily>
void ImageCoreFamily<gfxCoreFamily>::copyRedescribedSurfaceStateToSSH(void *surfaceStateHeap,
                                                                      const uint32_t surfaceStateOffset) {
    using RENDER_SURFACE_STATE = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily::RENDER_SURFACE_STATE;
    constexpr auto stateSize = sizeof(RENDER_SURFACE_STATE);

    // Destination and source are both exactly one RENDER_SURFACE_STATE in size.
    auto slotInHeap = ptrOffset(surfaceStateHeap, surfaceStateOffset);
    memcpy_s(slotInHeap, stateSize, &redescribedSurfaceState, stateSize);
}
// Writes this image's implicit-args surface state into a surface state heap.
//   surfaceStateHeap:   base pointer of the destination heap.
//   surfaceStateOffset: offset applied to the base via ptrOffset to locate the slot.
template <GFXCORE_FAMILY gfxCoreFamily>
void ImageCoreFamily<gfxCoreFamily>::copyImplicitArgsSurfaceStateToSSH(void *surfaceStateHeap,
                                                                       const uint32_t surfaceStateOffset) {
    using RENDER_SURFACE_STATE = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily::RENDER_SURFACE_STATE;
    constexpr auto stateSize = sizeof(RENDER_SURFACE_STATE);

    // Destination and source are both exactly one RENDER_SURFACE_STATE in size.
    auto slotInHeap = ptrOffset(surfaceStateHeap, surfaceStateOffset);
    memcpy_s(slotInHeap, stateSize, &implicitArgsSurfaceState, stateSize);
}
template <GFXCORE_FAMILY gfxCoreFamily>
bool ImageCoreFamily<gfxCoreFamily>::isSuitableForCompression(const StructuresLookupTable &structuresLookupTable, const NEO::ImageInfo &imgInfo) {
auto &hwInfo = device->getHwInfo();

View File

@@ -128,7 +128,7 @@ struct Kernel : _ze_kernel_handle_t, virtual NEO::DispatchKernelEncoderI, NEO::N
virtual void setGroupCount(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) = 0;
virtual ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *peerAllocData) = 0;
virtual ze_result_t setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal) = 0;
virtual ze_result_t setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal, bool isPacked) = 0;
virtual ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
uint32_t groupSizeZ) = 0;
virtual ze_result_t suggestGroupSize(uint32_t globalSizeX, uint32_t globalSizeY,

View File

@@ -599,7 +599,9 @@ ze_result_t KernelImp::setArgImmediate(uint32_t argIndex, size_t argSize, const
return ZE_RESULT_SUCCESS;
}
ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal) {
ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal, bool isPacked) {
const uint32_t bindlessSlot = isPacked ? NEO::BindlessImageSlot::packedImage : NEO::BindlessImageSlot::redescribedImage;
const auto &arg = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as<NEO::ArgDescImage>();
if (argVal == nullptr) {
argumentsResidencyContainer[argIndex] = nullptr;
@@ -622,7 +624,7 @@ ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle
auto ssInHeap = image->getBindlessSlot();
auto patchLocation = ptrOffset(getCrossThreadData(), arg.bindless);
// redescribed image's surface state is after image's implicit args and sampler
uint64_t bindlessSlotOffset = ssInHeap->surfaceStateOffset + surfaceStateSize * NEO::BindlessImageSlot::redescribedImage;
uint64_t bindlessSlotOffset = ssInHeap->surfaceStateOffset + surfaceStateSize * bindlessSlot;
uint32_t patchSize = this->heaplessEnabled ? 8u : 4u;
uint64_t patchValue = this->heaplessEnabled
? bindlessSlotOffset + bindlessHeapsHelper->getGlobalHeapsBase()
@@ -630,15 +632,15 @@ ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle
patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), patchSize, patchValue);
image->copyRedescribedSurfaceStateToSSH(ptrOffset(ssInHeap->ssPtr, surfaceStateSize * NEO::BindlessImageSlot::redescribedImage), 0u);
image->copySurfaceStateToSSH(ptrOffset(ssInHeap->ssPtr, surfaceStateSize * bindlessSlot), 0u, bindlessSlot, false);
isBindlessOffsetSet[argIndex] = true;
} else {
usingSurfaceStateHeap[argIndex] = true;
auto ssPtr = ptrOffset(surfaceStateHeapData.get(), getSurfaceStateIndexForBindlessOffset(arg.bindless) * surfaceStateSize);
image->copyRedescribedSurfaceStateToSSH(ssPtr, 0u);
image->copySurfaceStateToSSH(ssPtr, 0u, bindlessSlot, false);
}
} else {
image->copyRedescribedSurfaceStateToSSH(surfaceStateHeapData.get(), arg.bindful);
image->copySurfaceStateToSSH(surfaceStateHeapData.get(), arg.bindful, bindlessSlot, false);
}
argumentsResidencyContainer[argIndex] = image->getAllocation();
@@ -832,17 +834,17 @@ ze_result_t KernelImp::setArgImage(uint32_t argIndex, size_t argSize, const void
patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), patchSize, patchValue);
image->copySurfaceStateToSSH(ssInHeap->ssPtr, 0u, isMediaBlockImage);
image->copyImplicitArgsSurfaceStateToSSH(ptrOffset(ssInHeap->ssPtr, surfaceStateSize), 0u);
image->copySurfaceStateToSSH(ssInHeap->ssPtr, 0u, NEO::BindlessImageSlot::image, isMediaBlockImage);
image->copySurfaceStateToSSH(ptrOffset(ssInHeap->ssPtr, surfaceStateSize), 0u, NEO::BindlessImageSlot::implicitArgs, false);
isBindlessOffsetSet[argIndex] = true;
} else {
usingSurfaceStateHeap[argIndex] = true;
auto ssPtr = ptrOffset(surfaceStateHeapData.get(), getSurfaceStateIndexForBindlessOffset(arg.bindless) * surfaceStateSize);
image->copySurfaceStateToSSH(ssPtr, 0u, isMediaBlockImage);
image->copySurfaceStateToSSH(ssPtr, 0u, NEO::BindlessImageSlot::image, isMediaBlockImage);
}
} else {
image->copySurfaceStateToSSH(surfaceStateHeapData.get(), arg.bindful, isMediaBlockImage);
image->copySurfaceStateToSSH(surfaceStateHeapData.get(), arg.bindful, NEO::BindlessImageSlot::image, isMediaBlockImage);
}
argumentsResidencyContainer[argIndex] = image->getAllocation();

View File

@@ -99,7 +99,7 @@ struct KernelImp : Kernel {
ze_result_t setArgUnknown(uint32_t argIndex, size_t argSize, const void *argVal);
ze_result_t setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal) override;
ze_result_t setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal, bool isPacked) override;
ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *peerAllocData) override;