mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-30 01:35:20 +08:00
feature: packed surface format
Resolves: NEO-13669
Signed-off-by: Damian Tomczak <damian.tomczak@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
05a9f9b18c
commit
5cd1423bab
@@ -22,6 +22,7 @@
|
||||
#include "shared/source/helpers/definitions/command_encoder_args.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
#include "shared/source/helpers/image_helper.h"
|
||||
#include "shared/source/helpers/in_order_cmd_helpers.h"
|
||||
#include "shared/source/helpers/kernel_helpers.h"
|
||||
#include "shared/source/helpers/pipe_control_args.h"
|
||||
@@ -908,7 +909,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemoryExt(z
|
||||
builtinKernel->setArgBufferWithAlloc(0u, allocationStruct.alignedAllocationPtr,
|
||||
allocationStruct.alloc,
|
||||
nullptr);
|
||||
builtinKernel->setArgRedescribedImage(1u, image->toHandle());
|
||||
builtinKernel->setArgRedescribedImage(1u, image->toHandle(), false);
|
||||
builtinKernel->setArgumentValue(2u, sizeof(size_t), &allocationStruct.offset);
|
||||
|
||||
uint32_t origin[] = {pDstRegion->originX,
|
||||
@@ -1113,7 +1114,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemoryExt(voi
|
||||
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
|
||||
Kernel *builtinKernel = device->getBuiltinFunctionsLib()->getImageFunction(builtInType);
|
||||
|
||||
builtinKernel->setArgRedescribedImage(0u, image->toHandle());
|
||||
builtinKernel->setArgRedescribedImage(0u, image->toHandle(), false);
|
||||
builtinKernel->setArgBufferWithAlloc(1u, allocationStruct.alignedAllocationPtr,
|
||||
allocationStruct.alloc,
|
||||
nullptr);
|
||||
@@ -1315,8 +1316,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
|
||||
ze_group_count_t kernelArgs{srcRegion.width / groupSizeX, srcRegion.height / groupSizeY,
|
||||
srcRegion.depth / groupSizeZ};
|
||||
|
||||
kernel->setArgRedescribedImage(0, srcImage->toHandle());
|
||||
kernel->setArgRedescribedImage(1, dstImage->toHandle());
|
||||
const bool isPackedFormat = NEO::ImageHelper::areImagesCompatibleWithPackedFormat(device->getProductHelper(), srcImage->getImageInfo(), dstImage->getImageInfo(), srcImage->getAllocation(), dstImage->getAllocation(), srcRegion.width);
|
||||
|
||||
kernel->setArgRedescribedImage(0, srcImage->toHandle(), isPackedFormat);
|
||||
kernel->setArgRedescribedImage(1, dstImage->toHandle(), isPackedFormat);
|
||||
|
||||
kernel->setArgumentValue(2, sizeof(srcOffset), &srcOffset);
|
||||
kernel->setArgumentValue(3, sizeof(dstOffset), &dstOffset);
|
||||
|
||||
|
||||
@@ -39,10 +39,9 @@ struct Image : _ze_image_handle_t {
|
||||
virtual NEO::GraphicsAllocation *getAllocation() = 0;
|
||||
virtual NEO::GraphicsAllocation *getImplicitArgsAllocation() = 0;
|
||||
virtual void copySurfaceStateToSSH(void *surfaceStateHeap,
|
||||
const uint32_t surfaceStateOffset,
|
||||
uint32_t surfaceStateOffset,
|
||||
uint32_t bindlessSlot,
|
||||
bool isMediaBlockArg) = 0;
|
||||
virtual void copyRedescribedSurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) = 0;
|
||||
virtual void copyImplicitArgsSurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) = 0;
|
||||
virtual NEO::ImageInfo getImageInfo() = 0;
|
||||
virtual ze_image_desc_t getImageDesc() = 0;
|
||||
virtual ze_result_t getMemoryProperties(ze_image_memory_properties_exp_t *pMemoryProperties) = 0;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
* Copyright (C) 2020-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -21,9 +21,10 @@ struct ImageCoreFamily : public ImageImp {
|
||||
using ImageImp::bindlessImage;
|
||||
|
||||
ze_result_t initialize(Device *device, const ze_image_desc_t *desc) override;
|
||||
void copySurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset, bool isMediaBlockArg) override;
|
||||
void copyRedescribedSurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) override;
|
||||
void copyImplicitArgsSurfaceStateToSSH(void *surfaceStateHeap, const uint32_t surfaceStateOffset) override;
|
||||
void copySurfaceStateToSSH(void *surfaceStateHeap,
|
||||
uint32_t surfaceStateOffset,
|
||||
uint32_t bindlessSlot,
|
||||
bool isMediaBlockArg) override;
|
||||
bool isMediaFormat(const ze_image_format_layout_t layout) {
|
||||
if (layout == ze_image_format_layout_t::ZE_IMAGE_FORMAT_LAYOUT_NV12 ||
|
||||
layout == ze_image_format_layout_t::ZE_IMAGE_FORMAT_LAYOUT_P010 ||
|
||||
@@ -53,6 +54,7 @@ struct ImageCoreFamily : public ImageImp {
|
||||
RENDER_SURFACE_STATE surfaceState;
|
||||
RENDER_SURFACE_STATE implicitArgsSurfaceState;
|
||||
RENDER_SURFACE_STATE redescribedSurfaceState;
|
||||
RENDER_SURFACE_STATE packedSurfaceState;
|
||||
};
|
||||
|
||||
template <uint32_t gfxProductFamily>
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "shared/source/helpers/basic_math.h"
|
||||
#include "shared/source/helpers/bindless_heaps_helper.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/helpers/image_helper.h"
|
||||
#include "shared/source/helpers/surface_format_info.h"
|
||||
#include "shared/source/image/image_surface_state.h"
|
||||
#include "shared/source/memory_manager/allocation_properties.h"
|
||||
@@ -244,7 +245,7 @@ ze_result_t ImageCoreFamily<gfxCoreFamily>::initialize(Device *device, const ze_
|
||||
|
||||
if (this->bindlessImage) {
|
||||
auto ssInHeap = getBindlessSlot();
|
||||
copySurfaceStateToSSH(ssInHeap->ssPtr, 0u, false);
|
||||
copySurfaceStateToSSH(ssInHeap->ssPtr, 0u, NEO::BindlessImageSlot::image, false);
|
||||
|
||||
if (this->sampledImage) {
|
||||
auto productFamily = this->device->getNEODevice()->getHardwareInfo().platform.eProductFamily;
|
||||
@@ -261,6 +262,7 @@ ze_result_t ImageCoreFamily<gfxCoreFamily>::initialize(Device *device, const ze_
|
||||
}
|
||||
}
|
||||
|
||||
const auto &productHelper = rootDeviceEnvironment.getHelper<NEO::ProductHelper>();
|
||||
if (this->bindlessImage && implicitArgsAllocation) {
|
||||
implicitArgsSurfaceState = GfxFamily::cmdInitRenderSurfaceState;
|
||||
|
||||
@@ -285,7 +287,6 @@ ze_result_t ImageCoreFamily<gfxCoreFamily>::initialize(Device *device, const ze_
|
||||
imageImplicitArgs.flagHeight = (imgInfo.imgDesc.imageHeight * pixelSize) - 1u;
|
||||
imageImplicitArgs.flatPitch = imgInfo.imgDesc.imageRowPitch - 1u;
|
||||
|
||||
const auto &productHelper = rootDeviceEnvironment.getHelper<NEO::ProductHelper>();
|
||||
NEO::MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *implicitArgsAllocation), *this->device->getNEODevice(), implicitArgsAllocation, 0u, &imageImplicitArgs, NEO::ImageImplicitArgs::getSize());
|
||||
|
||||
{
|
||||
@@ -362,50 +363,51 @@ ze_result_t ImageCoreFamily<gfxCoreFamily>::initialize(Device *device, const ze_
|
||||
}
|
||||
}
|
||||
|
||||
if (productHelper.isPackedCopyFormatSupported()) {
|
||||
packedSurfaceState = redescribedSurfaceState;
|
||||
|
||||
NEO::EncodeSurfaceState<GfxFamily>::convertSurfaceStateToPacked(&packedSurfaceState, imgInfo);
|
||||
}
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void ImageCoreFamily<gfxCoreFamily>::copySurfaceStateToSSH(void *surfaceStateHeap,
|
||||
const uint32_t surfaceStateOffset,
|
||||
uint32_t surfaceStateOffset,
|
||||
uint32_t bindlessSlot,
|
||||
bool isMediaBlockArg) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
|
||||
|
||||
// Copy the image's surface state into position in the provided surface state heap
|
||||
auto destSurfaceState = ptrOffset(surfaceStateHeap, surfaceStateOffset);
|
||||
memcpy_s(destSurfaceState, sizeof(RENDER_SURFACE_STATE),
|
||||
&surfaceState, sizeof(RENDER_SURFACE_STATE));
|
||||
const RENDER_SURFACE_STATE *src = nullptr;
|
||||
|
||||
switch (bindlessSlot) {
|
||||
case NEO::BindlessImageSlot::image:
|
||||
src = &surfaceState;
|
||||
break;
|
||||
case NEO::BindlessImageSlot::redescribedImage:
|
||||
src = &redescribedSurfaceState;
|
||||
break;
|
||||
case NEO::BindlessImageSlot::implicitArgs:
|
||||
src = &implicitArgsSurfaceState;
|
||||
break;
|
||||
case NEO::BindlessImageSlot::packedImage:
|
||||
src = &packedSurfaceState;
|
||||
break;
|
||||
default:
|
||||
UNRECOVERABLE_IF(true);
|
||||
}
|
||||
|
||||
auto dst = ptrOffset(surfaceStateHeap, surfaceStateOffset);
|
||||
memcpy_s(dst, sizeof(RENDER_SURFACE_STATE), src, sizeof(RENDER_SURFACE_STATE));
|
||||
|
||||
if (isMediaBlockArg) {
|
||||
RENDER_SURFACE_STATE *dstRss = static_cast<RENDER_SURFACE_STATE *>(destSurfaceState);
|
||||
RENDER_SURFACE_STATE *dstRss = static_cast<RENDER_SURFACE_STATE *>(dst);
|
||||
NEO::ImageSurfaceStateHelper<GfxFamily>::setWidthForMediaBlockSurfaceState(dstRss, imgInfo);
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void ImageCoreFamily<gfxCoreFamily>::copyRedescribedSurfaceStateToSSH(void *surfaceStateHeap,
|
||||
const uint32_t surfaceStateOffset) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
|
||||
|
||||
// Copy the image's surface state into position in the provided surface state heap
|
||||
auto destSurfaceState = ptrOffset(surfaceStateHeap, surfaceStateOffset);
|
||||
memcpy_s(destSurfaceState, sizeof(RENDER_SURFACE_STATE),
|
||||
&redescribedSurfaceState, sizeof(RENDER_SURFACE_STATE));
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void ImageCoreFamily<gfxCoreFamily>::copyImplicitArgsSurfaceStateToSSH(void *surfaceStateHeap,
|
||||
const uint32_t surfaceStateOffset) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
using RENDER_SURFACE_STATE = typename GfxFamily::RENDER_SURFACE_STATE;
|
||||
|
||||
// Copy the image's surface state into position in the provided surface state heap
|
||||
auto destSurfaceState = ptrOffset(surfaceStateHeap, surfaceStateOffset);
|
||||
memcpy_s(destSurfaceState, sizeof(RENDER_SURFACE_STATE),
|
||||
&implicitArgsSurfaceState, sizeof(RENDER_SURFACE_STATE));
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
bool ImageCoreFamily<gfxCoreFamily>::isSuitableForCompression(const StructuresLookupTable &structuresLookupTable, const NEO::ImageInfo &imgInfo) {
|
||||
auto &hwInfo = device->getHwInfo();
|
||||
|
||||
@@ -128,7 +128,7 @@ struct Kernel : _ze_kernel_handle_t, virtual NEO::DispatchKernelEncoderI, NEO::N
|
||||
virtual void setGroupCount(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) = 0;
|
||||
|
||||
virtual ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *peerAllocData) = 0;
|
||||
virtual ze_result_t setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal) = 0;
|
||||
virtual ze_result_t setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal, bool isPacked) = 0;
|
||||
virtual ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
|
||||
uint32_t groupSizeZ) = 0;
|
||||
virtual ze_result_t suggestGroupSize(uint32_t globalSizeX, uint32_t globalSizeY,
|
||||
|
||||
@@ -599,7 +599,9 @@ ze_result_t KernelImp::setArgImmediate(uint32_t argIndex, size_t argSize, const
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal) {
|
||||
ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal, bool isPacked) {
|
||||
const uint32_t bindlessSlot = isPacked ? NEO::BindlessImageSlot::packedImage : NEO::BindlessImageSlot::redescribedImage;
|
||||
|
||||
const auto &arg = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as<NEO::ArgDescImage>();
|
||||
if (argVal == nullptr) {
|
||||
argumentsResidencyContainer[argIndex] = nullptr;
|
||||
@@ -622,7 +624,7 @@ ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle
|
||||
auto ssInHeap = image->getBindlessSlot();
|
||||
auto patchLocation = ptrOffset(getCrossThreadData(), arg.bindless);
|
||||
// redescribed image's surface state is after image's implicit args and sampler
|
||||
uint64_t bindlessSlotOffset = ssInHeap->surfaceStateOffset + surfaceStateSize * NEO::BindlessImageSlot::redescribedImage;
|
||||
uint64_t bindlessSlotOffset = ssInHeap->surfaceStateOffset + surfaceStateSize * bindlessSlot;
|
||||
uint32_t patchSize = this->heaplessEnabled ? 8u : 4u;
|
||||
uint64_t patchValue = this->heaplessEnabled
|
||||
? bindlessSlotOffset + bindlessHeapsHelper->getGlobalHeapsBase()
|
||||
@@ -630,15 +632,15 @@ ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle
|
||||
|
||||
patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), patchSize, patchValue);
|
||||
|
||||
image->copyRedescribedSurfaceStateToSSH(ptrOffset(ssInHeap->ssPtr, surfaceStateSize * NEO::BindlessImageSlot::redescribedImage), 0u);
|
||||
image->copySurfaceStateToSSH(ptrOffset(ssInHeap->ssPtr, surfaceStateSize * bindlessSlot), 0u, bindlessSlot, false);
|
||||
isBindlessOffsetSet[argIndex] = true;
|
||||
} else {
|
||||
usingSurfaceStateHeap[argIndex] = true;
|
||||
auto ssPtr = ptrOffset(surfaceStateHeapData.get(), getSurfaceStateIndexForBindlessOffset(arg.bindless) * surfaceStateSize);
|
||||
image->copyRedescribedSurfaceStateToSSH(ssPtr, 0u);
|
||||
image->copySurfaceStateToSSH(ssPtr, 0u, bindlessSlot, false);
|
||||
}
|
||||
} else {
|
||||
image->copyRedescribedSurfaceStateToSSH(surfaceStateHeapData.get(), arg.bindful);
|
||||
image->copySurfaceStateToSSH(surfaceStateHeapData.get(), arg.bindful, bindlessSlot, false);
|
||||
}
|
||||
argumentsResidencyContainer[argIndex] = image->getAllocation();
|
||||
|
||||
@@ -832,17 +834,17 @@ ze_result_t KernelImp::setArgImage(uint32_t argIndex, size_t argSize, const void
|
||||
|
||||
patchWithRequiredSize(const_cast<uint8_t *>(patchLocation), patchSize, patchValue);
|
||||
|
||||
image->copySurfaceStateToSSH(ssInHeap->ssPtr, 0u, isMediaBlockImage);
|
||||
image->copyImplicitArgsSurfaceStateToSSH(ptrOffset(ssInHeap->ssPtr, surfaceStateSize), 0u);
|
||||
image->copySurfaceStateToSSH(ssInHeap->ssPtr, 0u, NEO::BindlessImageSlot::image, isMediaBlockImage);
|
||||
image->copySurfaceStateToSSH(ptrOffset(ssInHeap->ssPtr, surfaceStateSize), 0u, NEO::BindlessImageSlot::implicitArgs, false);
|
||||
|
||||
isBindlessOffsetSet[argIndex] = true;
|
||||
} else {
|
||||
usingSurfaceStateHeap[argIndex] = true;
|
||||
auto ssPtr = ptrOffset(surfaceStateHeapData.get(), getSurfaceStateIndexForBindlessOffset(arg.bindless) * surfaceStateSize);
|
||||
image->copySurfaceStateToSSH(ssPtr, 0u, isMediaBlockImage);
|
||||
image->copySurfaceStateToSSH(ssPtr, 0u, NEO::BindlessImageSlot::image, isMediaBlockImage);
|
||||
}
|
||||
} else {
|
||||
image->copySurfaceStateToSSH(surfaceStateHeapData.get(), arg.bindful, isMediaBlockImage);
|
||||
image->copySurfaceStateToSSH(surfaceStateHeapData.get(), arg.bindful, NEO::BindlessImageSlot::image, isMediaBlockImage);
|
||||
}
|
||||
|
||||
argumentsResidencyContainer[argIndex] = image->getAllocation();
|
||||
|
||||
@@ -99,7 +99,7 @@ struct KernelImp : Kernel {
|
||||
|
||||
ze_result_t setArgUnknown(uint32_t argIndex, size_t argSize, const void *argVal);
|
||||
|
||||
ze_result_t setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal) override;
|
||||
ze_result_t setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal, bool isPacked) override;
|
||||
|
||||
ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *peerAllocData) override;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user