feature: use stateless builtins for image APIs

Related-To: NEO-15461, NEO-6075

Signed-off-by: Narendra Bagria <narendra.bagria@intel.com>
This commit is contained in:
Narendra Bagria
2025-08-25 10:53:41 +00:00
committed by Compute-Runtime-Automation
parent af8b1030f1
commit 543423fba9
6 changed files with 317 additions and 26 deletions

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2024 Intel Corporation
* Copyright (C) 2020-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -52,36 +52,52 @@ enum class Builtin : uint32_t {
enum class ImageBuiltin : uint32_t {
copyBufferToImage3d16Bytes = 0u,
copyBufferToImage3d16BytesStateless,
copyBufferToImage3d16BytesHeapless,
copyBufferToImage3d2Bytes,
copyBufferToImage3d2BytesStateless,
copyBufferToImage3d2BytesHeapless,
copyBufferToImage3d4Bytes,
copyBufferToImage3d4BytesStateless,
copyBufferToImage3d4BytesHeapless,
copyBufferToImage3d3To4Bytes,
copyBufferToImage3d3To4BytesStateless,
copyBufferToImage3d3To4BytesHeapless,
copyBufferToImage3d8Bytes,
copyBufferToImage3d8BytesStateless,
copyBufferToImage3d8BytesHeapless,
copyBufferToImage3d6To8Bytes,
copyBufferToImage3d6To8BytesStateless,
copyBufferToImage3d6To8BytesHeapless,
copyBufferToImage3dBytes,
copyBufferToImage3dBytesStateless,
copyBufferToImage3dBytesHeapless,
copyImage3dToBuffer16Bytes,
copyImage3dToBuffer16BytesStateless,
copyImage3dToBuffer16BytesHeapless,
copyImage3dToBuffer2Bytes,
copyImage3dToBuffer2BytesStateless,
copyImage3dToBuffer2BytesHeapless,
copyImage3dToBuffer3Bytes,
copyImage3dToBuffer3BytesStateless,
copyImage3dToBuffer3BytesHeapless,
copyImage3dToBuffer4Bytes,
copyImage3dToBuffer4BytesStateless,
copyImage3dToBuffer4BytesHeapless,
copyImage3dToBuffer4To3Bytes,
copyImage3dToBuffer4To3BytesStateless,
copyImage3dToBuffer4To3BytesHeapless,
copyImage3dToBuffer6Bytes,
copyImage3dToBuffer6BytesStateless,
copyImage3dToBuffer6BytesHeapless,
copyImage3dToBuffer8Bytes,
copyImage3dToBuffer8BytesStateless,
copyImage3dToBuffer8BytesHeapless,
copyImage3dToBuffer8To6Bytes,
copyImage3dToBuffer8To6BytesStateless,
copyImage3dToBuffer8To6BytesHeapless,
copyImage3dToBufferBytes,
copyImage3dToBufferBytesStateless,
copyImage3dToBufferBytesHeapless,
copyImageRegion,
copyImageRegionHeapless,
@@ -195,14 +211,20 @@ constexpr Builtin adjustBuiltinType<Builtin::fillBufferRightLeftover>(const bool
}
// Primary template of adjustImageBuiltinType: for any image builtin without an
// explicit specialization the requested builtin is returned unchanged, i.e. the
// addressing-mode flag(s) passed by the caller are ignored.
// NOTE(review): this view shows two signature lines back to back (a one-flag and
// a two-flag form); presumably the first is the pre-change line of the diff.
template <ImageBuiltin type>
constexpr ImageBuiltin adjustImageBuiltinType(const bool isHeapless) {
constexpr ImageBuiltin adjustImageBuiltinType(const bool isStateless, const bool isHeapless) {
return type;
}
// DEFINE_ADJUST_IMAGE_BUILTIN_TYPE(type) emits an explicit specialization of
// adjustImageBuiltinType for `type`, built by token-pasting suffixes onto it.
// Two definitions of the macro are visible here:
//  - one-flag form: returns type##Heapless when isHeapless is set, else `type`;
//  - two-flag form: isHeapless is tested first and yields type##Heapless,
//    otherwise isStateless yields type##Stateless, otherwise `type` is returned.
// Every builtin passed to the two-flag form therefore needs both a ...Stateless
// and a ...Heapless enumerator.
// NOTE(review): presumably the one-flag form is the removed side of the diff.
#define DEFINE_ADJUST_IMAGE_BUILTIN_TYPE(type) \
template <> \
constexpr ImageBuiltin adjustImageBuiltinType<type>(const bool isHeapless) { \
return isHeapless ? type##Heapless : type; \
#define DEFINE_ADJUST_IMAGE_BUILTIN_TYPE(type) \
template <> \
constexpr ImageBuiltin adjustImageBuiltinType<type>(const bool isStateless, const bool isHeapless) { \
if (isHeapless) { \
return type##Heapless; \
} else if (isStateless) { \
return type##Stateless; \
} else { \
return type; \
} \
}
DEFINE_ADJUST_IMAGE_BUILTIN_TYPE(ImageBuiltin::copyBufferToImage3d16Bytes);
@@ -221,7 +243,14 @@ DEFINE_ADJUST_IMAGE_BUILTIN_TYPE(ImageBuiltin::copyImage3dToBuffer6Bytes);
DEFINE_ADJUST_IMAGE_BUILTIN_TYPE(ImageBuiltin::copyImage3dToBuffer8Bytes);
DEFINE_ADJUST_IMAGE_BUILTIN_TYPE(ImageBuiltin::copyImage3dToBuffer8To6Bytes);
DEFINE_ADJUST_IMAGE_BUILTIN_TYPE(ImageBuiltin::copyImage3dToBufferBytes);
DEFINE_ADJUST_IMAGE_BUILTIN_TYPE(ImageBuiltin::copyImageRegion);
// Specialization for copyImageRegion. The isStateless flag is intentionally not
// consulted here: only the heapless flag selects a different builtin
// (copyImageRegionHeapless); in every other case the base builtin is returned.
template <>
constexpr ImageBuiltin adjustImageBuiltinType<ImageBuiltin::copyImageRegion>(const bool isStateless, const bool isHeapless) {
    return isHeapless ? ImageBuiltin::copyImageRegionHeapless : ImageBuiltin::copyImageRegion;
}
} // namespace BuiltinTypeHelper

View File

@@ -162,6 +162,10 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
builtinName = "CopyBufferToImage3d16Bytes";
builtin = NEO::EBuiltInOps::copyBufferToImage3d;
break;
case ImageBuiltin::copyBufferToImage3d16BytesStateless:
builtinName = "CopyBufferToImage3d16BytesStateless";
builtin = NEO::EBuiltInOps::copyBufferToImage3dStateless;
break;
case ImageBuiltin::copyBufferToImage3d16BytesHeapless:
builtinName = "CopyBufferToImage3d16BytesStateless";
builtin = NEO::EBuiltInOps::copyBufferToImage3dHeapless;
@@ -170,6 +174,10 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
builtinName = "CopyBufferToImage3d2Bytes";
builtin = NEO::EBuiltInOps::copyBufferToImage3d;
break;
case ImageBuiltin::copyBufferToImage3d2BytesStateless:
builtinName = "CopyBufferToImage3d2BytesStateless";
builtin = NEO::EBuiltInOps::copyBufferToImage3dStateless;
break;
case ImageBuiltin::copyBufferToImage3d2BytesHeapless:
builtinName = "CopyBufferToImage3d2BytesStateless";
builtin = NEO::EBuiltInOps::copyBufferToImage3dHeapless;
@@ -178,6 +186,10 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
builtinName = "CopyBufferToImage3d4Bytes";
builtin = NEO::EBuiltInOps::copyBufferToImage3d;
break;
case ImageBuiltin::copyBufferToImage3d4BytesStateless:
builtinName = "CopyBufferToImage3d4BytesStateless";
builtin = NEO::EBuiltInOps::copyBufferToImage3dStateless;
break;
case ImageBuiltin::copyBufferToImage3d4BytesHeapless:
builtinName = "CopyBufferToImage3d4BytesStateless";
builtin = NEO::EBuiltInOps::copyBufferToImage3dHeapless;
@@ -186,6 +198,10 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
builtinName = "CopyBufferToImage3d3To4Bytes";
builtin = NEO::EBuiltInOps::copyBufferToImage3d;
break;
case ImageBuiltin::copyBufferToImage3d3To4BytesStateless:
builtinName = "CopyBufferToImage3d3To4BytesStateless";
builtin = NEO::EBuiltInOps::copyBufferToImage3dStateless;
break;
case ImageBuiltin::copyBufferToImage3d3To4BytesHeapless:
builtinName = "CopyBufferToImage3d3To4BytesStateless";
builtin = NEO::EBuiltInOps::copyBufferToImage3dHeapless;
@@ -194,6 +210,10 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
builtinName = "CopyBufferToImage3d8Bytes";
builtin = NEO::EBuiltInOps::copyBufferToImage3d;
break;
case ImageBuiltin::copyBufferToImage3d8BytesStateless:
builtinName = "CopyBufferToImage3d8BytesStateless";
builtin = NEO::EBuiltInOps::copyBufferToImage3dStateless;
break;
case ImageBuiltin::copyBufferToImage3d8BytesHeapless:
builtinName = "CopyBufferToImage3d8BytesStateless";
builtin = NEO::EBuiltInOps::copyBufferToImage3dHeapless;
@@ -202,6 +222,10 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
builtinName = "CopyBufferToImage3d6To8Bytes";
builtin = NEO::EBuiltInOps::copyBufferToImage3d;
break;
case ImageBuiltin::copyBufferToImage3d6To8BytesStateless:
builtinName = "CopyBufferToImage3d6To8BytesStateless";
builtin = NEO::EBuiltInOps::copyBufferToImage3dStateless;
break;
case ImageBuiltin::copyBufferToImage3d6To8BytesHeapless:
builtinName = "CopyBufferToImage3d6To8BytesStateless";
builtin = NEO::EBuiltInOps::copyBufferToImage3dHeapless;
@@ -210,6 +234,10 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
builtinName = "CopyBufferToImage3dBytes";
builtin = NEO::EBuiltInOps::copyBufferToImage3d;
break;
case ImageBuiltin::copyBufferToImage3dBytesStateless:
builtinName = "CopyBufferToImage3dBytesStateless";
builtin = NEO::EBuiltInOps::copyBufferToImage3dStateless;
break;
case ImageBuiltin::copyBufferToImage3dBytesHeapless:
builtinName = "CopyBufferToImage3dBytesStateless";
builtin = NEO::EBuiltInOps::copyBufferToImage3dHeapless;
@@ -218,6 +246,10 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
builtinName = "CopyImage3dToBuffer16Bytes";
builtin = NEO::EBuiltInOps::copyImage3dToBuffer;
break;
case ImageBuiltin::copyImage3dToBuffer16BytesStateless:
builtinName = "CopyImage3dToBuffer16BytesStateless";
builtin = NEO::EBuiltInOps::copyImage3dToBufferStateless;
break;
case ImageBuiltin::copyImage3dToBuffer16BytesHeapless:
builtinName = "CopyImage3dToBuffer16BytesStateless";
builtin = NEO::EBuiltInOps::copyImage3dToBufferHeapless;
@@ -226,6 +258,10 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
builtinName = "CopyImage3dToBuffer2Bytes";
builtin = NEO::EBuiltInOps::copyImage3dToBuffer;
break;
case ImageBuiltin::copyImage3dToBuffer2BytesStateless:
builtinName = "CopyImage3dToBuffer2BytesStateless";
builtin = NEO::EBuiltInOps::copyImage3dToBufferStateless;
break;
case ImageBuiltin::copyImage3dToBuffer2BytesHeapless:
builtinName = "CopyImage3dToBuffer2BytesStateless";
builtin = NEO::EBuiltInOps::copyImage3dToBufferHeapless;
@@ -234,6 +270,10 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
builtinName = "CopyImage3dToBuffer3Bytes";
builtin = NEO::EBuiltInOps::copyImage3dToBuffer;
break;
case ImageBuiltin::copyImage3dToBuffer3BytesStateless:
builtinName = "CopyImage3dToBuffer3BytesStateless";
builtin = NEO::EBuiltInOps::copyImage3dToBufferStateless;
break;
case ImageBuiltin::copyImage3dToBuffer3BytesHeapless:
builtinName = "CopyImage3dToBuffer3BytesStateless";
builtin = NEO::EBuiltInOps::copyImage3dToBufferHeapless;
@@ -242,6 +282,10 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
builtinName = "CopyImage3dToBuffer4Bytes";
builtin = NEO::EBuiltInOps::copyImage3dToBuffer;
break;
case ImageBuiltin::copyImage3dToBuffer4BytesStateless:
builtinName = "CopyImage3dToBuffer4BytesStateless";
builtin = NEO::EBuiltInOps::copyImage3dToBufferStateless;
break;
case ImageBuiltin::copyImage3dToBuffer4BytesHeapless:
builtinName = "CopyImage3dToBuffer4BytesStateless";
builtin = NEO::EBuiltInOps::copyImage3dToBufferHeapless;
@@ -250,6 +294,10 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
builtinName = "CopyImage3dToBuffer4To3Bytes";
builtin = NEO::EBuiltInOps::copyImage3dToBuffer;
break;
case ImageBuiltin::copyImage3dToBuffer4To3BytesStateless:
builtinName = "CopyImage3dToBuffer4To3BytesStateless";
builtin = NEO::EBuiltInOps::copyImage3dToBufferStateless;
break;
case ImageBuiltin::copyImage3dToBuffer4To3BytesHeapless:
builtinName = "CopyImage3dToBuffer4To3BytesStateless";
builtin = NEO::EBuiltInOps::copyImage3dToBufferHeapless;
@@ -258,6 +306,10 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
builtinName = "CopyImage3dToBuffer6Bytes";
builtin = NEO::EBuiltInOps::copyImage3dToBuffer;
break;
case ImageBuiltin::copyImage3dToBuffer6BytesStateless:
builtinName = "CopyImage3dToBuffer6BytesStateless";
builtin = NEO::EBuiltInOps::copyImage3dToBufferStateless;
break;
case ImageBuiltin::copyImage3dToBuffer6BytesHeapless:
builtinName = "CopyImage3dToBuffer6BytesStateless";
builtin = NEO::EBuiltInOps::copyImage3dToBufferHeapless;
@@ -266,6 +318,10 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
builtinName = "CopyImage3dToBuffer8Bytes";
builtin = NEO::EBuiltInOps::copyImage3dToBuffer;
break;
case ImageBuiltin::copyImage3dToBuffer8BytesStateless:
builtinName = "CopyImage3dToBuffer8BytesStateless";
builtin = NEO::EBuiltInOps::copyImage3dToBufferStateless;
break;
case ImageBuiltin::copyImage3dToBuffer8BytesHeapless:
builtinName = "CopyImage3dToBuffer8BytesStateless";
builtin = NEO::EBuiltInOps::copyImage3dToBufferHeapless;
@@ -274,6 +330,10 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
builtinName = "CopyImage3dToBuffer8To6Bytes";
builtin = NEO::EBuiltInOps::copyImage3dToBuffer;
break;
case ImageBuiltin::copyImage3dToBuffer8To6BytesStateless:
builtinName = "CopyImage3dToBuffer8To6BytesStateless";
builtin = NEO::EBuiltInOps::copyImage3dToBufferStateless;
break;
case ImageBuiltin::copyImage3dToBuffer8To6BytesHeapless:
builtinName = "CopyImage3dToBuffer8To6BytesStateless";
builtin = NEO::EBuiltInOps::copyImage3dToBufferHeapless;
@@ -282,6 +342,10 @@ void BuiltinFunctionsLibImpl::initBuiltinImageKernel(ImageBuiltin func) {
builtinName = "CopyImage3dToBufferBytes";
builtin = NEO::EBuiltInOps::copyImage3dToBuffer;
break;
case ImageBuiltin::copyImage3dToBufferBytesStateless:
builtinName = "CopyImage3dToBufferBytesStateless";
builtin = NEO::EBuiltInOps::copyImage3dToBufferStateless;
break;
case ImageBuiltin::copyImage3dToBufferBytesHeapless:
builtinName = "CopyImage3dToBufferBytesStateless";
builtin = NEO::EBuiltInOps::copyImage3dToBufferHeapless;

View File

@@ -854,32 +854,37 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemoryExt(z
return status;
}
bool isStateless = (this->cmdListHeapAddressModel == NEO::HeapAddressModel::globalStateless) || this->isStatelessBuiltinsEnabled();
if (bufferSize >= 4ull * MemoryConstants::gigaByte) {
isStateless = true;
}
bool isHeaplessEnabled = this->heaplessModeEnabled;
ImageBuiltin builtInType = ImageBuiltin::copyBufferToImage3dBytes;
switch (bytesPerPixel) {
case 1u:
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyBufferToImage3dBytes>(isHeaplessEnabled);
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyBufferToImage3dBytes>(isStateless, isHeaplessEnabled);
break;
case 2u:
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyBufferToImage3d2Bytes>(isHeaplessEnabled);
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyBufferToImage3d2Bytes>(isStateless, isHeaplessEnabled);
break;
case 4u:
if (image->isMimickedImage()) {
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyBufferToImage3d3To4Bytes>(isHeaplessEnabled);
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyBufferToImage3d3To4Bytes>(isStateless, isHeaplessEnabled);
} else {
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyBufferToImage3d4Bytes>(isHeaplessEnabled);
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyBufferToImage3d4Bytes>(isStateless, isHeaplessEnabled);
}
break;
case 8u:
if (image->isMimickedImage()) {
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyBufferToImage3d6To8Bytes>(isHeaplessEnabled);
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyBufferToImage3d6To8Bytes>(isStateless, isHeaplessEnabled);
} else {
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyBufferToImage3d8Bytes>(isHeaplessEnabled);
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyBufferToImage3d8Bytes>(isStateless, isHeaplessEnabled);
}
break;
case 16u:
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyBufferToImage3d16Bytes>(isHeaplessEnabled);
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyBufferToImage3d16Bytes>(isStateless, isHeaplessEnabled);
break;
default:
UNRECOVERABLE_IF(true);
@@ -901,7 +906,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemoryExt(z
0};
builtinKernel->setArgumentValue(3u, sizeof(origin), &origin);
if (this->heaplessModeEnabled) {
if (this->heaplessModeEnabled || isStateless) {
uint64_t pitch[] = {srcRowPitch, srcSlicePitch};
builtinKernel->setArgumentValue(4u, sizeof(pitch), &pitch);
} else {
@@ -1054,38 +1059,43 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemoryExt(voi
return status;
}
bool isStateless = (this->cmdListHeapAddressModel == NEO::HeapAddressModel::globalStateless) || this->isStatelessBuiltinsEnabled();
if (bufferSize >= 4ull * MemoryConstants::gigaByte) {
isStateless = true;
}
bool isHeaplessEnabled = this->heaplessModeEnabled;
ImageBuiltin builtInType = ImageBuiltin::copyBufferToImage3dBytes;
switch (bytesPerPixel) {
case 1u:
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyImage3dToBufferBytes>(isHeaplessEnabled);
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyImage3dToBufferBytes>(isStateless, isHeaplessEnabled);
break;
case 2u:
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyImage3dToBuffer2Bytes>(isHeaplessEnabled);
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyImage3dToBuffer2Bytes>(isStateless, isHeaplessEnabled);
break;
case 3u:
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyImage3dToBuffer3Bytes>(isHeaplessEnabled);
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyImage3dToBuffer3Bytes>(isStateless, isHeaplessEnabled);
break;
case 4u:
if (image->isMimickedImage()) {
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyImage3dToBuffer4To3Bytes>(isHeaplessEnabled);
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyImage3dToBuffer4To3Bytes>(isStateless, isHeaplessEnabled);
} else {
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyImage3dToBuffer4Bytes>(isHeaplessEnabled);
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyImage3dToBuffer4Bytes>(isStateless, isHeaplessEnabled);
}
break;
case 6u:
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyImage3dToBuffer6Bytes>(isHeaplessEnabled);
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyImage3dToBuffer6Bytes>(isStateless, isHeaplessEnabled);
break;
case 8u:
if (image->isMimickedImage()) {
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyImage3dToBuffer8To6Bytes>(isHeaplessEnabled);
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyImage3dToBuffer8To6Bytes>(isStateless, isHeaplessEnabled);
} else {
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyImage3dToBuffer8Bytes>(isHeaplessEnabled);
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyImage3dToBuffer8Bytes>(isStateless, isHeaplessEnabled);
}
break;
case 16u:
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyImage3dToBuffer16Bytes>(isHeaplessEnabled);
builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyImage3dToBuffer16Bytes>(isStateless, isHeaplessEnabled);
break;
default: {
CREATE_DEBUG_STRING(str, "Invalid bytesPerPixel of size: %u\n", bytesPerPixel);
@@ -1110,7 +1120,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemoryExt(voi
builtinKernel->setArgumentValue(2u, sizeof(origin), &origin);
builtinKernel->setArgumentValue(3u, sizeof(size_t), &allocationStruct.offset);
if (this->heaplessModeEnabled) {
if (this->heaplessModeEnabled || isStateless) {
uint64_t pitch[] = {destRowPitch, destSlicePitch};
builtinKernel->setArgumentValue(4u, sizeof(pitch), &pitch);
} else {
@@ -1271,7 +1281,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyRegion(ze_image
return status;
}
ImageBuiltin builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyImageRegion>(this->heaplessModeEnabled);
ImageBuiltin builtInType = BuiltinTypeHelper::adjustImageBuiltinType<ImageBuiltin::copyImageRegion>(false, this->heaplessModeEnabled);
auto lock = device->getBuiltinFunctionsLib()->obtainUniqueOwnership();
auto kernel = device->getBuiltinFunctionsLib()->getImageFunction(builtInType);

View File

@@ -125,6 +125,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
using BaseClass::signalAllEventPackets;
using BaseClass::stateBaseAddressTracking;
using BaseClass::stateComputeModeTracking;
using BaseClass::statelessBuiltinsEnabled;
using BaseClass::syncDispatchQueueId;
using BaseClass::synchronizedDispatchMode;
using BaseClass::unifiedMemoryControls;
@@ -251,6 +252,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
using BaseClass::signalAllEventPackets;
using BaseClass::stateBaseAddressTracking;
using BaseClass::stateComputeModeTracking;
using BaseClass::statelessBuiltinsEnabled;
using BaseClass::syncDispatchQueueId;
using BaseClass::synchronizedDispatchMode;
using BaseClass::synchronizeInOrderExecution;

View File

@@ -251,6 +251,50 @@ HWTEST_F(TestBuiltinFunctionsLibImpl, givenHeaplessImageBuiltinsWhenInitBuiltinK
EXPECT_STREQ("CopyImage3dToImage3d", lib.kernelNamePassed.c_str());
}
// Verifies that initBuiltinImageKernel() maps every stateless image builtin to
// the matching NEO builtin operation and kernel name. All sixteen *Stateless
// enumerators handled by the switch are covered (seven buffer->image and nine
// image->buffer variants), so a missing or mistyped case is caught here.
HWTEST_F(TestBuiltinFunctionsLibImpl, givenStatelessImageBuiltinsWhenInitBuiltinKernelThenCorrectArgumentsArePassed) {
    MockCheckPassedArgumentsBuiltinFunctionsLibImpl lib(device, device->getNEODevice()->getBuiltIns());

    // Initializes one builtin and checks what the mock library recorded.
    // The kernel name is attached to the trace so a failure identifies the variant.
    auto verifyStatelessBuiltin = [&lib](L0::ImageBuiltin func, auto expectedBuiltin, const char *expectedKernelName) {
        SCOPED_TRACE(expectedKernelName);
        lib.initBuiltinImageKernel(func);
        EXPECT_EQ(expectedBuiltin, lib.builtinPassed);
        EXPECT_STREQ(expectedKernelName, lib.kernelNamePassed.c_str());
    };

    // Buffer -> image variants.
    verifyStatelessBuiltin(L0::ImageBuiltin::copyBufferToImage3d16BytesStateless, NEO::EBuiltInOps::copyBufferToImage3dStateless, "CopyBufferToImage3d16BytesStateless");
    verifyStatelessBuiltin(L0::ImageBuiltin::copyBufferToImage3d2BytesStateless, NEO::EBuiltInOps::copyBufferToImage3dStateless, "CopyBufferToImage3d2BytesStateless");
    verifyStatelessBuiltin(L0::ImageBuiltin::copyBufferToImage3d4BytesStateless, NEO::EBuiltInOps::copyBufferToImage3dStateless, "CopyBufferToImage3d4BytesStateless");
    verifyStatelessBuiltin(L0::ImageBuiltin::copyBufferToImage3d3To4BytesStateless, NEO::EBuiltInOps::copyBufferToImage3dStateless, "CopyBufferToImage3d3To4BytesStateless");
    verifyStatelessBuiltin(L0::ImageBuiltin::copyBufferToImage3d8BytesStateless, NEO::EBuiltInOps::copyBufferToImage3dStateless, "CopyBufferToImage3d8BytesStateless");
    verifyStatelessBuiltin(L0::ImageBuiltin::copyBufferToImage3d6To8BytesStateless, NEO::EBuiltInOps::copyBufferToImage3dStateless, "CopyBufferToImage3d6To8BytesStateless");
    verifyStatelessBuiltin(L0::ImageBuiltin::copyBufferToImage3dBytesStateless, NEO::EBuiltInOps::copyBufferToImage3dStateless, "CopyBufferToImage3dBytesStateless");

    // Image -> buffer variants.
    verifyStatelessBuiltin(L0::ImageBuiltin::copyImage3dToBuffer16BytesStateless, NEO::EBuiltInOps::copyImage3dToBufferStateless, "CopyImage3dToBuffer16BytesStateless");
    verifyStatelessBuiltin(L0::ImageBuiltin::copyImage3dToBuffer2BytesStateless, NEO::EBuiltInOps::copyImage3dToBufferStateless, "CopyImage3dToBuffer2BytesStateless");
    verifyStatelessBuiltin(L0::ImageBuiltin::copyImage3dToBuffer3BytesStateless, NEO::EBuiltInOps::copyImage3dToBufferStateless, "CopyImage3dToBuffer3BytesStateless");
    verifyStatelessBuiltin(L0::ImageBuiltin::copyImage3dToBuffer4BytesStateless, NEO::EBuiltInOps::copyImage3dToBufferStateless, "CopyImage3dToBuffer4BytesStateless");
    verifyStatelessBuiltin(L0::ImageBuiltin::copyImage3dToBuffer4To3BytesStateless, NEO::EBuiltInOps::copyImage3dToBufferStateless, "CopyImage3dToBuffer4To3BytesStateless");
    verifyStatelessBuiltin(L0::ImageBuiltin::copyImage3dToBuffer6BytesStateless, NEO::EBuiltInOps::copyImage3dToBufferStateless, "CopyImage3dToBuffer6BytesStateless");
    verifyStatelessBuiltin(L0::ImageBuiltin::copyImage3dToBuffer8BytesStateless, NEO::EBuiltInOps::copyImage3dToBufferStateless, "CopyImage3dToBuffer8BytesStateless");
    verifyStatelessBuiltin(L0::ImageBuiltin::copyImage3dToBuffer8To6BytesStateless, NEO::EBuiltInOps::copyImage3dToBufferStateless, "CopyImage3dToBuffer8To6BytesStateless");
    verifyStatelessBuiltin(L0::ImageBuiltin::copyImage3dToBufferBytesStateless, NEO::EBuiltInOps::copyImage3dToBufferStateless, "CopyImage3dToBufferBytesStateless");
}
HWTEST_F(TestBuiltinFunctionsLibImpl, givenCompilerInterfaceWhenCreateDeviceAndImageSupportedThenBuiltinsImageFunctionsAreLoaded) {
ze_result_t returnValue = ZE_RESULT_SUCCESS;
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->compilerInterface.reset(new NEO::MockCompilerInterfaceSpirv());

View File

@@ -759,6 +759,7 @@ HWTEST2_F(CommandListTest, givenHeaplessWhenAppendImageCopyFromMemoryThenCorrect
commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u);
commandList->heaplessModeEnabled = heaplessEnabled;
commandList->scratchAddressPatchingEnabled = true;
commandList->statelessBuiltinsEnabled = false;
void *srcPtr = reinterpret_cast<void *>(0x1234);
@@ -811,6 +812,7 @@ HWTEST2_F(CommandListTest, givenHeaplessWhenAppendImageCopyToMemoryThenCorrectRo
commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u);
commandList->heaplessModeEnabled = heaplessEnabled;
commandList->scratchAddressPatchingEnabled = true;
commandList->statelessBuiltinsEnabled = false;
void *dstPtr = reinterpret_cast<void *>(0x1234);
@@ -845,6 +847,146 @@ HWTEST2_F(CommandListTest, givenHeaplessWhenAppendImageCopyToMemoryThenCorrectRo
}
}
// With the command list switched to the globalStateless heap address model, a
// memory -> image copy must hand the row/slice pitch to the builtin kernel as
// two 64-bit values in argument slot 4.
HWTEST_F(CommandListTest, givenStatelessWhenAppendImageCopyFromMemoryThenCorrectRowAndSlicePitchArePassed) {
    // This test exercises the buffer -> image direction, so fetch a buffer -> image
    // builtin (the same enumerator the FromMemoryExt test uses) rather than the
    // image -> buffer one.
    auto kernel = device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::copyBufferToImage3dBytes);
    auto mockBuiltinKernel = static_cast<Mock<::L0::KernelImp> *>(kernel);

    // Have the mock keep the argument values; slots 0..4 are inspected below.
    mockBuiltinKernel->checkPassedArgumentValues = true;
    mockBuiltinKernel->setArgRedescribedImageCallBase = false;
    mockBuiltinKernel->passedArgumentValues.clear();
    mockBuiltinKernel->passedArgumentValues.resize(5);

    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
    ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u));
    commandList->cmdListHeapAddressModel = NEO::HeapAddressModel::globalStateless;
    commandList->scratchAddressPatchingEnabled = true;

    void *srcPtr = reinterpret_cast<void *>(0x1234);

    ze_image_desc_t zeDesc = {};
    zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
    zeDesc.type = ZE_IMAGE_TYPE_3D;
    zeDesc.width = 4;
    zeDesc.height = 2;
    zeDesc.depth = 2;

    ze_image_region_t dstImgRegion = {2, 1, 1, 4, 2, 2};

    auto imageHw = std::make_unique<WhiteBox<::L0::ImageCoreFamily<FamilyType::gfxCoreFamily>>>();
    imageHw->initialize(device, &zeDesc);

    EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendImageCopyFromMemory(imageHw->toHandle(), srcPtr, &dstImgRegion, nullptr, 0, nullptr, copyParams));
    EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);

    auto passedArgSizeRowSlicePitch = mockBuiltinKernel->passedArgumentValues[4u].size();
    auto *passedArgRowSlicePitch = mockBuiltinKernel->passedArgumentValues[4u].data();
    EXPECT_EQ(sizeof(uint64_t) * 2, passedArgSizeRowSlicePitch);

    // Expected pitches are derived from the copied region: a row is width pixels,
    // a slice is height rows.
    auto bytesPerPixel = static_cast<uint32_t>(imageHw->getImageInfo().surfaceFormat->imageElementSizeInBytes);
    uint64_t expectedPitch[] = {dstImgRegion.width * bytesPerPixel, dstImgRegion.height * (dstImgRegion.width * bytesPerPixel)};
    EXPECT_EQ(0, memcmp(passedArgRowSlicePitch, expectedPitch, passedArgSizeRowSlicePitch));
}
// With the command list switched to the globalStateless heap address model, an
// image -> memory copy must hand the row/slice pitch to the builtin kernel as
// two 64-bit values in argument slot 4.
HWTEST_F(CommandListTest, givenStatelessWhenAppendImageCopyToMemoryThenCorrectRowAndSlicePitchArePassed) {
    auto *imageToBufferKernel = static_cast<Mock<::L0::KernelImp> *>(
        device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::copyImage3dToBufferBytes));

    // Have the mock keep the argument values; slots 0..4 are inspected below.
    imageToBufferKernel->checkPassedArgumentValues = true;
    imageToBufferKernel->setArgRedescribedImageCallBase = false;
    imageToBufferKernel->passedArgumentValues.clear();
    imageToBufferKernel->passedArgumentValues.resize(5);

    auto cmdList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
    ASSERT_EQ(ZE_RESULT_SUCCESS, cmdList->initialize(device, NEO::EngineGroupType::renderCompute, 0u));
    cmdList->cmdListHeapAddressModel = NEO::HeapAddressModel::globalStateless;
    cmdList->scratchAddressPatchingEnabled = true;

    // 4 x 2 x 2 3D image; the remaining descriptor fields stay zero-initialized.
    ze_image_desc_t imageDesc = {};
    imageDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
    imageDesc.type = ZE_IMAGE_TYPE_3D;
    imageDesc.width = 4;
    imageDesc.height = 2;
    imageDesc.depth = 2;

    auto image = std::make_unique<WhiteBox<::L0::ImageCoreFamily<FamilyType::gfxCoreFamily>>>();
    image->initialize(device, &imageDesc);

    ze_image_region_t sourceRegion = {2, 1, 1, 4, 2, 2};
    void *destination = reinterpret_cast<void *>(0x1234);

    EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList->appendImageCopyToMemory(destination, image->toHandle(), &sourceRegion, nullptr, 0, nullptr, copyParams));
    EXPECT_TRUE(cmdList->usedKernelLaunchParams.isBuiltInKernel);

    const auto &pitchArgument = imageToBufferKernel->passedArgumentValues[4u];
    auto pitchArgumentSize = pitchArgument.size();
    EXPECT_EQ(sizeof(uint64_t) * 2, pitchArgumentSize);

    // Expected pitches are derived from the copied region: a row is width pixels,
    // a slice is height rows. The products are formed in 32 bits and then widened.
    auto bytesPerPixel = static_cast<uint32_t>(image->getImageInfo().surfaceFormat->imageElementSizeInBytes);
    const uint32_t expectedRowPitch = sourceRegion.width * bytesPerPixel;
    const uint32_t expectedSlicePitch = sourceRegion.height * expectedRowPitch;
    uint64_t expectedPitch[] = {expectedRowPitch, expectedSlicePitch};
    EXPECT_EQ(0, memcmp(pitchArgument.data(), expectedPitch, pitchArgumentSize));
}
// A memory -> image copy whose source pitches describe a very large buffer must
// pass the row/slice pitch to the builtin kernel as two 64-bit values (slot 4).
// NOTE(review): presumably depth (2) * slice pitch (0x80000000) is what reaches
// the 4 GiB threshold - confirm against the bufferSize computation.
HWTEST_F(CommandListTest, givenBufferGreaterThan4GBWhenAppendImageCopyFromMemoryExtThenRowAndSlicePitchArgumentsAreSetAs64Bit) {
    auto *bufferToImageKernel = static_cast<Mock<::L0::KernelImp> *>(
        device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::copyBufferToImage3dBytes));

    // Have the mock keep the argument values; slot 4 is inspected below.
    bufferToImageKernel->checkPassedArgumentValues = true;
    bufferToImageKernel->setArgRedescribedImageCallBase = false;
    bufferToImageKernel->passedArgumentValues.clear();
    bufferToImageKernel->passedArgumentValues.resize(5);

    auto cmdList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
    ASSERT_EQ(ZE_RESULT_SUCCESS, cmdList->initialize(device, NEO::EngineGroupType::renderCompute, 0u));

    // 4 x 4 x 2 3D image; the remaining descriptor fields stay zero-initialized.
    ze_image_desc_t imageDesc = {};
    imageDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
    imageDesc.type = ZE_IMAGE_TYPE_3D;
    imageDesc.width = 4;
    imageDesc.height = 4;
    imageDesc.depth = 2;

    auto image = std::make_unique<WhiteBox<::L0::ImageCoreFamily<FamilyType::gfxCoreFamily>>>();
    image->initialize(device, &imageDesc);

    void *source = reinterpret_cast<void *>(0x1234);

    // No region is given (nullptr); only the explicit source pitches are supplied.
    EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList->appendImageCopyFromMemoryExt(image->toHandle(), source, nullptr, 0x20000000, 0x80000000, nullptr, 0, nullptr, copyParams));
    EXPECT_TRUE(cmdList->usedKernelLaunchParams.isBuiltInKernel);

    auto pitchArgumentSize = bufferToImageKernel->passedArgumentValues[4u].size();
    EXPECT_EQ(sizeof(uint64_t) * 2, pitchArgumentSize);
}
// An image -> memory copy whose destination pitches describe a very large buffer
// must pass the row/slice pitch to the builtin kernel as two 64-bit values (slot 4).
// NOTE(review): presumably depth (2) * slice pitch (0x80000000) is what reaches
// the 4 GiB threshold - confirm against the bufferSize computation.
HWTEST_F(CommandListTest, givenImageBufferGreaterThan4GBWhenAppendImageCopyToMemoryExtThenRowAndSlicePitchArgumentsAreSetAs64Bit) {
    auto *imageToBufferKernel = static_cast<Mock<::L0::KernelImp> *>(
        device->getBuiltinFunctionsLib()->getImageFunction(ImageBuiltin::copyImage3dToBufferBytes));

    // Have the mock keep the argument values; slot 4 is inspected below.
    imageToBufferKernel->checkPassedArgumentValues = true;
    imageToBufferKernel->setArgRedescribedImageCallBase = false;
    imageToBufferKernel->passedArgumentValues.clear();
    imageToBufferKernel->passedArgumentValues.resize(5);

    auto cmdList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
    ASSERT_EQ(ZE_RESULT_SUCCESS, cmdList->initialize(device, NEO::EngineGroupType::renderCompute, 0u));

    // 4 x 4 x 2 3D image; the remaining descriptor fields stay zero-initialized.
    ze_image_desc_t imageDesc = {};
    imageDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
    imageDesc.type = ZE_IMAGE_TYPE_3D;
    imageDesc.width = 4;
    imageDesc.height = 4;
    imageDesc.depth = 2;

    auto image = std::make_unique<WhiteBox<::L0::ImageCoreFamily<FamilyType::gfxCoreFamily>>>();
    image->initialize(device, &imageDesc);

    void *destination = reinterpret_cast<void *>(0x1234);

    // No region is given (nullptr); only the explicit destination pitches are supplied.
    EXPECT_EQ(ZE_RESULT_SUCCESS, cmdList->appendImageCopyToMemoryExt(destination, image->toHandle(), nullptr, 0x20000000, 0x80000000, nullptr, 0, nullptr, copyParams));
    EXPECT_TRUE(cmdList->usedKernelLaunchParams.isBuiltInKernel);

    auto pitchArgumentSize = imageToBufferKernel->passedArgumentValues[4u].size();
    EXPECT_EQ(sizeof(uint64_t) * 2, pitchArgumentSize);
}
HWTEST_F(CommandListTest, givenComputeCommandListWhenMemoryCopyInExternalHostAllocationThenBuiltinFlagAndDestinationAllocSystemIsSet) {
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u);