fix: patch 64bit row and slice pitch for builtins in L0 heapless

Related-To: NEO-12149
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk 2024-12-20 03:30:37 +00:00 committed by Compute-Runtime-Automation
parent 46fa465c34
commit c96ad9dcbf
3 changed files with 150 additions and 20 deletions

View File

@ -785,17 +785,19 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemoryExt(z
builtinKernel->setArgRedescribedImage(1u, image->toHandle());
builtinKernel->setArgumentValue(2u, sizeof(size_t), &allocationStruct.offset);
uint32_t origin[] = {
static_cast<uint32_t>(pDstRegion->originX),
static_cast<uint32_t>(pDstRegion->originY),
static_cast<uint32_t>(pDstRegion->originZ),
0};
uint32_t origin[] = {pDstRegion->originX,
pDstRegion->originY,
pDstRegion->originZ,
0};
builtinKernel->setArgumentValue(3u, sizeof(origin), &origin);
uint32_t pitch[] = {
srcRowPitch,
srcSlicePitch};
builtinKernel->setArgumentValue(4u, sizeof(pitch), &pitch);
if (this->heaplessModeEnabled) {
uint64_t pitch[] = {srcRowPitch, srcSlicePitch};
builtinKernel->setArgumentValue(4u, sizeof(pitch), &pitch);
} else {
uint32_t pitch[] = {srcRowPitch, srcSlicePitch};
builtinKernel->setArgumentValue(4u, sizeof(pitch), &pitch);
}
uint32_t groupSizeX = pDstRegion->width;
uint32_t groupSizeY = pDstRegion->height;
@ -968,20 +970,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemoryExt(voi
builtinKernel->setArgBufferWithAlloc(1u, allocationStruct.alignedAllocationPtr,
allocationStruct.alloc,
nullptr);
uint32_t origin[] = {
static_cast<uint32_t>(pSrcRegion->originX),
static_cast<uint32_t>(pSrcRegion->originY),
static_cast<uint32_t>(pSrcRegion->originZ),
0};
uint32_t origin[] = {pSrcRegion->originX,
pSrcRegion->originY,
pSrcRegion->originZ,
0};
builtinKernel->setArgumentValue(2u, sizeof(origin), &origin);
builtinKernel->setArgumentValue(3u, sizeof(size_t), &allocationStruct.offset);
uint32_t pitch[] = {
destRowPitch,
destSlicePitch};
builtinKernel->setArgumentValue(4u, sizeof(pitch), &pitch);
if (this->heaplessModeEnabled) {
uint64_t pitch[] = {destRowPitch, destSlicePitch};
builtinKernel->setArgumentValue(4u, sizeof(pitch), &pitch);
} else {
uint32_t pitch[] = {destRowPitch, destSlicePitch};
builtinKernel->setArgumentValue(4u, sizeof(pitch), &pitch);
}
uint32_t groupSizeX = pSrcRegion->width;
uint32_t groupSizeY = pSrcRegion->height;

View File

@ -115,13 +115,29 @@ struct Mock<::L0::KernelImp> : public WhiteBox<::L0::KernelImp> {
printPrintfOutputCalledTimes++;
}
// Mock override of setArgumentValue.
// When checkPassedArgumentValues is false the call is forwarded unchanged to BaseClass.
// When it is true the argSize raw bytes at pArgValue are copied into
// passedArgumentValues[argIndex] (the slot must already exist) and
// ZE_RESULT_SUCCESS is returned without calling BaseClass.
ze_result_t setArgumentValue(uint32_t argIndex, size_t argSize, const void *pArgValue) override {
    if (!checkPassedArgumentValues) {
        return BaseClass::setArgumentValue(argIndex, argSize, pArgValue);
    }

    // The test is expected to pre-size passedArgumentValues; an out-of-range index aborts.
    UNRECOVERABLE_IF(argIndex >= passedArgumentValues.size());

    auto &capturedBytes = passedArgumentValues[argIndex];
    capturedBytes.resize(argSize);
    memcpy(capturedBytes.data(), pArgValue, argSize);
    return ZE_RESULT_SUCCESS;
}
WhiteBox<::L0::KernelImmutableData> immutableData;
std::vector<std::vector<uint8_t>> passedArgumentValues;
NEO::KernelDescriptor descriptor;
NEO::KernelInfo info;
uint32_t printPrintfOutputCalledTimes = 0;
bool hangDetectedPassedToPrintfOutput = false;
bool enableForcingOfGenerateLocalIdByHw = false;
bool forceGenerateLocalIdByHw = false;
bool checkPassedArgumentValues = false;
};
} // namespace ult

View File

@ -24,6 +24,7 @@
#include "shared/test/common/mocks/mock_memory_operations_handler.h"
#include "shared/test/common/mocks/ult_device_factory.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/common/test_macros/test_checks_shared.h"
#include "level_zero/core/source/builtin/builtin_functions_lib.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
@ -34,6 +35,8 @@
#include "level_zero/core/test/unit_tests/mocks/mock_image.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
#include "test_traits_common.h"
namespace L0 {
namespace ult {
@ -850,6 +853,115 @@ HWTEST2_F(CommandListTest, givenComputeCommandListWhenImageCopyFromMemoryThenBui
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
}
// Matcher used as the last HWTEST2_F argument: a product family matches when the
// TestTraits of its gfx core family report heaplessAllowed.
struct HeaplessSupportedMatch {
    template <PRODUCT_FAMILY productFamily>
    static constexpr bool isMatched() {
        // Map the product family to its core family, then query the per-core test traits.
        constexpr auto coreFamily = NEO::ToGfxCoreFamily<productFamily>::get();
        return TestTraits<coreFamily>::heaplessAllowed;
    }
};
// Verifies the row/slice pitch kernel argument (index 4) passed by appendImageCopyFromMemory
// for a 3D image: two uint64_t values when heaplessModeEnabled is set, two uint32_t values
// otherwise. Both modes are exercised in one test body.
HWTEST2_F(CommandListTest, givenHeaplessWhenAppendImageCopyFromMemoryThenCorrectRowAndSlicePitchArePassed, HeaplessSupportedMatch) {
    REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);

    for (bool heaplessEnabled : {false, true}) {
        ImageBuiltin func = heaplessEnabled ? ImageBuiltin::copyBufferToImage3dBytesHeapless : ImageBuiltin::copyBufferToImage3dBytes;
        auto kernel = device->getBuiltinFunctionsLib()->getImageFunction(func);
        auto mockBuiltinKernel = static_cast<Mock<::L0::KernelImp> *>(kernel);

        // Make the mock record the raw bytes of every setArgumentValue call;
        // five slots cover argument indices 0..4.
        mockBuiltinKernel->checkPassedArgumentValues = true;
        mockBuiltinKernel->setArgRedescribedImageCallBase = false;
        mockBuiltinKernel->passedArgumentValues.clear();
        mockBuiltinKernel->passedArgumentValues.resize(5);

        auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
        commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u);
        commandList->heaplessModeEnabled = heaplessEnabled;
        commandList->scratchAddressPatchingEnabled = true;

        void *srcPtr = reinterpret_cast<void *>(0x1234);

        ze_image_desc_t zeDesc = {};
        zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
        zeDesc.type = ZE_IMAGE_TYPE_3D;
        zeDesc.width = 4;
        zeDesc.height = 2;
        zeDesc.depth = 2;

        ze_image_region_t dstImgRegion = {2, 1, 1, 4, 2, 2};

        auto imageHw = std::make_unique<WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>>>();
        imageHw->initialize(device, &zeDesc);
        auto bytesPerPixel = static_cast<uint32_t>(imageHw->getImageInfo().surfaceFormat->imageElementSizeInBytes);

        commandList->appendImageCopyFromMemory(imageHw->toHandle(), srcPtr, &dstImgRegion, nullptr, 0, nullptr, false);
        EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);

        auto passedArgSizeRowSlicePitch = mockBuiltinKernel->passedArgumentValues[4u].size();
        auto *passedArgRowSlicePitch = mockBuiltinKernel->passedArgumentValues[4u].data();

        // The size check is fatal and memcmp is bounded by sizeof(expectedPitch): a captured
        // argument of unexpected size must fail the test rather than let memcmp read past
        // either buffer.
        if (heaplessEnabled) {
            uint64_t expectedPitch[] = {dstImgRegion.width * bytesPerPixel, dstImgRegion.height * (dstImgRegion.width * bytesPerPixel)};
            ASSERT_EQ(sizeof(expectedPitch), passedArgSizeRowSlicePitch);
            EXPECT_EQ(0, memcmp(passedArgRowSlicePitch, expectedPitch, sizeof(expectedPitch)));
        } else {
            uint32_t expectedPitch[] = {dstImgRegion.width * bytesPerPixel, dstImgRegion.height * (dstImgRegion.width * bytesPerPixel)};
            ASSERT_EQ(sizeof(expectedPitch), passedArgSizeRowSlicePitch);
            EXPECT_EQ(0, memcmp(passedArgRowSlicePitch, expectedPitch, sizeof(expectedPitch)));
        }
    }
}
// Verifies the row/slice pitch kernel argument (index 4) passed by appendImageCopyToMemory
// for a 3D image: two uint64_t values when heaplessModeEnabled is set, two uint32_t values
// otherwise. Both modes are exercised in one test body.
HWTEST2_F(CommandListTest, givenHeaplessWhenAppendImageCopyToMemoryThenCorrectRowAndSlicePitchArePassed, HeaplessSupportedMatch) {
    REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);

    for (bool heaplessEnabled : {false, true}) {
        ImageBuiltin func = heaplessEnabled ? ImageBuiltin::copyImage3dToBufferBytesHeapless : ImageBuiltin::copyImage3dToBufferBytes;
        auto kernel = device->getBuiltinFunctionsLib()->getImageFunction(func);
        auto mockBuiltinKernel = static_cast<Mock<::L0::KernelImp> *>(kernel);

        // Make the mock record the raw bytes of every setArgumentValue call;
        // five slots cover argument indices 0..4.
        mockBuiltinKernel->checkPassedArgumentValues = true;
        mockBuiltinKernel->setArgRedescribedImageCallBase = false;
        mockBuiltinKernel->passedArgumentValues.clear();
        mockBuiltinKernel->passedArgumentValues.resize(5);

        auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
        commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u);
        commandList->heaplessModeEnabled = heaplessEnabled;
        commandList->scratchAddressPatchingEnabled = true;

        void *dstPtr = reinterpret_cast<void *>(0x1234);

        ze_image_desc_t zeDesc = {};
        zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
        zeDesc.type = ZE_IMAGE_TYPE_3D;
        zeDesc.width = 4;
        zeDesc.height = 2;
        zeDesc.depth = 2;

        ze_image_region_t srcImgRegion = {2, 1, 1, 4, 2, 2};

        auto imageHw = std::make_unique<WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>>>();
        imageHw->initialize(device, &zeDesc);
        auto bytesPerPixel = static_cast<uint32_t>(imageHw->getImageInfo().surfaceFormat->imageElementSizeInBytes);

        commandList->appendImageCopyToMemory(dstPtr, imageHw->toHandle(), &srcImgRegion, nullptr, 0, nullptr, false);
        EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);

        auto passedArgSizeRowSlicePitch = mockBuiltinKernel->passedArgumentValues[4u].size();
        auto *passedArgRowSlicePitch = mockBuiltinKernel->passedArgumentValues[4u].data();

        // The size check is fatal and memcmp is bounded by sizeof(expectedPitch): a captured
        // argument of unexpected size must fail the test rather than let memcmp read past
        // either buffer.
        if (heaplessEnabled) {
            uint64_t expectedPitch[] = {srcImgRegion.width * bytesPerPixel, srcImgRegion.height * (srcImgRegion.width * bytesPerPixel)};
            ASSERT_EQ(sizeof(expectedPitch), passedArgSizeRowSlicePitch);
            EXPECT_EQ(0, memcmp(passedArgRowSlicePitch, expectedPitch, sizeof(expectedPitch)));
        } else {
            uint32_t expectedPitch[] = {srcImgRegion.width * bytesPerPixel, srcImgRegion.height * (srcImgRegion.width * bytesPerPixel)};
            ASSERT_EQ(sizeof(expectedPitch), passedArgSizeRowSlicePitch);
            EXPECT_EQ(0, memcmp(passedArgRowSlicePitch, expectedPitch, sizeof(expectedPitch)));
        }
    }
}
HWTEST2_F(CommandListTest, givenComputeCommandListWhenMemoryCopyInExternalHostAllocationThenBuiltinFlagAndDestinationAllocSystemIsSet, MatchAny) {
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u);