fix: patch 64-bit row and slice pitch for builtins in L0 heapless
Related-To: NEO-12149
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
parent
46fa465c34
commit
c96ad9dcbf
|
@ -785,17 +785,19 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemoryExt(z
|
|||
builtinKernel->setArgRedescribedImage(1u, image->toHandle());
|
||||
builtinKernel->setArgumentValue(2u, sizeof(size_t), &allocationStruct.offset);
|
||||
|
||||
uint32_t origin[] = {
|
||||
static_cast<uint32_t>(pDstRegion->originX),
|
||||
static_cast<uint32_t>(pDstRegion->originY),
|
||||
static_cast<uint32_t>(pDstRegion->originZ),
|
||||
0};
|
||||
uint32_t origin[] = {pDstRegion->originX,
|
||||
pDstRegion->originY,
|
||||
pDstRegion->originZ,
|
||||
0};
|
||||
builtinKernel->setArgumentValue(3u, sizeof(origin), &origin);
|
||||
|
||||
uint32_t pitch[] = {
|
||||
srcRowPitch,
|
||||
srcSlicePitch};
|
||||
builtinKernel->setArgumentValue(4u, sizeof(pitch), &pitch);
|
||||
if (this->heaplessModeEnabled) {
|
||||
uint64_t pitch[] = {srcRowPitch, srcSlicePitch};
|
||||
builtinKernel->setArgumentValue(4u, sizeof(pitch), &pitch);
|
||||
} else {
|
||||
uint32_t pitch[] = {srcRowPitch, srcSlicePitch};
|
||||
builtinKernel->setArgumentValue(4u, sizeof(pitch), &pitch);
|
||||
}
|
||||
|
||||
uint32_t groupSizeX = pDstRegion->width;
|
||||
uint32_t groupSizeY = pDstRegion->height;
|
||||
|
@ -968,20 +970,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemoryExt(voi
|
|||
builtinKernel->setArgBufferWithAlloc(1u, allocationStruct.alignedAllocationPtr,
|
||||
allocationStruct.alloc,
|
||||
nullptr);
|
||||
|
||||
uint32_t origin[] = {
|
||||
static_cast<uint32_t>(pSrcRegion->originX),
|
||||
static_cast<uint32_t>(pSrcRegion->originY),
|
||||
static_cast<uint32_t>(pSrcRegion->originZ),
|
||||
0};
|
||||
uint32_t origin[] = {pSrcRegion->originX,
|
||||
pSrcRegion->originY,
|
||||
pSrcRegion->originZ,
|
||||
0};
|
||||
builtinKernel->setArgumentValue(2u, sizeof(origin), &origin);
|
||||
|
||||
builtinKernel->setArgumentValue(3u, sizeof(size_t), &allocationStruct.offset);
|
||||
|
||||
uint32_t pitch[] = {
|
||||
destRowPitch,
|
||||
destSlicePitch};
|
||||
builtinKernel->setArgumentValue(4u, sizeof(pitch), &pitch);
|
||||
if (this->heaplessModeEnabled) {
|
||||
uint64_t pitch[] = {destRowPitch, destSlicePitch};
|
||||
builtinKernel->setArgumentValue(4u, sizeof(pitch), &pitch);
|
||||
} else {
|
||||
uint32_t pitch[] = {destRowPitch, destSlicePitch};
|
||||
builtinKernel->setArgumentValue(4u, sizeof(pitch), &pitch);
|
||||
}
|
||||
|
||||
uint32_t groupSizeX = pSrcRegion->width;
|
||||
uint32_t groupSizeY = pSrcRegion->height;
|
||||
|
|
|
@ -115,13 +115,29 @@ struct Mock<::L0::KernelImp> : public WhiteBox<::L0::KernelImp> {
|
|||
printPrintfOutputCalledTimes++;
|
||||
}
|
||||
|
||||
// Test hook for kernel argument patching.
// When checkPassedArgumentValues is false the call is forwarded unchanged to
// BaseClass::setArgumentValue. When it is true, the raw bytes of the argument
// are recorded in passedArgumentValues[argIndex] (resized to argSize) and
// ZE_RESULT_SUCCESS is returned without touching the base implementation.
// The test must size passedArgumentValues beforehand; an out-of-range argIndex
// trips UNRECOVERABLE_IF.
ze_result_t setArgumentValue(uint32_t argIndex, size_t argSize, const void *pArgValue) override {
    if (!checkPassedArgumentValues) {
        return BaseClass::setArgumentValue(argIndex, argSize, pArgValue);
    }

    UNRECOVERABLE_IF(argIndex >= passedArgumentValues.size());

    auto &recordedBytes = passedArgumentValues[argIndex];
    recordedBytes.resize(argSize);

    if (pArgValue != nullptr) {
        // memcpy requires valid pointers even for a zero-length copy.
        if (argSize != 0u) {
            memcpy(recordedBytes.data(), pArgValue, argSize);
        }
    } else {
        // A null pArgValue is accepted by zeKernelSetArgumentValue ("null argument");
        // record it as argSize zero bytes instead of copying from a null pointer.
        recordedBytes.assign(argSize, uint8_t{0});
    }

    return ZE_RESULT_SUCCESS;
}
|
||||
|
||||
WhiteBox<::L0::KernelImmutableData> immutableData;
|
||||
std::vector<std::vector<uint8_t>> passedArgumentValues;
|
||||
NEO::KernelDescriptor descriptor;
|
||||
NEO::KernelInfo info;
|
||||
uint32_t printPrintfOutputCalledTimes = 0;
|
||||
bool hangDetectedPassedToPrintfOutput = false;
|
||||
bool enableForcingOfGenerateLocalIdByHw = false;
|
||||
bool forceGenerateLocalIdByHw = false;
|
||||
bool checkPassedArgumentValues = false;
|
||||
};
|
||||
|
||||
} // namespace ult
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "shared/test/common/mocks/mock_memory_operations_handler.h"
|
||||
#include "shared/test/common/mocks/ult_device_factory.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
#include "shared/test/common/test_macros/test_checks_shared.h"
|
||||
|
||||
#include "level_zero/core/source/builtin/builtin_functions_lib.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
|
||||
|
@ -34,6 +35,8 @@
|
|||
#include "level_zero/core/test/unit_tests/mocks/mock_image.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
|
||||
|
||||
#include "test_traits_common.h"
|
||||
|
||||
namespace L0 {
|
||||
namespace ult {
|
||||
|
||||
|
@ -850,6 +853,115 @@ HWTEST2_F(CommandListTest, givenComputeCommandListWhenImageCopyFromMemoryThenBui
|
|||
EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);
|
||||
}
|
||||
|
||||
struct HeaplessSupportedMatch {
|
||||
template <PRODUCT_FAMILY productFamily>
|
||||
static constexpr bool isMatched() {
|
||||
return TestTraits<NEO::ToGfxCoreFamily<productFamily>::get()>::heaplessAllowed;
|
||||
}
|
||||
};
|
||||
|
||||
// Checks kernel argument 4 (row pitch, slice pitch) set by appendImageCopyFromMemory:
// with heaplessModeEnabled it must be two uint64_t values, otherwise two uint32_t
// values. Expected row pitch is region width * bytes per pixel; expected slice
// pitch is region height * row pitch.
HWTEST2_F(CommandListTest, givenHeaplessWhenAppendImageCopyFromMemoryThenCorrectRowAndSlicePitchArePassed, HeaplessSupportedMatch) {
    REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);

    for (bool heaplessEnabled : {false, true}) {
        ImageBuiltin func = heaplessEnabled ? ImageBuiltin::copyBufferToImage3dBytesHeapless : ImageBuiltin::copyBufferToImage3dBytes;
        auto kernel = device->getBuiltinFunctionsLib()->getImageFunction(func);
        // NOTE(review): assumes the fixture's builtin library hands out Mock<KernelImp> kernels - confirm.
        auto mockBuiltinKernel = static_cast<Mock<::L0::KernelImp> *>(kernel);

        // Make the mock record raw argument bytes instead of patching a real kernel.
        mockBuiltinKernel->checkPassedArgumentValues = true;
        mockBuiltinKernel->setArgRedescribedImageCallBase = false;
        mockBuiltinKernel->passedArgumentValues.clear();
        mockBuiltinKernel->passedArgumentValues.resize(5);

        auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
        commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u);
        commandList->heaplessModeEnabled = heaplessEnabled;
        commandList->scratchAddressPatchingEnabled = true;

        void *srcPtr = reinterpret_cast<void *>(0x1234);

        ze_image_desc_t zeDesc = {};
        zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
        zeDesc.type = ZE_IMAGE_TYPE_3D;
        zeDesc.width = 4;
        zeDesc.height = 2;
        zeDesc.depth = 2;

        ze_image_region_t dstImgRegion = {2, 1, 1, 4, 2, 2};

        auto imageHw = std::make_unique<WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>>>();
        imageHw->initialize(device, &zeDesc);
        auto bytesPerPixel = static_cast<uint32_t>(imageHw->getImageInfo().surfaceFormat->imageElementSizeInBytes);

        commandList->appendImageCopyFromMemory(imageHw->toHandle(), srcPtr, &dstImgRegion, nullptr, 0, nullptr, false);
        EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);

        auto passedArgSizeRowSlicePitch = mockBuiltinKernel->passedArgumentValues[4u].size();
        auto *passedArgRowSlicePitch = mockBuiltinKernel->passedArgumentValues[4u].data();

        const uint32_t expectedRowPitch = dstImgRegion.width * bytesPerPixel;
        const uint32_t expectedSlicePitch = dstImgRegion.height * expectedRowPitch;

        // The size checks are fatal (ASSERT) so that a wrongly sized argument fails
        // here instead of making memcmp read past the end of expectedPitch.
        if (heaplessEnabled) {
            uint64_t expectedPitch[] = {expectedRowPitch, expectedSlicePitch};
            ASSERT_EQ(sizeof(expectedPitch), passedArgSizeRowSlicePitch);
            EXPECT_EQ(0, memcmp(passedArgRowSlicePitch, expectedPitch, sizeof(expectedPitch)));
        } else {
            uint32_t expectedPitch[] = {expectedRowPitch, expectedSlicePitch};
            ASSERT_EQ(sizeof(expectedPitch), passedArgSizeRowSlicePitch);
            EXPECT_EQ(0, memcmp(passedArgRowSlicePitch, expectedPitch, sizeof(expectedPitch)));
        }
    }
}
|
||||
|
||||
// Checks kernel argument 4 (row pitch, slice pitch) set by appendImageCopyToMemory:
// with heaplessModeEnabled it must be two uint64_t values, otherwise two uint32_t
// values. Expected row pitch is region width * bytes per pixel; expected slice
// pitch is region height * row pitch.
HWTEST2_F(CommandListTest, givenHeaplessWhenAppendImageCopyToMemoryThenCorrectRowAndSlicePitchArePassed, HeaplessSupportedMatch) {
    REQUIRE_IMAGES_OR_SKIP(defaultHwInfo);

    for (bool heaplessEnabled : {false, true}) {
        ImageBuiltin func = heaplessEnabled ? ImageBuiltin::copyImage3dToBufferBytesHeapless : ImageBuiltin::copyImage3dToBufferBytes;

        auto kernel = device->getBuiltinFunctionsLib()->getImageFunction(func);
        // NOTE(review): assumes the fixture's builtin library hands out Mock<KernelImp> kernels - confirm.
        auto mockBuiltinKernel = static_cast<Mock<::L0::KernelImp> *>(kernel);

        // Make the mock record raw argument bytes instead of patching a real kernel.
        mockBuiltinKernel->checkPassedArgumentValues = true;
        mockBuiltinKernel->setArgRedescribedImageCallBase = false;
        mockBuiltinKernel->passedArgumentValues.clear();
        mockBuiltinKernel->passedArgumentValues.resize(5);

        auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
        commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u);
        commandList->heaplessModeEnabled = heaplessEnabled;
        commandList->scratchAddressPatchingEnabled = true;

        void *dstPtr = reinterpret_cast<void *>(0x1234);

        ze_image_desc_t zeDesc = {};
        zeDesc.stype = ZE_STRUCTURE_TYPE_IMAGE_DESC;
        zeDesc.type = ZE_IMAGE_TYPE_3D;
        zeDesc.width = 4;
        zeDesc.height = 2;
        zeDesc.depth = 2;

        ze_image_region_t srcImgRegion = {2, 1, 1, 4, 2, 2};

        auto imageHw = std::make_unique<WhiteBox<::L0::ImageCoreFamily<gfxCoreFamily>>>();
        imageHw->initialize(device, &zeDesc);
        auto bytesPerPixel = static_cast<uint32_t>(imageHw->getImageInfo().surfaceFormat->imageElementSizeInBytes);

        commandList->appendImageCopyToMemory(dstPtr, imageHw->toHandle(), &srcImgRegion, nullptr, 0, nullptr, false);
        EXPECT_TRUE(commandList->usedKernelLaunchParams.isBuiltInKernel);

        auto passedArgSizeRowSlicePitch = mockBuiltinKernel->passedArgumentValues[4u].size();
        auto *passedArgRowSlicePitch = mockBuiltinKernel->passedArgumentValues[4u].data();

        const uint32_t expectedRowPitch = srcImgRegion.width * bytesPerPixel;
        const uint32_t expectedSlicePitch = srcImgRegion.height * expectedRowPitch;

        // The size checks are fatal (ASSERT) so that a wrongly sized argument fails
        // here instead of making memcmp read past the end of expectedPitch.
        if (heaplessEnabled) {
            uint64_t expectedPitch[] = {expectedRowPitch, expectedSlicePitch};
            ASSERT_EQ(sizeof(expectedPitch), passedArgSizeRowSlicePitch);
            EXPECT_EQ(0, memcmp(passedArgRowSlicePitch, expectedPitch, sizeof(expectedPitch)));
        } else {
            uint32_t expectedPitch[] = {expectedRowPitch, expectedSlicePitch};
            ASSERT_EQ(sizeof(expectedPitch), passedArgSizeRowSlicePitch);
            EXPECT_EQ(0, memcmp(passedArgRowSlicePitch, expectedPitch, sizeof(expectedPitch)));
        }
    }
}
|
||||
|
||||
HWTEST2_F(CommandListTest, givenComputeCommandListWhenMemoryCopyInExternalHostAllocationThenBuiltinFlagAndDestinationAllocSystemIsSet, MatchAny) {
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
|
||||
commandList->initialize(device, NEO::EngineGroupType::renderCompute, 0u);
|
||||
|
|
Loading…
Reference in New Issue