From 48dc0a823d4bc933255e05e055ae903610ed96d2 Mon Sep 17 00:00:00 2001
From: Lukasz Jobczyk
Date: Tue, 10 Sep 2024 12:57:48 +0000
Subject: [PATCH] fix: Check dst pitch in copy rect misaligned case

The fully optimized copy-rect kernel requires DWORD alignment. Until now
only the source middle-section start address and the source row/slice
pitches were checked. Also fall back to the slower misaligned kernel when
the destination row or slice pitch is not a multiple of sizeof(uint32_t).

Signed-off-by: Lukasz Jobczyk
---
Review notes (ignored by git am):
 - dropped a stray empty statement (';') after the isSrcMisaligned initializer
 - blob ids on the "index" lines still refer to the originally generated patch

 .../built_ins/builtins_dispatch_builder.cpp | 12 ++++---
 .../unit_test/built_ins/built_in_tests.cpp  | 35 +++++++++++++++++++
 2 files changed, 42 insertions(+), 5 deletions(-)

diff --git a/opencl/source/built_ins/builtins_dispatch_builder.cpp b/opencl/source/built_ins/builtins_dispatch_builder.cpp
index 4bb993b128..8f116a363f 100644
--- a/opencl/source/built_ins/builtins_dispatch_builder.cpp
+++ b/opencl/source/built_ins/builtins_dispatch_builder.cpp
@@ -240,17 +240,19 @@ class BuiltInOp : public BuiltinDispatchInfoBuilder
         leftSize = (leftSize > 0) ? (middleAlignment - leftSize) : 0; // calc left leftover size
         leftSize = std::min(leftSize, operationParams.size.x);        // clamp left leftover size to requested size
 
-        uintptr_t rightSize = (start + operationParams.size.x) % middleAlignment;                                       // calc right leftover size
-        rightSize = std::min(rightSize, (operationParams.size.x > leftSize) ? (operationParams.size.x - leftSize) : 0); // clamp
+        uintptr_t rightSize = (start + operationParams.size.x) % middleAlignment; // calc right leftover size
+        rightSize = std::min(rightSize, operationParams.size.x - leftSize);       // clamp
 
         const uintptr_t middleSizeBytes = (operationParams.size.x > leftSize + rightSize) ? operationParams.size.x - leftSize - rightSize : 0u; // calc middle size
 
         // corner case - fully optimized kernel requires DWORD alignment. If we don't have it, run slower, misaligned kernel
         const auto srcMiddleStart = reinterpret_cast<uintptr_t>(srcPtr) + operationParams.srcOffset.x + leftSize;
         const auto srcMisalignment = srcMiddleStart % sizeof(uint32_t);
-        const auto rowPitchMisalignment = operationParams.srcRowPitch % sizeof(uint32_t);
-        const auto slicePitchMisalignment = operationParams.srcSlicePitch % sizeof(uint32_t);
-        const auto isSrcMisaligned = srcMisalignment != 0u || rowPitchMisalignment != 0u || slicePitchMisalignment != 0u;
+        const auto srcRowPitchMisalignment = operationParams.srcRowPitch % sizeof(uint32_t);
+        const auto srcSlicePitchMisalignment = operationParams.srcSlicePitch % sizeof(uint32_t);
+        const auto dstRowPitchMisalignment = operationParams.dstRowPitch % sizeof(uint32_t);
+        const auto dstSlicePitchMisalignment = operationParams.dstSlicePitch % sizeof(uint32_t);
+        const auto isSrcMisaligned = srcMisalignment != 0u || srcRowPitchMisalignment != 0u || srcSlicePitchMisalignment != 0u || dstRowPitchMisalignment != 0u || dstSlicePitchMisalignment != 0u; // despite the name, also set by misaligned dst pitches
 
         const auto middleSizeEls = middleSizeBytes / middleElSize; // num work items in middle walker
 
diff --git a/opencl/test/unit_test/built_ins/built_in_tests.cpp b/opencl/test/unit_test/built_ins/built_in_tests.cpp
index 4297f933ae..88af577c95 100644
--- a/opencl/test/unit_test/built_ins/built_in_tests.cpp
+++ b/opencl/test/unit_test/built_ins/built_in_tests.cpp
@@ -987,6 +987,41 @@ TEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderCopyBufferToLocalBufferRect
     }
 }
 
+TEST_F(BuiltInTests, givenMisalignedDstPitchWhenBuilderCopyBufferRectSplitIsUsedThenParamsAreCorrect) {
+    if (is32bit || !pClDevice->getProductHelper().isCopyBufferRectSplitSupported()) {
+        GTEST_SKIP();
+    }
+
+    BuiltinDispatchInfoBuilder &builder = BuiltInDispatchBuilderOp::getBuiltinDispatchInfoBuilder(EBuiltInOps::copyBufferRectStateless, *pClDevice);
+
+    uint64_t bigSize = 10ull * MemoryConstants::gigaByte;
+    uint64_t size = 4ull * MemoryConstants::gigaByte;
+
+    MockBuffer srcBuffer;
+    srcBuffer.size = static_cast<size_t>(bigSize);
+    MockBuffer dstBuffer;
+    dstBuffer.size = static_cast<size_t>(bigSize);
+
+    srcBuffer.mockGfxAllocation.setAllocationType(AllocationType::buffer);
+    dstBuffer.mockGfxAllocation.setAllocationType(AllocationType::buffer);
+
+    BuiltinOpParams dc;
+    dc.srcMemObj = &srcBuffer;
+    dc.dstMemObj = &dstBuffer;
+    dc.srcOffset = {0, 0, 0};
+    dc.dstOffset = {0, 0, 0};
+    dc.size = {static_cast<size_t>(size), 1, 1};
+    dc.srcRowPitch = static_cast<size_t>(size);
+    dc.srcSlicePitch = 0;
+    dc.dstRowPitch = static_cast<size_t>(size);
+    dc.dstSlicePitch = 1; // not a multiple of sizeof(uint32_t): the misaligned dst pitch under test
+
+    MultiDispatchInfo multiDispatchInfo(dc);
+    ASSERT_TRUE(builder.buildDispatchInfos(multiDispatchInfo));
+    EXPECT_EQ(1u, multiDispatchInfo.size());
+    EXPECT_TRUE(compareBuiltinOpParams(multiDispatchInfo.peekBuiltinOpParams(), dc));
+}
+
 TEST_F(BuiltInTests, givenBigOffsetAndSizeWhenBuilderFillSystemBufferStatelessIsUsedThenParamsAreCorrect) {
     if (is32bit) {
         GTEST_SKIP();