From 8836f6df0b82d7194db195fc573c9e26bb27700f Mon Sep 17 00:00:00 2001 From: Szymon Morek Date: Wed, 2 Apr 2025 09:37:44 +0000 Subject: [PATCH] fix: forward mip map level for 3D images Related-To: NEO-14539 1D and 2D images have already mip map level set correctly. Signed-off-by: Szymon Morek --- .../command_queue/command_queue_staging.cpp | 4 +- .../utilities/staging_buffer_manager.cpp | 9 ++-- .../source/utilities/staging_buffer_manager.h | 4 +- .../staging_buffer_manager_tests.cpp | 53 +++++++++++++++---- 4 files changed, 55 insertions(+), 15 deletions(-) diff --git a/opencl/source/command_queue/command_queue_staging.cpp b/opencl/source/command_queue/command_queue_staging.cpp index 8970add01b..0ad007ddf2 100644 --- a/opencl/source/command_queue/command_queue_staging.cpp +++ b/opencl/source/command_queue/command_queue_staging.cpp @@ -15,6 +15,7 @@ #include "opencl/source/context/context.h" #include "opencl/source/event/user_event.h" #include "opencl/source/helpers/base_object.h" +#include "opencl/source/helpers/mipmap.h" #include "opencl/source/mem_obj/buffer.h" #include "opencl/source/mem_obj/image.h" @@ -72,9 +73,10 @@ cl_int CommandQueue::enqueueStagingImageTransfer(cl_command_type commandType, Im auto bytesPerPixel = image->getSurfaceFormatInfo().surfaceFormat.imageElementSizeInBytes; auto dstRowPitch = inputRowPitch ? inputRowPitch : globalRegion[0] * bytesPerPixel; auto dstSlicePitch = inputSlicePitch ? 
inputSlicePitch : globalRegion[1] * dstRowPitch; + auto isMipMap = isMipMapped(image->getImageDesc()); auto stagingBufferManager = this->context->getStagingBufferManager(); - auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, dstRowPitch, dstSlicePitch, bytesPerPixel, chunkWrite, &csr, isRead); + auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, dstRowPitch, dstSlicePitch, bytesPerPixel, isMipMap, chunkWrite, &csr, isRead); if (isRead && context->isProvidingPerformanceHints()) { auto hostPtrSize = calculateHostPtrSizeForImage(globalRegion, inputRowPitch, inputSlicePitch, image); diff --git a/shared/source/utilities/staging_buffer_manager.cpp b/shared/source/utilities/staging_buffer_manager.cpp index 3d1702b447..8c4c6d98b7 100644 --- a/shared/source/utilities/staging_buffer_manager.cpp +++ b/shared/source/utilities/staging_buffer_manager.cpp @@ -132,7 +132,7 @@ size_t calculateSizeForRegion(size_t region[3], const ImageMetadata &imageMetada } StagingTransferStatus StagingBufferManager::performImageSlicesTransfer(StagingQueue &stagingQueue, size_t &submittedChunks, const void *ptr, auto sliceOffset, - size_t baseRowOffset, size_t rowsToCopy, size_t origin[3], size_t region[3], ImageMetadata &imageMetadata, + size_t baseRowOffset, size_t rowsToCopy, size_t origin[4], size_t region[3], ImageMetadata &imageMetadata, ChunkTransferImageFunc &chunkTransferImageFunc, CommandStreamReceiver *csr, bool isRead) { auto rowPitch = imageMetadata.rowPitch; auto rowsPerChunk = std::max(1ul, chunkSize / rowPitch); @@ -184,12 +184,15 @@ StagingTransferStatus StagingBufferManager::performImageSlicesTransfer(StagingQu * Several slices and rows can be packed into single chunk if size of such chunk does not exceeds maximum chunk size (2MB). * Caller provides actual function to enqueue read/write operation for single chunk. 
*/ -StagingTransferStatus StagingBufferManager::performImageTransfer(const void *ptr, const size_t *globalOrigin, const size_t *globalRegion, size_t rowPitch, size_t slicePitch, size_t bytesPerPixel, ChunkTransferImageFunc &chunkTransferImageFunc, CommandStreamReceiver *csr, bool isRead) { +StagingTransferStatus StagingBufferManager::performImageTransfer(const void *ptr, const size_t *globalOrigin, const size_t *globalRegion, size_t rowPitch, size_t slicePitch, size_t bytesPerPixel, bool isMipMapped, ChunkTransferImageFunc &chunkTransferImageFunc, CommandStreamReceiver *csr, bool isRead) { StagingQueue stagingQueue; - size_t origin[3] = {}; + size_t origin[4] = {}; size_t region[3] = {}; origin[0] = globalOrigin[0]; region[0] = globalRegion[0]; + if (isMipMapped && globalRegion[2] > 1) { + origin[3] = globalOrigin[3]; + } StagingTransferStatus result{}; size_t submittedChunks = 0; diff --git a/shared/source/utilities/staging_buffer_manager.h b/shared/source/utilities/staging_buffer_manager.h index 8602513cec..6736b46cc4 100644 --- a/shared/source/utilities/staging_buffer_manager.h +++ b/shared/source/utilities/staging_buffer_manager.h @@ -91,7 +91,7 @@ class StagingBufferManager : NEO::NonCopyableAndNonMovableClass { bool isValidForStagingTransfer(const Device &device, const void *ptr, size_t size, bool hasDependencies); StagingTransferStatus performCopy(void *dstPtr, const void *srcPtr, size_t size, ChunkCopyFunction &chunkCopyFunc, CommandStreamReceiver *csr); - StagingTransferStatus performImageTransfer(const void *ptr, const size_t *globalOrigin, const size_t *globalRegion, size_t rowPitch, size_t slicePitch, size_t bytesPerPixel, ChunkTransferImageFunc &chunkTransferImageFunc, CommandStreamReceiver *csr, bool isRead); + StagingTransferStatus performImageTransfer(const void *ptr, const size_t *globalOrigin, const size_t *globalRegion, size_t rowPitch, size_t slicePitch, size_t bytesPerPixel, bool isMipMapped, ChunkTransferImageFunc &chunkTransferImageFunc, 
CommandStreamReceiver *csr, bool isRead); StagingTransferStatus performBufferTransfer(const void *ptr, size_t globalOffset, size_t globalSize, ChunkTransferBufferFunc &chunkTransferBufferFunc, CommandStreamReceiver *csr, bool isRead); std::pair requestStagingBuffer(size_t &size); @@ -108,7 +108,7 @@ class StagingBufferManager : NEO::NonCopyableAndNonMovableClass { template <class Func, class... Args> StagingTransferStatus performChunkTransfer(size_t chunkTransferId, bool isRead, const UserData &userData, StagingQueue &currentStagingBuffers, CommandStreamReceiver *csr, Func &func, Args... args); StagingTransferStatus performImageSlicesTransfer(StagingQueue &stagingQueue, size_t &submittedChunks, const void *ptr, auto sliceOffset, - size_t baseRowOffset, size_t rowsToCopy, size_t origin[3], size_t region[3], ImageMetadata &imageMetadata, + size_t baseRowOffset, size_t rowsToCopy, size_t origin[4], size_t region[3], ImageMetadata &imageMetadata, ChunkTransferImageFunc &chunkTransferImageFunc, CommandStreamReceiver *csr, bool isRead); WaitStatus copyStagingToHost(const std::pair &transfer, StagingBufferTracker &tracker) const; diff --git a/shared/test/unit_test/utilities/staging_buffer_manager_tests.cpp b/shared/test/unit_test/utilities/staging_buffer_manager_tests.cpp index fab1edf919..b9968d10b1 100644 --- a/shared/test/unit_test/utilities/staging_buffer_manager_tests.cpp +++ b/shared/test/unit_test/utilities/staging_buffer_manager_tests.cpp @@ -134,7 +134,7 @@ class StagingBufferManagerFixture : public DeviceFixture { return 0; }; auto initialNumOfUsmAllocations = svmAllocsManager->svmAllocs.getNumAllocs(); - auto ret = stagingBufferManager->performImageTransfer(hostPtr, globalOrigin, globalRegion, rowPitch, rowPitch * globalRegion[1], pixelElemSize, chunkTransfer, csr, isRead); + auto ret = stagingBufferManager->performImageTransfer(hostPtr, globalOrigin, globalRegion, rowPitch, rowPitch * globalRegion[1], pixelElemSize, false, chunkTransfer, csr, isRead); auto newUsmAllocations = 
svmAllocsManager->svmAllocs.getNumAllocs() - initialNumOfUsmAllocations; for (auto rowId = 0u; rowId < globalRegion[1]; rowId++) { @@ -585,7 +585,7 @@ HWTEST_F(StagingBufferManagerTest, givenStagingBufferWhenGpuHangDuringChunkReadF }; auto ultCsr = reinterpret_cast *>(csr); ultCsr->waitForTaskCountReturnValue = WaitStatus::gpuHang; - auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, true); + auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, MemoryConstants::megaByte, pixelElemSize, false, chunkWrite, csr, true); EXPECT_EQ(0, ret.chunkCopyStatus); EXPECT_EQ(WaitStatus::gpuHang, ret.waitStatus); EXPECT_EQ(2u, chunkCounter); @@ -607,7 +607,7 @@ HWTEST_F(StagingBufferManagerTest, givenStagingBufferWhenGpuHangAfterChunkReadFr } return 0; }; - auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, true); + auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, MemoryConstants::megaByte, pixelElemSize, false, chunkWrite, csr, true); EXPECT_EQ(0, ret.chunkCopyStatus); EXPECT_EQ(WaitStatus::gpuHang, ret.waitStatus); EXPECT_EQ(4u, chunkCounter); @@ -630,7 +630,7 @@ HWTEST_F(StagingBufferManagerTest, givenStagingBufferWhenGpuHangDuringRemainderC } return 0; }; - auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, true); + auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, MemoryConstants::megaByte, pixelElemSize, false, chunkWrite, csr, true); EXPECT_EQ(0, ret.chunkCopyStatus); EXPECT_EQ(WaitStatus::gpuHang, ret.waitStatus); 
EXPECT_EQ(remainderCounter - 1, chunkCounter); @@ -649,7 +649,7 @@ TEST_F(StagingBufferManagerTest, givenStagingBufferWhenFailedChunkImageWriteThen ++chunkCounter; return expectedErrorCode; }; - auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, false); + auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, MemoryConstants::megaByte, pixelElemSize, false, chunkWrite, csr, false); EXPECT_EQ(expectedErrorCode, ret.chunkCopyStatus); EXPECT_EQ(WaitStatus::ready, ret.waitStatus); EXPECT_EQ(1u, chunkCounter); @@ -672,7 +672,7 @@ TEST_F(StagingBufferManagerTest, givenStagingBufferWhenFailedChunkImageWriteWith } return 0; }; - auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, MemoryConstants::megaByte, pixelElemSize, chunkWrite, csr, false); + auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, MemoryConstants::megaByte, MemoryConstants::megaByte, pixelElemSize, false, chunkWrite, csr, false); EXPECT_EQ(expectedErrorCode, ret.chunkCopyStatus); EXPECT_EQ(WaitStatus::ready, ret.waitStatus); EXPECT_EQ(remainderCounter, chunkCounter); @@ -708,7 +708,7 @@ HWTEST_P(StagingBufferManager3DImageTest, givenStagingBufferWhenPerformImageTran ++chunkCounter; return 0; }; - auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, rowPitch, slicePitch, pixelElemSize, chunkWrite, csr, false); + auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, rowPitch, slicePitch, pixelElemSize, false, chunkWrite, csr, false); EXPECT_EQ(0, ret.chunkCopyStatus); EXPECT_EQ(WaitStatus::ready, ret.waitStatus); EXPECT_EQ(expectedChunks, chunkCounter); @@ -745,7 +745,7 @@ HWTEST_F(StagingBufferManagerTest, givenStagingBufferWhenGpuHangDuringSliceRemai } return 0; }; - 
auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, rowPitch, slicePitch, pixelElemSize, chunkWrite, csr, true); + auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, rowPitch, slicePitch, pixelElemSize, false, chunkWrite, csr, true); EXPECT_EQ(0, ret.chunkCopyStatus); EXPECT_EQ(WaitStatus::gpuHang, ret.waitStatus); EXPECT_EQ(expectedChunks - 1, chunkCounter); @@ -771,13 +771,48 @@ TEST_F(StagingBufferManagerTest, givenStagingBufferWhenFailedChunkImageWriteWith } return 0; }; - auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, rowPitch, slicePitch, pixelElemSize, chunkWrite, csr, false); + auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, globalRegion, rowPitch, slicePitch, pixelElemSize, false, chunkWrite, csr, false); EXPECT_EQ(expectedErrorCode, ret.chunkCopyStatus); EXPECT_EQ(WaitStatus::ready, ret.waitStatus); EXPECT_EQ(expectedChunks, chunkCounter); delete[] ptr; } +struct ImageMipMapTestInfo { + size_t region[3]; + size_t expectedMipMapIndex; +}; +class StagingBufferManagerImageMipMapTest : public StagingBufferManagerTest, + public ::testing::WithParamInterface<ImageMipMapTestInfo> {}; + +TEST_P(StagingBufferManagerImageMipMapTest, givenStagingBufferWhenPerformImageTransferWithMipMappedImageThenOriginsSetCorrectly) { + constexpr auto mipMapLevel = 10u; + size_t globalOrigin[4] = {}; + auto region = GetParam().region; + auto expectedMipMapIndex = GetParam().expectedMipMapIndex; + globalOrigin[expectedMipMapIndex] = mipMapLevel; + unsigned int ptr[256]; + + ChunkTransferImageFunc chunkWrite = [&](void *stagingBuffer, const size_t *origin, const size_t *region) -> int32_t { + // Verify that mip map level was forwarded to passed lambda function at correct origin index. 
+ EXPECT_EQ(mipMapLevel, origin[expectedMipMapIndex]); + return 0; + }; + auto ret = stagingBufferManager->performImageTransfer(ptr, globalOrigin, region, region[0], region[0] * region[1], pixelElemSize, true, chunkWrite, csr, false); + EXPECT_EQ(0, ret.chunkCopyStatus); + EXPECT_EQ(WaitStatus::ready, ret.waitStatus); +} + +ImageMipMapTestInfo imageMipMapTestsInfo[] = { + {{4u, 1u, 1u}, 1}, + {{4u, 4u, 1u}, 2}, + {{4u, 4u, 4u}, 3}}; + +INSTANTIATE_TEST_SUITE_P( + StagingBufferManagerTest_, + StagingBufferManagerImageMipMapTest, + testing::ValuesIn(imageMipMapTestsInfo)); + TEST_F(StagingBufferManagerTest, givenStagingBufferWhenPerformBufferTransferThenCopyData) { constexpr size_t numOfChunkCopies = 8; constexpr size_t remainder = 1024;