From 2773bd8ec0ddd96f05b1530a62a4885968c387a3 Mon Sep 17 00:00:00 2001 From: Young Jin Yoon Date: Fri, 23 May 2025 08:20:32 +0000 Subject: [PATCH] fix: estimate blitter commands for memory fill Added getNumberOfBlitsForColorFill, getNumberOfBlitsForByteFill, and getNumberOfBlitsForFill in BlitCommandsHelper to estimate the total number of blitter commands generated by dispatchBlitMemoryColorFill, dispatchBlitMemoryByteFill, and dispatchBlitMemoryFill, respectively. Use getNumberOfBlitsForColorFill to determine whether additionalTimestampNode is required for appendBlitFill. Related-To: NEO-13003 Signed-off-by: Young Jin Yoon --- level_zero/core/source/cmdlist/cmdlist_hw.inl | 4 ++- .../command_stream_receiver_hw_gen12lp.cpp | 5 +++ shared/source/helpers/blit_commands_helper.h | 3 ++ .../helpers/blit_commands_helper_base.inl | 31 ++++++++++++++++ .../blit_commands_helper_pvc_and_later.inl | 22 ++++++++++++ ...command_stream_receiver_hw_xe_hpg_core.cpp | 5 +++ .../helpers/blit_commands_helper_tests.cpp | 36 +++++++++++++++---- 7 files changed, 98 insertions(+), 8 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 16101b4c63..ecb7f8db53 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2568,8 +2568,10 @@ ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, cons memcpy_s(&patternToCommand, sizeof(patternToCommand), pattern, patternSize); auto blitProperties = NEO::BlitProperties::constructPropertiesForMemoryFill(gpuAllocation, size, patternToCommand, patternSize, offset); + size_t nBlits = NEO::BlitCommandsHelper::getNumberOfBlitsForColorFill(blitProperties.copySize, patternSize, device->getNEODevice()->getRootDeviceEnvironmentRef(), blitProperties.isSystemMemoryPoolUsed); + bool useAdditionalTimestamp = nBlits > 1; if (useAdditionalBlitProperties) { - setAdditionalBlitProperties(blitProperties, signalEvent, false); + 
setAdditionalBlitProperties(blitProperties, signalEvent, useAdditionalTimestamp); } blitProperties.computeStreamPartitionCount = this->partitionCount; blitProperties.highPriority = isHighPriorityImmediateCmdList(); diff --git a/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp b/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp index 9c01220d76..4ca2063d0b 100644 --- a/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp +++ b/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp @@ -311,6 +311,11 @@ void BlitCommandsHelper::appendBlitCommandsForImages(const BlitPropertie dstSlicePitch = std::max(dstSlicePitch, dstRowPitch * dstQPitch); } +template <> +size_t BlitCommandsHelper::getNumberOfBlitsForByteFill(const Vec3 ©Size, size_t patternSize, const RootDeviceEnvironment &rootDeviceEnvironment, bool isSystemMemoryPoolUsed) { + return NEO::BlitCommandsHelper::getNumberOfBlitsForFill(copySize, patternSize, rootDeviceEnvironment, isSystemMemoryPoolUsed); +} + template <> BlitCommandsResult BlitCommandsHelper::dispatchBlitMemoryByteFill(const BlitProperties &blitProperties, LinearStream &linearStream, RootDeviceEnvironment &rootDeviceEnvironment) { return NEO::BlitCommandsHelper::dispatchBlitMemoryFill(blitProperties, linearStream, rootDeviceEnvironment); diff --git a/shared/source/helpers/blit_commands_helper.h b/shared/source/helpers/blit_commands_helper.h index 38ec96aa06..2731ee19ce 100644 --- a/shared/source/helpers/blit_commands_helper.h +++ b/shared/source/helpers/blit_commands_helper.h @@ -47,6 +47,9 @@ struct BlitCommandsHelper { bool debugPauseEnabled, bool blitterDirectSubmission, bool relaxedOrderingEnabled, const RootDeviceEnvironment &rootDeviceEnvironment); static size_t getNumberOfBlitsForCopyRegion(const Vec3 ©Size, const RootDeviceEnvironment &rootDeviceEnvironment, bool isSystemMemoryPoolUsed); static size_t getNumberOfBlitsForCopyPerRow(const Vec3 ©Size, const RootDeviceEnvironment &rootDeviceEnvironment, bool 
isSystemMemoryPoolUsed); + static size_t getNumberOfBlitsForColorFill(const Vec3 ©Size, size_t patternSize, const RootDeviceEnvironment &rootDeviceEnvironment, bool isSystemMemoryPoolUsed); + static size_t getNumberOfBlitsForByteFill(const Vec3 ©Size, size_t patternSize, const RootDeviceEnvironment &rootDeviceEnvironment, bool isSystemMemoryPoolUsed); + static size_t getNumberOfBlitsForFill(const Vec3 ©Size, size_t patternSize, const RootDeviceEnvironment &rootDeviceEnvironment, bool isSystemMemoryPoolUsed); static uint64_t calculateBlitCommandDestinationBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice); static uint64_t calculateBlitCommandSourceBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice); static uint64_t calculateBlitCommandDestinationBaseAddressCopyRegion(const BlitProperties &blitProperties, size_t slice); diff --git a/shared/source/helpers/blit_commands_helper_base.inl b/shared/source/helpers/blit_commands_helper_base.inl index 4e46d3df3f..5b34a2ae20 100644 --- a/shared/source/helpers/blit_commands_helper_base.inl +++ b/shared/source/helpers/blit_commands_helper_base.inl @@ -595,6 +595,37 @@ size_t BlitCommandsHelper::getNumberOfBlitsForCopyPerRow(const Vec3 +size_t BlitCommandsHelper::getNumberOfBlitsForFill(const Vec3 ©Size, size_t patternSize, const RootDeviceEnvironment &rootDeviceEnvironment, bool isSystemMemoryPoolUsed) { + auto maxWidthToFill = getMaxBlitWidth(rootDeviceEnvironment); + auto maxHeightToFill = getMaxBlitHeight(rootDeviceEnvironment, isSystemMemoryPoolUsed); + auto nBlits = 0; + uint64_t width = 1; + uint64_t height = 1; + uint64_t sizeToFill = copySize.x / patternSize; + while (sizeToFill != 0) { + if (sizeToFill <= maxWidthToFill) { + width = sizeToFill; + height = 1; + } else { + width = maxWidthToFill; + height = std::min((sizeToFill / width), maxHeightToFill); + } + sizeToFill -= (width * height); + nBlits++; + } + return nBlits; +} + 
+template +size_t BlitCommandsHelper::getNumberOfBlitsForColorFill(const Vec3 ©Size, size_t patternSize, const RootDeviceEnvironment &rootDeviceEnvironment, bool isSystemMemoryPoolUsed) { + if (patternSize == 1) { + return NEO::BlitCommandsHelper::getNumberOfBlitsForByteFill(copySize, patternSize, rootDeviceEnvironment, isSystemMemoryPoolUsed); + } else { + return NEO::BlitCommandsHelper::getNumberOfBlitsForFill(copySize, patternSize, rootDeviceEnvironment, isSystemMemoryPoolUsed); + } +} + template bool BlitCommandsHelper::preBlitCommandWARequired() { return false; diff --git a/shared/source/helpers/blit_commands_helper_pvc_and_later.inl b/shared/source/helpers/blit_commands_helper_pvc_and_later.inl index cfd9fdfaa3..3e9d930572 100644 --- a/shared/source/helpers/blit_commands_helper_pvc_and_later.inl +++ b/shared/source/helpers/blit_commands_helper_pvc_and_later.inl @@ -11,6 +11,28 @@ namespace NEO { +template +size_t BlitCommandsHelper::getNumberOfBlitsForByteFill(const Vec3 ©Size, size_t patternSize, const RootDeviceEnvironment &rootDeviceEnvironment, bool isSystemMemoryPoolUsed) { + auto maxWidthToFill = getMaxBlitSetWidth(rootDeviceEnvironment); + auto maxHeightToFill = getMaxBlitSetHeight(rootDeviceEnvironment); + auto nBlits = 0; + uint64_t width = 1; + uint64_t height = 1; + uint64_t sizeToFill = copySize.x / patternSize; + while (sizeToFill != 0) { + if (sizeToFill <= maxWidthToFill) { + width = sizeToFill; + height = 1; + } else { + width = maxWidthToFill; + height = std::min((sizeToFill / width), maxHeightToFill); + } + sizeToFill -= (width * height); + nBlits++; + } + return nBlits; +} + template BlitCommandsResult BlitCommandsHelper::dispatchBlitMemoryByteFill(const BlitProperties &blitProperties, LinearStream &linearStream, RootDeviceEnvironment &rootDeviceEnvironment) { using MEM_SET = typename Family::MEM_SET; diff --git a/shared/source/xe_hpg_core/command_stream_receiver_hw_xe_hpg_core.cpp 
b/shared/source/xe_hpg_core/command_stream_receiver_hw_xe_hpg_core.cpp index d232499393..931ccc707d 100644 --- a/shared/source/xe_hpg_core/command_stream_receiver_hw_xe_hpg_core.cpp +++ b/shared/source/xe_hpg_core/command_stream_receiver_hw_xe_hpg_core.cpp @@ -150,6 +150,11 @@ void BlitCommandsHelper::appendBlitCommandsBlockCopy(const BlitPropertie } } +template <> +size_t BlitCommandsHelper::getNumberOfBlitsForByteFill(const Vec3 ©Size, size_t patternSize, const RootDeviceEnvironment &rootDeviceEnvironment, bool isSystemMemoryPoolUsed) { + return NEO::BlitCommandsHelper::getNumberOfBlitsForFill(copySize, patternSize, rootDeviceEnvironment, isSystemMemoryPoolUsed); +} + template <> BlitCommandsResult BlitCommandsHelper::dispatchBlitMemoryByteFill(const BlitProperties &blitProperties, LinearStream &linearStream, RootDeviceEnvironment &rootDeviceEnvironment) { return NEO::BlitCommandsHelper::dispatchBlitMemoryFill(blitProperties, linearStream, rootDeviceEnvironment); diff --git a/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp b/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp index f9902587cf..3aedc8b79d 100644 --- a/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp +++ b/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp @@ -732,13 +732,15 @@ HWTEST2_F(BlitTests, givenPlatformWithBlitSyncPropertiesWithAndWithoutUseAdditio uint64_t srcGpuAddr = 0x12345; uint64_t dstGpuAddr = 0x54321; uint64_t clearGpuAddr = 0x5678; + size_t maxBlitWidth = static_cast(BlitCommandsHelper::getMaxBlitWidth(pDevice->getRootDeviceEnvironmentRef())); + size_t maxBlitHeight = static_cast(BlitCommandsHelper::getMaxBlitHeight(pDevice->getRootDeviceEnvironmentRef(), false)); std::unique_ptr srcAlloc(new MockGraphicsAllocation(src, srcGpuAddr, sizeof(src))); std::unique_ptr dstAlloc(new MockGraphicsAllocation(dst, dstGpuAddr, sizeof(dst))); std::unique_ptr clearColorAllocation(new MockGraphicsAllocation(clear, clearGpuAddr, sizeof(clear))); Vec3 
srcOffsets{1, 0, 0}; Vec3 dstOffsets{1, 0, 0}; - Vec3 copySize{(BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 1, 2, 2}; + Vec3 copySize{(maxBlitWidth * maxBlitHeight) + 1, 2, 2}; size_t srcRowPitch = 0; size_t srcSlicePitch = 0; @@ -749,7 +751,7 @@ HWTEST2_F(BlitTests, givenPlatformWithBlitSyncPropertiesWithAndWithoutUseAdditio auto blitProperties = NEO::BlitProperties::constructPropertiesForCopy(dstAlloc.get(), srcAlloc.get(), dstOffsets, srcOffsets, copySize, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, clearColorAllocation.get()); - + ASSERT_FALSE(blitProperties.isSystemMemoryPoolUsed); uint32_t streamBuffer[400] = {}; LinearStream stream(streamBuffer, sizeof(streamBuffer)); NEO::BlitCommandsHelper::dispatchBlitCommandsForBufferPerRow(blitProperties, stream, pDevice->getRootDeviceEnvironmentRef()); @@ -779,13 +781,15 @@ HWTEST2_F(BlitTests, givenPlatformWithBlitSyncPropertiesWithAndWithoutUseAdditio uint64_t srcGpuAddr = 0x12345; uint64_t dstGpuAddr = 0x54321; uint64_t clearGpuAddr = 0x5678; + size_t maxBlitWidth = static_cast(BlitCommandsHelper::getMaxBlitWidth(pDevice->getRootDeviceEnvironmentRef())); + size_t maxBlitHeight = static_cast(BlitCommandsHelper::getMaxBlitHeight(pDevice->getRootDeviceEnvironmentRef(), false)); std::unique_ptr srcAlloc(new MockGraphicsAllocation(src, srcGpuAddr, sizeof(src))); std::unique_ptr dstAlloc(new MockGraphicsAllocation(dst, dstGpuAddr, sizeof(dst))); std::unique_ptr clearColorAllocation(new MockGraphicsAllocation(clear, clearGpuAddr, sizeof(clear))); Vec3 srcOffsets{1, 0, 0}; Vec3 dstOffsets{1, 0, 0}; - Vec3 copySize{(BlitterConstants::maxBlitWidth + 1), (BlitterConstants::maxBlitHeight + 1), 3}; + Vec3 copySize{(maxBlitWidth + 1), (maxBlitHeight + 1), 3}; size_t srcRowPitch = 0; size_t srcSlicePitch = 0; @@ -796,6 +800,7 @@ HWTEST2_F(BlitTests, givenPlatformWithBlitSyncPropertiesWithAndWithoutUseAdditio auto blitProperties = NEO::BlitProperties::constructPropertiesForCopy(dstAlloc.get(), 
srcAlloc.get(), dstOffsets, srcOffsets, copySize, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, clearColorAllocation.get()); + ASSERT_FALSE(blitProperties.isSystemMemoryPoolUsed); uint32_t streamBuffer[400] = {}; LinearStream stream(streamBuffer, sizeof(streamBuffer)); @@ -827,8 +832,10 @@ HWTEST2_F(BlitTests, givenPlatformWithBlitSyncPropertiesWithAndWithoutUseAdditio Vec3 dstOffsets = {0, 0, 0}; Vec3 srcOffsets = {0, 0, 0}; - size_t copySizeX = BlitterConstants::maxBlitWidth - 1; - size_t copySizeY = BlitterConstants::maxBlitHeight - 1; + size_t maxBlitWidth = static_cast(BlitCommandsHelper::getMaxBlitWidth(pDevice->getRootDeviceEnvironmentRef())); + size_t maxBlitHeight = static_cast(BlitCommandsHelper::getMaxBlitHeight(pDevice->getRootDeviceEnvironmentRef(), false)); + size_t copySizeX = maxBlitWidth - 1; + size_t copySizeY = maxBlitHeight - 1; Vec3 copySize = {copySizeX, copySizeY, 0x3}; Vec3 srcSize = {copySizeX, copySizeY, 0x3}; Vec3 dstSize = {copySizeX, copySizeY, 0x3}; @@ -841,6 +848,7 @@ HWTEST2_F(BlitTests, givenPlatformWithBlitSyncPropertiesWithAndWithoutUseAdditio auto blitProperties = BlitProperties::constructPropertiesForCopy(&dstAlloc, &srcAlloc, dstOffsets, srcOffsets, copySize, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, &clearColorAllocation); + ASSERT_FALSE(blitProperties.isSystemMemoryPoolUsed); blitProperties.bytesPerPixel = 4; blitProperties.srcSize = srcSize; blitProperties.dstSize = dstSize; @@ -874,6 +882,8 @@ HWTEST2_F(BlitTests, givenPlatformWithBlitSyncPropertiesAndSingleBytePatternWith constexpr int32_t setHeight = 60; debugManager.flags.LimitBlitterMaxSetWidth.set(setWidth); debugManager.flags.LimitBlitterMaxSetHeight.set(setHeight); + debugManager.flags.LimitBlitterMaxWidth.set(setWidth); + debugManager.flags.LimitBlitterMaxHeight.set(setHeight); size_t dstSize = 3 * setWidth * setHeight + 1; MockGraphicsAllocation dstAlloc(0, 1u /*num gmms*/, AllocationType::internalHostMemory, @@ -884,6 +894,9 @@ 
HWTEST2_F(BlitTests, givenPlatformWithBlitSyncPropertiesAndSingleBytePatternWith auto blitProperties = BlitProperties::constructPropertiesForMemoryFill(&dstAlloc, dstSize, pattern, sizeof(uint8_t), 0); EXPECT_EQ(1u, blitProperties.fillPatternSize); + auto nBlits = NEO::BlitCommandsHelper::getNumberOfBlitsForColorFill(blitProperties.copySize, sizeof(uint8_t), pDevice->getRootDeviceEnvironmentRef(), blitProperties.isSystemMemoryPoolUsed); + EXPECT_EQ(4u, nBlits); + uint32_t streamBuffer[400] = {}; LinearStream stream(streamBuffer, sizeof(streamBuffer)); NEO::BlitCommandsHelper::dispatchBlitMemoryColorFill(blitProperties, stream, pDevice->getRootDeviceEnvironmentRef()); @@ -907,13 +920,21 @@ HWTEST2_F(BlitTests, givenPlatformWithBlitSyncPropertiesAndSingleBytePatternWith } HWTEST2_F(BlitTests, givenPlatformWithBlitSyncPropertiesWithAndWithoutUseAdditionalPropertiesWhenCallingDispatchBlitMemoryFillThenTheResultsAreTheSame, MatchAny) { - size_t dstSize = 2 * sizeof(uint32_t) * (BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + sizeof(uint32_t); + size_t maxBlitWidth = static_cast(BlitCommandsHelper::getMaxBlitWidth(pDevice->getRootDeviceEnvironmentRef())); + size_t maxBlitHeight = static_cast(BlitCommandsHelper::getMaxBlitHeight(pDevice->getRootDeviceEnvironmentRef(), true)); + size_t dstSize = 2 * sizeof(uint32_t) * (maxBlitWidth * maxBlitHeight) + sizeof(uint32_t); MockGraphicsAllocation dstAlloc(0, 1u /*num gmms*/, AllocationType::internalHostMemory, reinterpret_cast(0x1234), 0x1000, 0, dstSize, MemoryPool::system4KBPages, MemoryManager::maxOsContextCount); uint32_t pattern[4] = {}; pattern[0] = 0x4567; auto blitProperties = BlitProperties::constructPropertiesForMemoryFill(&dstAlloc, dstSize, pattern, sizeof(uint32_t), 0); + ASSERT_TRUE(blitProperties.isSystemMemoryPoolUsed); + + auto nBlitsColorFill = NEO::BlitCommandsHelper::getNumberOfBlitsForColorFill(blitProperties.copySize, sizeof(uint32_t), pDevice->getRootDeviceEnvironmentRef(), 
blitProperties.isSystemMemoryPoolUsed); + auto nBlitsFill = NEO::BlitCommandsHelper::getNumberOfBlitsForFill(blitProperties.copySize, sizeof(uint32_t), pDevice->getRootDeviceEnvironmentRef(), blitProperties.isSystemMemoryPoolUsed); + EXPECT_EQ(3u, nBlitsColorFill); + EXPECT_EQ(nBlitsFill, nBlitsColorFill); uint32_t streamBuffer[1200] = {}; LinearStream stream(streamBuffer, sizeof(streamBuffer)); @@ -938,7 +959,8 @@ HWTEST2_F(BlitTests, givenPlatformWithBlitSyncPropertiesWithAndWithoutUseAdditio } HWTEST_F(BlitTests, givenBlitPropertieswithImageOperationWhenCallingEstimateBlitCommandSizeThenBlockCopySizeIsReturned) { - Vec3 copySize{BlitterConstants::maxBlitWidth - 1, 1, 1}; + size_t maxBlitWidth = static_cast(BlitCommandsHelper::getMaxBlitWidth(pDevice->getRootDeviceEnvironmentRef())); + Vec3 copySize{maxBlitWidth - 1, 1, 1}; NEO::CsrDependencies csrDependencies{}; size_t totalSize = NEO::BlitCommandsHelper::estimateBlitCommandSize(copySize, csrDependencies, false, false, true, pDevice->getRootDeviceEnvironmentRef(), false, false);