diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 16101b4c63..ecb7f8db53 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2568,8 +2568,10 @@ ze_result_t CommandListCoreFamily::appendBlitFill(void *ptr, cons memcpy_s(&patternToCommand, sizeof(patternToCommand), pattern, patternSize); auto blitProperties = NEO::BlitProperties::constructPropertiesForMemoryFill(gpuAllocation, size, patternToCommand, patternSize, offset); + size_t nBlits = NEO::BlitCommandsHelper::getNumberOfBlitsForColorFill(blitProperties.copySize, patternSize, device->getNEODevice()->getRootDeviceEnvironmentRef(), blitProperties.isSystemMemoryPoolUsed); + bool useAdditionalTimestamp = nBlits > 1; if (useAdditionalBlitProperties) { - setAdditionalBlitProperties(blitProperties, signalEvent, false); + setAdditionalBlitProperties(blitProperties, signalEvent, useAdditionalTimestamp); } blitProperties.computeStreamPartitionCount = this->partitionCount; blitProperties.highPriority = isHighPriorityImmediateCmdList(); diff --git a/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp b/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp index 9c01220d76..4ca2063d0b 100644 --- a/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp +++ b/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp @@ -311,6 +311,11 @@ void BlitCommandsHelper::appendBlitCommandsForImages(const BlitPropertie dstSlicePitch = std::max(dstSlicePitch, dstRowPitch * dstQPitch); } +template <> +size_t BlitCommandsHelper::getNumberOfBlitsForByteFill(const Vec3 ©Size, size_t patternSize, const RootDeviceEnvironment &rootDeviceEnvironment, bool isSystemMemoryPoolUsed) { + return NEO::BlitCommandsHelper::getNumberOfBlitsForFill(copySize, patternSize, rootDeviceEnvironment, isSystemMemoryPoolUsed); +} + template <> BlitCommandsResult BlitCommandsHelper::dispatchBlitMemoryByteFill(const BlitProperties &blitProperties, LinearStream &linearStream, RootDeviceEnvironment &rootDeviceEnvironment) { return NEO::BlitCommandsHelper::dispatchBlitMemoryFill(blitProperties, linearStream, rootDeviceEnvironment); diff --git a/shared/source/helpers/blit_commands_helper.h b/shared/source/helpers/blit_commands_helper.h index 38ec96aa06..2731ee19ce 100644 --- a/shared/source/helpers/blit_commands_helper.h +++ b/shared/source/helpers/blit_commands_helper.h @@ -47,6 +47,9 @@ struct BlitCommandsHelper { bool debugPauseEnabled, bool blitterDirectSubmission, bool relaxedOrderingEnabled, const RootDeviceEnvironment &rootDeviceEnvironment); static size_t getNumberOfBlitsForCopyRegion(const Vec3 ©Size, const RootDeviceEnvironment &rootDeviceEnvironment, bool isSystemMemoryPoolUsed); static size_t getNumberOfBlitsForCopyPerRow(const Vec3 ©Size, const RootDeviceEnvironment &rootDeviceEnvironment, bool isSystemMemoryPoolUsed); + static size_t getNumberOfBlitsForColorFill(const Vec3 ©Size, size_t patternSize, const RootDeviceEnvironment &rootDeviceEnvironment, bool isSystemMemoryPoolUsed); + static size_t getNumberOfBlitsForByteFill(const Vec3 ©Size, size_t patternSize, const RootDeviceEnvironment &rootDeviceEnvironment, bool isSystemMemoryPoolUsed); + static size_t getNumberOfBlitsForFill(const Vec3 ©Size, size_t patternSize, const RootDeviceEnvironment &rootDeviceEnvironment, bool isSystemMemoryPoolUsed); static uint64_t calculateBlitCommandDestinationBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice); static uint64_t calculateBlitCommandSourceBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice); static uint64_t calculateBlitCommandDestinationBaseAddressCopyRegion(const BlitProperties &blitProperties, size_t slice); diff --git a/shared/source/helpers/blit_commands_helper_base.inl b/shared/source/helpers/blit_commands_helper_base.inl index 4e46d3df3f..5b34a2ae20 100644 --- a/shared/source/helpers/blit_commands_helper_base.inl +++ b/shared/source/helpers/blit_commands_helper_base.inl @@ -595,6 +595,37 @@ size_t BlitCommandsHelper::getNumberOfBlitsForCopyPerRow(const Vec3 +size_t BlitCommandsHelper::getNumberOfBlitsForFill(const Vec3 ©Size, size_t patternSize, const RootDeviceEnvironment &rootDeviceEnvironment, bool isSystemMemoryPoolUsed) { + auto maxWidthToFill = getMaxBlitWidth(rootDeviceEnvironment); + auto maxHeightToFill = getMaxBlitHeight(rootDeviceEnvironment, isSystemMemoryPoolUsed); + auto nBlits = 0; + uint64_t width = 1; + uint64_t height = 1; + uint64_t sizeToFill = copySize.x / patternSize; + while (sizeToFill != 0) { + if (sizeToFill <= maxWidthToFill) { + width = sizeToFill; + height = 1; + } else { + width = maxWidthToFill; + height = std::min((sizeToFill / width), maxHeightToFill); + } + sizeToFill -= (width * height); + nBlits++; + } + return nBlits; +} + +template +size_t BlitCommandsHelper::getNumberOfBlitsForColorFill(const Vec3 ©Size, size_t patternSize, const RootDeviceEnvironment &rootDeviceEnvironment, bool isSystemMemoryPoolUsed) { + if (patternSize == 1) { + return NEO::BlitCommandsHelper::getNumberOfBlitsForByteFill(copySize, patternSize, rootDeviceEnvironment, isSystemMemoryPoolUsed); + } else { + return NEO::BlitCommandsHelper::getNumberOfBlitsForFill(copySize, patternSize, rootDeviceEnvironment, isSystemMemoryPoolUsed); + } +} + template bool BlitCommandsHelper::preBlitCommandWARequired() { return false; diff --git a/shared/source/helpers/blit_commands_helper_pvc_and_later.inl b/shared/source/helpers/blit_commands_helper_pvc_and_later.inl index cfd9fdfaa3..3e9d930572 100644 --- a/shared/source/helpers/blit_commands_helper_pvc_and_later.inl +++ b/shared/source/helpers/blit_commands_helper_pvc_and_later.inl @@ -11,6 +11,28 @@ namespace NEO { +template +size_t BlitCommandsHelper::getNumberOfBlitsForByteFill(const Vec3 ©Size, size_t patternSize, const RootDeviceEnvironment &rootDeviceEnvironment, bool isSystemMemoryPoolUsed) { + auto maxWidthToFill = getMaxBlitSetWidth(rootDeviceEnvironment); + auto maxHeightToFill = getMaxBlitSetHeight(rootDeviceEnvironment); + auto nBlits = 0; + uint64_t width = 1; + uint64_t height = 1; + uint64_t sizeToFill = copySize.x / patternSize; + while (sizeToFill != 0) { + if (sizeToFill <= maxWidthToFill) { + width = sizeToFill; + height = 1; + } else { + width = maxWidthToFill; + height = std::min((sizeToFill / width), maxHeightToFill); + } + sizeToFill -= (width * height); + nBlits++; + } + return nBlits; +} + template BlitCommandsResult BlitCommandsHelper::dispatchBlitMemoryByteFill(const BlitProperties &blitProperties, LinearStream &linearStream, RootDeviceEnvironment &rootDeviceEnvironment) { using MEM_SET = typename Family::MEM_SET; diff --git a/shared/source/xe_hpg_core/command_stream_receiver_hw_xe_hpg_core.cpp b/shared/source/xe_hpg_core/command_stream_receiver_hw_xe_hpg_core.cpp index d232499393..931ccc707d 100644 --- a/shared/source/xe_hpg_core/command_stream_receiver_hw_xe_hpg_core.cpp +++ b/shared/source/xe_hpg_core/command_stream_receiver_hw_xe_hpg_core.cpp @@ -150,6 +150,11 @@ void BlitCommandsHelper::appendBlitCommandsBlockCopy(const BlitPropertie } } +template <> +size_t BlitCommandsHelper::getNumberOfBlitsForByteFill(const Vec3 ©Size, size_t patternSize, const RootDeviceEnvironment &rootDeviceEnvironment, bool isSystemMemoryPoolUsed) { + return NEO::BlitCommandsHelper::getNumberOfBlitsForFill(copySize, patternSize, rootDeviceEnvironment, isSystemMemoryPoolUsed); +} + template <> BlitCommandsResult BlitCommandsHelper::dispatchBlitMemoryByteFill(const BlitProperties &blitProperties, LinearStream &linearStream, RootDeviceEnvironment &rootDeviceEnvironment) { return NEO::BlitCommandsHelper::dispatchBlitMemoryFill(blitProperties, linearStream, rootDeviceEnvironment); diff --git a/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp b/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp index f9902587cf..3aedc8b79d 100644 --- a/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp +++ b/shared/test/unit_test/helpers/blit_commands_helper_tests.cpp @@ -732,13 +732,15 @@ HWTEST2_F(BlitTests, givenPlatformWithBlitSyncPropertiesWithAndWithoutUseAdditio uint64_t srcGpuAddr = 0x12345; uint64_t dstGpuAddr = 0x54321; uint64_t clearGpuAddr = 0x5678; + size_t maxBlitWidth = static_cast(BlitCommandsHelper::getMaxBlitWidth(pDevice->getRootDeviceEnvironmentRef())); + size_t maxBlitHeight = static_cast(BlitCommandsHelper::getMaxBlitHeight(pDevice->getRootDeviceEnvironmentRef(), false)); std::unique_ptr srcAlloc(new MockGraphicsAllocation(src, srcGpuAddr, sizeof(src))); std::unique_ptr dstAlloc(new MockGraphicsAllocation(dst, dstGpuAddr, sizeof(dst))); std::unique_ptr clearColorAllocation(new MockGraphicsAllocation(clear, clearGpuAddr, sizeof(clear))); Vec3 srcOffsets{1, 0, 0}; Vec3 dstOffsets{1, 0, 0}; - Vec3 copySize{(BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + 1, 2, 2}; + Vec3 copySize{(maxBlitWidth * maxBlitHeight) + 1, 2, 2}; size_t srcRowPitch = 0; size_t srcSlicePitch = 0; @@ -749,7 +751,7 @@ HWTEST2_F(BlitTests, givenPlatformWithBlitSyncPropertiesWithAndWithoutUseAdditio auto blitProperties = NEO::BlitProperties::constructPropertiesForCopy(dstAlloc.get(), srcAlloc.get(), dstOffsets, srcOffsets, copySize, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, clearColorAllocation.get()); - + ASSERT_FALSE(blitProperties.isSystemMemoryPoolUsed); uint32_t streamBuffer[400] = {}; LinearStream stream(streamBuffer, sizeof(streamBuffer)); NEO::BlitCommandsHelper::dispatchBlitCommandsForBufferPerRow(blitProperties, stream, pDevice->getRootDeviceEnvironmentRef()); @@ -779,13 +781,15 @@ HWTEST2_F(BlitTests, givenPlatformWithBlitSyncPropertiesWithAndWithoutUseAdditio uint64_t srcGpuAddr = 0x12345; uint64_t dstGpuAddr = 0x54321; uint64_t clearGpuAddr = 0x5678; + size_t maxBlitWidth = static_cast(BlitCommandsHelper::getMaxBlitWidth(pDevice->getRootDeviceEnvironmentRef())); + size_t maxBlitHeight = static_cast(BlitCommandsHelper::getMaxBlitHeight(pDevice->getRootDeviceEnvironmentRef(), false)); std::unique_ptr srcAlloc(new MockGraphicsAllocation(src, srcGpuAddr, sizeof(src))); std::unique_ptr dstAlloc(new MockGraphicsAllocation(dst, dstGpuAddr, sizeof(dst))); std::unique_ptr clearColorAllocation(new MockGraphicsAllocation(clear, clearGpuAddr, sizeof(clear))); Vec3 srcOffsets{1, 0, 0}; Vec3 dstOffsets{1, 0, 0}; - Vec3 copySize{(BlitterConstants::maxBlitWidth + 1), (BlitterConstants::maxBlitHeight + 1), 3}; + Vec3 copySize{(maxBlitWidth + 1), (maxBlitHeight + 1), 3}; size_t srcRowPitch = 0; size_t srcSlicePitch = 0; @@ -796,6 +800,7 @@ HWTEST2_F(BlitTests, givenPlatformWithBlitSyncPropertiesWithAndWithoutUseAdditio auto blitProperties = NEO::BlitProperties::constructPropertiesForCopy(dstAlloc.get(), srcAlloc.get(), dstOffsets, srcOffsets, copySize, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, clearColorAllocation.get()); + ASSERT_FALSE(blitProperties.isSystemMemoryPoolUsed); uint32_t streamBuffer[400] = {}; LinearStream stream(streamBuffer, sizeof(streamBuffer)); @@ -827,8 +832,10 @@ HWTEST2_F(BlitTests, givenPlatformWithBlitSyncPropertiesWithAndWithoutUseAdditio Vec3 dstOffsets = {0, 0, 0}; Vec3 srcOffsets = {0, 0, 0}; - size_t copySizeX = BlitterConstants::maxBlitWidth - 1; - size_t copySizeY = BlitterConstants::maxBlitHeight - 1; + size_t maxBlitWidth = static_cast(BlitCommandsHelper::getMaxBlitWidth(pDevice->getRootDeviceEnvironmentRef())); + size_t maxBlitHeight = static_cast(BlitCommandsHelper::getMaxBlitHeight(pDevice->getRootDeviceEnvironmentRef(), false)); + size_t copySizeX = maxBlitWidth - 1; + size_t copySizeY = maxBlitHeight - 1; Vec3 copySize = {copySizeX, copySizeY, 0x3}; Vec3 srcSize = {copySizeX, copySizeY, 0x3}; Vec3 dstSize = {copySizeX, copySizeY, 0x3}; @@ -841,6 +848,7 @@ HWTEST2_F(BlitTests, givenPlatformWithBlitSyncPropertiesWithAndWithoutUseAdditio auto blitProperties = BlitProperties::constructPropertiesForCopy(&dstAlloc, &srcAlloc, dstOffsets, srcOffsets, copySize, srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, &clearColorAllocation); + ASSERT_FALSE(blitProperties.isSystemMemoryPoolUsed); blitProperties.bytesPerPixel = 4; blitProperties.srcSize = srcSize; blitProperties.dstSize = dstSize; @@ -874,6 +882,8 @@ HWTEST2_F(BlitTests, givenPlatformWithBlitSyncPropertiesAndSingleBytePatternWith constexpr int32_t setHeight = 60; debugManager.flags.LimitBlitterMaxSetWidth.set(setWidth); debugManager.flags.LimitBlitterMaxSetHeight.set(setHeight); + debugManager.flags.LimitBlitterMaxWidth.set(setWidth); + debugManager.flags.LimitBlitterMaxHeight.set(setHeight); size_t dstSize = 3 * setWidth * setHeight + 1; MockGraphicsAllocation dstAlloc(0, 1u /*num gmms*/, AllocationType::internalHostMemory, @@ -884,6 +894,9 @@ HWTEST2_F(BlitTests, givenPlatformWithBlitSyncPropertiesAndSingleBytePatternWith auto blitProperties = BlitProperties::constructPropertiesForMemoryFill(&dstAlloc, dstSize, pattern, sizeof(uint8_t), 0); EXPECT_EQ(1u, blitProperties.fillPatternSize); + auto nBlits = NEO::BlitCommandsHelper::getNumberOfBlitsForColorFill(blitProperties.copySize, sizeof(uint8_t), pDevice->getRootDeviceEnvironmentRef(), blitProperties.isSystemMemoryPoolUsed); + EXPECT_EQ(4u, nBlits); + uint32_t streamBuffer[400] = {}; LinearStream stream(streamBuffer, sizeof(streamBuffer)); NEO::BlitCommandsHelper::dispatchBlitMemoryColorFill(blitProperties, stream, pDevice->getRootDeviceEnvironmentRef()); @@ -907,13 +920,21 @@ HWTEST2_F(BlitTests, givenPlatformWithBlitSyncPropertiesAndSingleBytePatternWith } HWTEST2_F(BlitTests, givenPlatformWithBlitSyncPropertiesWithAndWithoutUseAdditionalPropertiesWhenCallingDispatchBlitMemoryFillThenTheResultsAreTheSame, MatchAny) { - size_t dstSize = 2 * sizeof(uint32_t) * (BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight) + sizeof(uint32_t); + size_t maxBlitWidth = static_cast(BlitCommandsHelper::getMaxBlitWidth(pDevice->getRootDeviceEnvironmentRef())); + size_t maxBlitHeight = static_cast(BlitCommandsHelper::getMaxBlitHeight(pDevice->getRootDeviceEnvironmentRef(), true)); + size_t dstSize = 2 * sizeof(uint32_t) * (maxBlitWidth * maxBlitHeight) + sizeof(uint32_t); MockGraphicsAllocation dstAlloc(0, 1u /*num gmms*/, AllocationType::internalHostMemory, reinterpret_cast(0x1234), 0x1000, 0, dstSize, MemoryPool::system4KBPages, MemoryManager::maxOsContextCount); uint32_t pattern[4] = {}; pattern[0] = 0x4567; auto blitProperties = BlitProperties::constructPropertiesForMemoryFill(&dstAlloc, dstSize, pattern, sizeof(uint32_t), 0); + ASSERT_TRUE(blitProperties.isSystemMemoryPoolUsed); + + auto nBlitsColorFill = NEO::BlitCommandsHelper::getNumberOfBlitsForColorFill(blitProperties.copySize, sizeof(uint32_t), pDevice->getRootDeviceEnvironmentRef(), blitProperties.isSystemMemoryPoolUsed); + auto nBlitsFill = NEO::BlitCommandsHelper::getNumberOfBlitsForFill(blitProperties.copySize, sizeof(uint32_t), pDevice->getRootDeviceEnvironmentRef(), blitProperties.isSystemMemoryPoolUsed); + EXPECT_EQ(3u, nBlitsColorFill); + EXPECT_EQ(nBlitsFill, nBlitsColorFill); uint32_t streamBuffer[1200] = {}; LinearStream stream(streamBuffer, sizeof(streamBuffer)); @@ -938,7 +959,8 @@ HWTEST2_F(BlitTests, givenPlatformWithBlitSyncPropertiesWithAndWithoutUseAdditio } HWTEST_F(BlitTests, givenBlitPropertieswithImageOperationWhenCallingEstimateBlitCommandSizeThenBlockCopySizeIsReturned) { - Vec3 copySize{BlitterConstants::maxBlitWidth - 1, 1, 1}; + size_t maxBlitWidth = static_cast(BlitCommandsHelper::getMaxBlitWidth(pDevice->getRootDeviceEnvironmentRef())); + Vec3 copySize{maxBlitWidth - 1, 1, 1}; NEO::CsrDependencies csrDependencies{}; size_t totalSize = NEO::BlitCommandsHelper::estimateBlitCommandSize(copySize, csrDependencies, false, false, true, pDevice->getRootDeviceEnvironmentRef(), false, false);