From c2dbdb6797c83d5815fd461244a669fd2e5fa9ff Mon Sep 17 00:00:00 2001 From: Bartosz Dunajski Date: Wed, 22 Jan 2025 14:38:36 +0000 Subject: [PATCH] refactor: move blit post sync data to BlitProperties Related-To: NEO-13003 Signed-off-by: Bartosz Dunajski --- .../source/printf_handler/printf_handler.cpp | 4 +- opencl/source/command_queue/enqueue_common.h | 9 +- opencl/source/helpers/task_information.cpp | 9 +- opencl/source/program/printf_handler.cpp | 4 +- .../copy_engine_aub_tests_xehp_and_later.h | 4 +- .../command_queue/blit_enqueue_fixture.h | 4 +- ...and_stream_receiver_flush_task_2_tests.cpp | 16 +- .../command_stream_receiver_hw_2_tests.cpp | 30 ++-- .../command_stream_receiver_hw_fixture.h | 4 +- opencl/test/unit_test/kernel/kernel_tests.cpp | 2 +- opencl/test/unit_test/mocks/mock_context.cpp | 4 +- ..._stream_receiver_hw_tests_xe2_hpg_core.cpp | 15 +- .../copy_engine_tests_xe2_hpg_core.cpp | 4 +- ...mand_stream_receiver_hw_tests_xe3_core.cpp | 14 +- .../xe3_core/copy_engine_tests_xe3_core.cpp | 2 +- ...d_stream_receiver_hw_tests_xe_hpc_core.cpp | 2 +- .../copy_engine_tests_xe_hpc_core.cpp | 4 +- .../copy_engine_tests_xe_hpg_core.cpp | 4 +- .../command_stream/command_stream_receiver.h | 2 +- .../command_stream_receiver_hw.h | 2 +- .../command_stream_receiver_hw_base.inl | 18 +- .../helpers/blit_commands_helper_base.inl | 2 +- shared/source/helpers/blit_helper.cpp | 2 +- shared/source/helpers/blit_properties.cpp | 154 +++++++++--------- shared/source/helpers/blit_properties.h | 31 +++- .../libult/ult_aub_command_stream_receiver.h | 6 +- .../libult/ult_command_stream_receiver.h | 6 +- .../mocks/mock_command_stream_receiver.h | 6 +- .../aub_command_stream_receiver_1_tests.cpp | 2 +- .../command_stream_receiver_tests.cpp | 28 ++-- 30 files changed, 209 insertions(+), 185 deletions(-) diff --git a/level_zero/core/source/printf_handler/printf_handler.cpp b/level_zero/core/source/printf_handler/printf_handler.cpp index ec429d1fc0..55aa097f2f 100644 --- 
a/level_zero/core/source/printf_handler/printf_handler.cpp +++ b/level_zero/core/source/printf_handler/printf_handler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -60,7 +60,7 @@ void PrintfHandler::printOutput(const KernelImmutableData *kernelData, printfBuffer->getGpuAddress(), 0, 0, 0, Vec3(printfOutputSize, 0, 0), 0, 0, 0, 0)); - const auto newTaskCount = bcsEngine->commandStreamReceiver->flushBcsTask(blitPropertiesContainer, true, false, *selectedDevice); + const auto newTaskCount = bcsEngine->commandStreamReceiver->flushBcsTask(blitPropertiesContainer, true, *selectedDevice); if (newTaskCount == NEO::CompletionStamp::gpuHang) { PRINT_DEBUG_STRING(NEO::debugManager.flags.PrintDebugMessages.get(), stderr, "Failed to copy printf buffer.\n", ""); printfOutputBuffer = static_cast(printfBuffer->getUnderlyingBuffer()); diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 7f0e57fb54..a5c2970ad5 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -591,7 +591,8 @@ BlitProperties CommandQueueHw::processDispatchForBlitEnqueue(CommandS } blitProperties.multiRootDeviceEventSync = multiRootDeviceEventSync; auto currentTimestampPacketNode = timestampPacketContainer->peekNodes().at(0); - blitProperties.outputTimestampPacket = currentTimestampPacketNode; + blitProperties.blitSyncProperties.outputTimestampPacket = currentTimestampPacketNode; + blitProperties.blitSyncProperties.syncMode = (eventsRequest.outEvent && isProfilingEnabled()) ? 
BlitSyncMode::timestamp : BlitSyncMode::immediate; if (commandStream) { if (timestampPacketDependencies.cacheFlushNodes.peekNodes().size() > 0) { @@ -948,7 +949,7 @@ CompletionStamp CommandQueueHw::enqueueNonBlocked( if (enqueueProperties.blitPropertiesContainer->size() > 0) { auto bcsCsr = getBcsForAuxTranslation(); - const auto newTaskCount = bcsCsr->flushBcsTask(*enqueueProperties.blitPropertiesContainer, false, this->isProfilingEnabled(), getDevice()); + const auto newTaskCount = bcsCsr->flushBcsTask(*enqueueProperties.blitPropertiesContainer, false, getDevice()); if (newTaskCount > CompletionStamp::notReady) { CompletionStamp completionStamp{}; completionStamp.taskCount = newTaskCount; @@ -1219,7 +1220,7 @@ CompletionStamp CommandQueueHw::enqueueCommandWithoutKernel( if (enqueueProperties.operation == EnqueueProperties::Operation::blit) { UNRECOVERABLE_IF(!enqueueProperties.blitPropertiesContainer); if (bcsCsr) { - const auto newTaskCount = bcsCsr->flushBcsTask(*enqueueProperties.blitPropertiesContainer, false, this->isProfilingEnabled(), getDevice()); + const auto newTaskCount = bcsCsr->flushBcsTask(*enqueueProperties.blitPropertiesContainer, false, getDevice()); if (newTaskCount > CompletionStamp::notReady) { CompletionStamp completionStamp{}; completionStamp.taskCount = newTaskCount; diff --git a/opencl/source/helpers/task_information.cpp b/opencl/source/helpers/task_information.cpp index 141328b5c4..16bed2d893 100644 --- a/opencl/source/helpers/task_information.cpp +++ b/opencl/source/helpers/task_information.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -284,7 +284,7 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term } if (kernelOperation->blitPropertiesContainer.size() > 0) { - const auto newTaskCount = bcsCsrForAuxTranslation->flushBcsTask(kernelOperation->blitPropertiesContainer, false, 
commandQueue.isProfilingEnabled(), commandQueue.getDevice()); + const auto newTaskCount = bcsCsrForAuxTranslation->flushBcsTask(kernelOperation->blitPropertiesContainer, false, commandQueue.getDevice()); if (newTaskCount <= CompletionStamp::notReady) { commandQueue.updateBcsTaskCount(bcsCsrForAuxTranslation->getOsContext().getEngineType(), newTaskCount); } else { @@ -327,13 +327,14 @@ TaskCountType CommandWithoutKernel::dispatchBlitOperation() { blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies->previousEnqueueNodes); blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies->barrierNodes); blitProperties.csrDependencies.timestampPacketContainer.push_back(&timestampPacketDependencies->multiCsrDependencies); - blitProperties.outputTimestampPacket = currentTimestampPacketNodes->peekNodes()[0]; + blitProperties.blitSyncProperties.outputTimestampPacket = currentTimestampPacketNodes->peekNodes()[0]; + blitProperties.blitSyncProperties.syncMode = (eventsRequest.outEvent && commandQueue.isProfilingEnabled()) ? 
BlitSyncMode::timestamp : BlitSyncMode::immediate; if (commandQueue.getContext().getRootDeviceIndices().size() > 1) { eventsRequest.fillCsrDependenciesForRootDevices(blitProperties.csrDependencies, *bcsCsr); } - const auto newTaskCount = bcsCsr->flushBcsTask(kernelOperation->blitPropertiesContainer, false, commandQueue.isProfilingEnabled(), commandQueue.getDevice()); + const auto newTaskCount = bcsCsr->flushBcsTask(kernelOperation->blitPropertiesContainer, false, commandQueue.getDevice()); if (newTaskCount > CompletionStamp::notReady) { return newTaskCount; } diff --git a/opencl/source/program/printf_handler.cpp b/opencl/source/program/printf_handler.cpp index 9a299881e5..1497f9c887 100644 --- a/opencl/source/program/printf_handler.cpp +++ b/opencl/source/program/printf_handler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -99,7 +99,7 @@ bool PrintfHandler::printEnqueueOutput() { printfSurface->getGpuAddress(), 0, 0, 0, Vec3(printfOutputSize, 0, 0), 0, 0, 0, 0)); - const auto newTaskCount = bcsEngine.commandStreamReceiver->flushBcsTask(blitPropertiesContainer, true, false, device); + const auto newTaskCount = bcsEngine.commandStreamReceiver->flushBcsTask(blitPropertiesContainer, true, device); if (newTaskCount > CompletionStamp::notReady) { return false; } diff --git a/opencl/test/unit_test/aub_tests/command_stream/copy_engine_aub_tests_xehp_and_later.h b/opencl/test/unit_test/aub_tests/command_stream/copy_engine_aub_tests_xehp_and_later.h index e33c8b9604..7408353874 100644 --- a/opencl/test/unit_test/aub_tests/command_stream/copy_engine_aub_tests_xehp_and_later.h +++ b/opencl/test/unit_test/aub_tests/command_stream/copy_engine_aub_tests_xehp_and_later.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022-2024 Intel Corporation + * Copyright (C) 2022-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -121,7 +121,7 @@ struct CopyEngineXeHPAndLater : public 
MulticontextOclAubFixture, public ::testi void executeBlitCommand(const BlitProperties &blitProperties, bool blocking) { BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); - bcsCsr->flushBcsTask(blitPropertiesContainer, blocking, false, rootDevice->getDevice()); + bcsCsr->flushBcsTask(blitPropertiesContainer, blocking, rootDevice->getDevice()); } template diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_fixture.h b/opencl/test/unit_test/command_queue/blit_enqueue_fixture.h index 2631a51b7f..1a36be9f23 100644 --- a/opencl/test/unit_test/command_queue/blit_enqueue_fixture.h +++ b/opencl/test/unit_test/command_queue/blit_enqueue_fixture.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022-2024 Intel Corporation + * Copyright (C) 2022-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -57,7 +57,7 @@ struct BlitEnqueueTests : public ::testing::Test { BlitPropertiesContainer container; container.push_back(blitProperties); - bcsCsr->flushBcsTask(container, true, false, const_cast(device)); + bcsCsr->flushBcsTask(container, true, const_cast(device)); return BlitOperationResult::success; }; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp index b78e52e93d..45f44d08ee 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -1273,7 +1273,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenPageTableManagerPointerWhenCa BlitPropertiesContainer container; container.push_back(blitProperties); - bcsCsr->flushBcsTask(container, true, false, *pDevice); + bcsCsr->flushBcsTask(container, true, *pDevice); 
EXPECT_TRUE(bcsCsr->pageTableManagerInitialized); EXPECT_FALSE(bcsCsr2->pageTableManagerInitialized); @@ -1282,7 +1282,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenPageTableManagerPointerWhenCa EXPECT_EQ(bcsCsr, pageTableManager->initContextAuxTableRegisterParamsPassed[0].initialBBHandle); pDevice->resetCommandStreamReceiver(bcsCsr2); - bcsCsr2->flushBcsTask(container, true, false, *pDevice); + bcsCsr2->flushBcsTask(container, true, *pDevice); EXPECT_TRUE(bcsCsr2->pageTableManagerInitialized); @@ -1318,11 +1318,11 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenPageTableManagerPointerWhenCa BlitPropertiesContainer container; container.push_back(blitProperties); - bcsCsr->flushBcsTask(container, true, false, *pDevice); + bcsCsr->flushBcsTask(container, true, *pDevice); EXPECT_TRUE(bcsCsr->pageTableManagerInitialized); - bcsCsr->flushBcsTask(container, true, false, *pDevice); + bcsCsr->flushBcsTask(container, true, *pDevice); memoryManager->freeGraphicsMemory(graphicsAllocation); @@ -1359,18 +1359,18 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNullPageTableManagerWhenCallB BlitPropertiesContainer container; container.push_back(blitProperties); - bcsCsr->flushBcsTask(container, true, false, *pDevice); + bcsCsr->flushBcsTask(container, true, *pDevice); EXPECT_FALSE(bcsCsr->pageTableManagerInitialized); EXPECT_FALSE(bcsCsr2->pageTableManagerInitialized); pDevice->resetCommandStreamReceiver(bcsCsr2); - bcsCsr2->flushBcsTask(container, true, false, *pDevice); + bcsCsr2->flushBcsTask(container, true, *pDevice); EXPECT_FALSE(bcsCsr2->pageTableManagerInitialized); bcsCsr2->pageTableManagerInitialized = true; - EXPECT_NO_THROW(bcsCsr2->flushBcsTask(container, true, false, *pDevice)); + EXPECT_NO_THROW(bcsCsr2->flushBcsTask(container, true, *pDevice)); memoryManager->freeGraphicsMemory(graphicsAllocation); } diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp 
b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp index fa6d07f93b..75092931fc 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -479,7 +479,7 @@ HWTEST_F(BcsTests, givenMultipleBlitPropertiesWhenDispatchingThenProgramCommands blitPropertiesContainer.push_back(blitProperties1); blitPropertiesContainer.push_back(blitProperties2); - csr.flushBcsTask(blitPropertiesContainer, true, false, *pDevice); + csr.flushBcsTask(blitPropertiesContainer, true, *pDevice); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream); @@ -528,7 +528,7 @@ HWTEST_F(BcsTests, whenBlitBufferThenCommandBufferHasProperTaskCount) { BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); - csr.flushBcsTask(blitPropertiesContainer, true, false, *pDevice); + csr.flushBcsTask(blitPropertiesContainer, true, *pDevice); EXPECT_EQ(csr.getCS(0u).getGraphicsAllocation()->getTaskCount(csr.getOsContext().getContextId()), csr.peekTaskCount()); EXPECT_EQ(csr.getCS(0u).getGraphicsAllocation()->getResidencyTaskCount(csr.getOsContext().getContextId()), csr.peekTaskCount()); @@ -559,7 +559,7 @@ HWTEST_F(BcsTests, givenUpdateTaskCountFromWaitWhenBlitBufferThenCsrHasProperTas auto taskCount = csr.peekTaskCount(); - csr.flushBcsTask(blitPropertiesContainer, false, false, *pDevice); + csr.flushBcsTask(blitPropertiesContainer, false, *pDevice); EXPECT_EQ(csr.peekTaskCount(), taskCount + 1); EXPECT_EQ(csr.peekLatestFlushedTaskCount(), taskCount); @@ -587,12 +587,13 @@ HWTEST_F(BcsTests, givenProfilingEnabledWhenBlitBufferThenCommandBufferIsConstru 0, 0, {1, 1, 1}, 0, 0, 0, 0); MockTimestampPacketContainer timestamp(*bcsCsr->getTimestampPacketAllocator(), 1u); - 
blitProperties.outputTimestampPacket = timestamp.getNode(0); + blitProperties.blitSyncProperties.outputTimestampPacket = timestamp.getNode(0); + blitProperties.blitSyncProperties.syncMode = BlitSyncMode::timestamp; BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); - bcsCsr->flushBcsTask(blitPropertiesContainer, false, true, *pDevice); + bcsCsr->flushBcsTask(blitPropertiesContainer, false, *pDevice); HardwareParse hwParser; hwParser.parseCommands(bcsCsr->commandStream); @@ -639,12 +640,12 @@ HWTEST_F(BcsTests, givenProfilingEnabledWhenBlitBufferAndForceTlbFlushAfterCopyT 0, 0, {1, 1, 1}, 0, 0, 0, 0); MockTimestampPacketContainer timestamp(*bcsCsr->getTimestampPacketAllocator(), 1u); - blitProperties.outputTimestampPacket = timestamp.getNode(0); - + blitProperties.blitSyncProperties.outputTimestampPacket = timestamp.getNode(0); + blitProperties.blitSyncProperties.syncMode = BlitSyncMode::timestamp; BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); - bcsCsr->flushBcsTask(blitPropertiesContainer, false, true, *pDevice); + bcsCsr->flushBcsTask(blitPropertiesContainer, false, *pDevice); HardwareParse hwParser; hwParser.parseCommands(bcsCsr->commandStream); @@ -696,7 +697,7 @@ HWTEST_F(BcsTests, givenProfilingDisabledWhenBlitBufferAndForceTlbFlushAfterCopy BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); - bcsCsr->flushBcsTask(blitPropertiesContainer, false, false, *pDevice); + bcsCsr->flushBcsTask(blitPropertiesContainer, false, *pDevice); HardwareParse hwParser; hwParser.parseCommands(bcsCsr->commandStream); @@ -737,12 +738,13 @@ HWTEST_F(BcsTests, givenNotInitializedOsContextWhenBlitBufferIsCalledThenInitial 0, 0, {1, 1, 1}, 0, 0, 0, 0); MockTimestampPacketContainer timestamp(*bcsCsr->getTimestampPacketAllocator(), 1u); - blitProperties.outputTimestampPacket = timestamp.getNode(0); + 
blitProperties.blitSyncProperties.outputTimestampPacket = timestamp.getNode(0); + blitProperties.blitSyncProperties.syncMode = BlitSyncMode::timestamp; BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); EXPECT_FALSE(bcsOsContext->isInitialized()); - bcsCsr->flushBcsTask(blitPropertiesContainer, false, true, *pDevice); + bcsCsr->flushBcsTask(blitPropertiesContainer, false, *pDevice); EXPECT_TRUE(bcsOsContext->isInitialized()); } @@ -780,7 +782,7 @@ HWTEST_F(BcsTests, givenInputAllocationsWhenBlitDispatchedThenMakeAllAllocations blitPropertiesContainer.push_back(blitProperties1); blitPropertiesContainer.push_back(blitProperties2); - csr.flushBcsTask(blitPropertiesContainer, false, false, *pDevice); + csr.flushBcsTask(blitPropertiesContainer, false, *pDevice); expectedCalled++; uint32_t residentAllocationsNum = 5u; @@ -847,7 +849,7 @@ HWTEST_F(BcsTests, givenFenceAllocationIsRequiredWhenBlitDispatchedThenMakeAllAl blitPropertiesContainer.push_back(blitProperties1); blitPropertiesContainer.push_back(blitProperties2); - bcsCsr->flushBcsTask(blitPropertiesContainer, false, false, *pDevice); + bcsCsr->flushBcsTask(blitPropertiesContainer, false, *pDevice); uint32_t residentAllocationsNum = 6u; EXPECT_TRUE(bcsCsr->isMadeResident(graphicsAllocation1)); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_fixture.h b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_fixture.h index 87b719bf99..9720cfe7e1 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_fixture.h +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_fixture.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2023 Intel Corporation + * Copyright (C) 2020-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -30,7 +30,7 @@ struct BcsTests : public Test { BlitPropertiesContainer container; container.push_back(blitProperties); - return bcsCsr->flushBcsTask(container, blocking, false, 
device); + return bcsCsr->flushBcsTask(container, blocking, device); } TimestampPacketContainer timestampPacketContainer; diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index 0a0b35f198..be1c31d834 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -831,7 +831,7 @@ class CommandStreamReceiverMock : public CommandStreamReceiver { WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, QueueThrottle throttle) override { return WaitStatus::ready; } - TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; }; + TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, Device &device) override { return taskCount; }; CompletionStamp flushTask( LinearStream &commandStream, diff --git a/opencl/test/unit_test/mocks/mock_context.cpp b/opencl/test/unit_test/mocks/mock_context.cpp index c5a043610e..9948869586 100644 --- a/opencl/test/unit_test/mocks/mock_context.cpp +++ b/opencl/test/unit_test/mocks/mock_context.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -198,7 +198,7 @@ BcsMockContext::BcsMockContext(ClDevice *device) : MockContext(device) { BlitPropertiesContainer container; container.push_back(blitProperties); - bcsCsr->flushBcsTask(container, true, false, const_cast(device)); + bcsCsr->flushBcsTask(container, true, const_cast(device)); return BlitOperationResult::success; }; diff --git a/opencl/test/unit_test/xe2_hpg_core/command_stream_receiver_hw_tests_xe2_hpg_core.cpp b/opencl/test/unit_test/xe2_hpg_core/command_stream_receiver_hw_tests_xe2_hpg_core.cpp index f9730390f3..3cde3c3e55 100644 --- 
a/opencl/test/unit_test/xe2_hpg_core/command_stream_receiver_hw_tests_xe2_hpg_core.cpp +++ b/opencl/test/unit_test/xe2_hpg_core/command_stream_receiver_hw_tests_xe2_hpg_core.cpp @@ -77,18 +77,19 @@ XE2_HPG_CORETEST_F(CommandStreamReceiverXe2HpgCoreTests, givenProfilingEnabledWh 0, 0, {1, 1, 1}, 0, 0, 0, 0); MockTimestampPacketContainer timestamp(*bcsCsr->getTimestampPacketAllocator(), 1u); - blitProperties.outputTimestampPacket = timestamp.getNode(0); + blitProperties.blitSyncProperties.outputTimestampPacket = timestamp.getNode(0); + blitProperties.blitSyncProperties.syncMode = BlitSyncMode::timestamp; - auto timestampContextStartGpuAddress = TimestampPacketHelper::getContextStartGpuAddress(*blitProperties.outputTimestampPacket); - auto timestampGlobalStartAddress = TimestampPacketHelper::getGlobalStartGpuAddress(*blitProperties.outputTimestampPacket); + auto timestampContextStartGpuAddress = TimestampPacketHelper::getContextStartGpuAddress(*blitProperties.blitSyncProperties.outputTimestampPacket); + auto timestampGlobalStartAddress = TimestampPacketHelper::getGlobalStartGpuAddress(*blitProperties.blitSyncProperties.outputTimestampPacket); - auto timestampContextEndGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*blitProperties.outputTimestampPacket); - auto timestampGlobalEndAddress = TimestampPacketHelper::getGlobalEndGpuAddress(*blitProperties.outputTimestampPacket); + auto timestampContextEndGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*blitProperties.blitSyncProperties.outputTimestampPacket); + auto timestampGlobalEndAddress = TimestampPacketHelper::getGlobalEndGpuAddress(*blitProperties.blitSyncProperties.outputTimestampPacket); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); - bcsCsr->flushBcsTask(blitPropertiesContainer, false, true, *pDevice); + bcsCsr->flushBcsTask(blitPropertiesContainer, false, *pDevice); HardwareParse hwParser; hwParser.parseCommands(bcsCsr->commandStream); 
@@ -243,7 +244,7 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceViaMiMemFenceTestXe2HpgCore, givenSystemMemo EXPECT_FALSE(commandStreamReceiver.isEnginePrologueSent); BlitPropertiesContainer blitPropertiesContainer; - commandStreamReceiver.flushBcsTask(blitPropertiesContainer, false, false, *pDevice); + commandStreamReceiver.flushBcsTask(blitPropertiesContainer, false, *pDevice); EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent); HardwareParse hwParser; diff --git a/opencl/test/unit_test/xe2_hpg_core/copy_engine_tests_xe2_hpg_core.cpp b/opencl/test/unit_test/xe2_hpg_core/copy_engine_tests_xe2_hpg_core.cpp index ebd8a91396..b36a18a3ff 100644 --- a/opencl/test/unit_test/xe2_hpg_core/copy_engine_tests_xe2_hpg_core.cpp +++ b/opencl/test/unit_test/xe2_hpg_core/copy_engine_tests_xe2_hpg_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -40,7 +40,7 @@ struct BlitXe2HpgCoreTests : public ::testing::Test { BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); - return csr->flushBcsTask(blitPropertiesContainer, blocking, false, device); + return csr->flushBcsTask(blitPropertiesContainer, blocking, device); } std::unique_ptr clDevice; diff --git a/opencl/test/unit_test/xe3_core/command_stream_receiver_hw_tests_xe3_core.cpp b/opencl/test/unit_test/xe3_core/command_stream_receiver_hw_tests_xe3_core.cpp index eacf150a8b..464f1541a1 100644 --- a/opencl/test/unit_test/xe3_core/command_stream_receiver_hw_tests_xe3_core.cpp +++ b/opencl/test/unit_test/xe3_core/command_stream_receiver_hw_tests_xe3_core.cpp @@ -76,18 +76,18 @@ XE3_CORETEST_F(CommandStreamReceiverXe3CoreTests, givenProfilingEnabledWhenBlitB 0, 0, {1, 1, 1}, 0, 0, 0, 0); MockTimestampPacketContainer timestamp(*bcsCsr->getTimestampPacketAllocator(), 1u); - blitProperties.outputTimestampPacket = timestamp.getNode(0); + blitProperties.blitSyncProperties.outputTimestampPacket = 
timestamp.getNode(0); + blitProperties.blitSyncProperties.syncMode = BlitSyncMode::timestamp; + auto timestampContextStartGpuAddress = TimestampPacketHelper::getContextStartGpuAddress(*blitProperties.blitSyncProperties.outputTimestampPacket); + auto timestampGlobalStartAddress = TimestampPacketHelper::getGlobalStartGpuAddress(*blitProperties.blitSyncProperties.outputTimestampPacket); - auto timestampContextStartGpuAddress = TimestampPacketHelper::getContextStartGpuAddress(*blitProperties.outputTimestampPacket); - auto timestampGlobalStartAddress = TimestampPacketHelper::getGlobalStartGpuAddress(*blitProperties.outputTimestampPacket); - - auto timestampContextEndGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*blitProperties.outputTimestampPacket); - auto timestampGlobalEndAddress = TimestampPacketHelper::getGlobalEndGpuAddress(*blitProperties.outputTimestampPacket); + auto timestampContextEndGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*blitProperties.blitSyncProperties.outputTimestampPacket); + auto timestampGlobalEndAddress = TimestampPacketHelper::getGlobalEndGpuAddress(*blitProperties.blitSyncProperties.outputTimestampPacket); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); - bcsCsr->flushBcsTask(blitPropertiesContainer, false, true, *pDevice); + bcsCsr->flushBcsTask(blitPropertiesContainer, false, *pDevice); HardwareParse hwParser; hwParser.parseCommands(bcsCsr->commandStream); diff --git a/opencl/test/unit_test/xe3_core/copy_engine_tests_xe3_core.cpp b/opencl/test/unit_test/xe3_core/copy_engine_tests_xe3_core.cpp index 7eb559887d..9d69f49a24 100644 --- a/opencl/test/unit_test/xe3_core/copy_engine_tests_xe3_core.cpp +++ b/opencl/test/unit_test/xe3_core/copy_engine_tests_xe3_core.cpp @@ -41,7 +41,7 @@ struct BlitXe3CoreTests : public ::testing::Test { BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); - return 
csr->flushBcsTask(blitPropertiesContainer, blocking, false, device); + return csr->flushBcsTask(blitPropertiesContainer, blocking, device); } std::unique_ptr clDevice; diff --git a/opencl/test/unit_test/xe_hpc_core/command_stream_receiver_hw_tests_xe_hpc_core.cpp b/opencl/test/unit_test/xe_hpc_core/command_stream_receiver_hw_tests_xe_hpc_core.cpp index c5994995a3..119c9f53fa 100644 --- a/opencl/test/unit_test/xe_hpc_core/command_stream_receiver_hw_tests_xe_hpc_core.cpp +++ b/opencl/test/unit_test/xe_hpc_core/command_stream_receiver_hw_tests_xe_hpc_core.cpp @@ -137,7 +137,7 @@ XE_HPC_CORETEST_F(SystemMemoryFenceViaMiMemFenceTest, givenSystemMemoryFenceGene EXPECT_FALSE(commandStreamReceiver.isEnginePrologueSent); BlitPropertiesContainer blitPropertiesContainer; - commandStreamReceiver.flushBcsTask(blitPropertiesContainer, false, false, *pDevice); + commandStreamReceiver.flushBcsTask(blitPropertiesContainer, false, *pDevice); EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent); HardwareParse hwParser; diff --git a/opencl/test/unit_test/xe_hpc_core/copy_engine_tests_xe_hpc_core.cpp b/opencl/test/unit_test/xe_hpc_core/copy_engine_tests_xe_hpc_core.cpp index e15bc0ca79..81d82c7f39 100644 --- a/opencl/test/unit_test/xe_hpc_core/copy_engine_tests_xe_hpc_core.cpp +++ b/opencl/test/unit_test/xe_hpc_core/copy_engine_tests_xe_hpc_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2024 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -43,7 +43,7 @@ struct BlitXeHpcCoreTests : public ::testing::Test { BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); - return csr->flushBcsTask(blitPropertiesContainer, blocking, false, device); + return csr->flushBcsTask(blitPropertiesContainer, blocking, device); } std::unique_ptr clDevice; diff --git a/opencl/test/unit_test/xe_hpg_core/copy_engine_tests_xe_hpg_core.cpp b/opencl/test/unit_test/xe_hpg_core/copy_engine_tests_xe_hpg_core.cpp 
index 4217210ed2..f687110b5c 100644 --- a/opencl/test/unit_test/xe_hpg_core/copy_engine_tests_xe_hpg_core.cpp +++ b/opencl/test/unit_test/xe_hpg_core/copy_engine_tests_xe_hpg_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2024 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -45,7 +45,7 @@ struct BlitXeHpgCoreTests : public ::testing::Test { BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); - return csr->flushBcsTask(blitPropertiesContainer, blocking, false, device); + return csr->flushBcsTask(blitPropertiesContainer, blocking, device); } std::unique_ptr clDevice; diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index f2513946f6..ed52200e82 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -307,7 +307,7 @@ class CommandStreamReceiver { this->latestFlushedTaskCount = latestFlushedTaskCount; } - virtual TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) = 0; + virtual TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, Device &device) = 0; virtual SubmissionStatus flushTagUpdate() = 0; virtual void updateTagFromWait() = 0; diff --git a/shared/source/command_stream/command_stream_receiver_hw.h b/shared/source/command_stream/command_stream_receiver_hw.h index 1b4de4f415..fdd5574055 100644 --- a/shared/source/command_stream/command_stream_receiver_hw.h +++ b/shared/source/command_stream/command_stream_receiver_hw.h @@ -130,7 +130,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { return CommandStreamReceiverType::hardware; } - TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) 
override; + TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, Device &device) override; SubmissionStatus flushTagUpdate() override; SubmissionStatus flushMiFlushDW(bool initializeProlog); diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 4aba095c69..6b3c58149a 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -979,7 +979,7 @@ uint32_t CommandStreamReceiverHw::getDirectSubmissionRelaxedOrderingQ } template -TaskCountType CommandStreamReceiverHw::flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) { +TaskCountType CommandStreamReceiverHw::flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, Device &device) { auto lock = obtainUniqueOwnership(); bool blitterDirectSubmission = this->isBlitterDirectSubmissionEnabled(); auto debugPauseEnabled = PauseOnGpuProperties::featureEnabled(debugManager.flags.PauseOnBlitCopy.get()); @@ -989,7 +989,7 @@ TaskCountType CommandStreamReceiverHw::flushBcsTask(const BlitPropert const bool hasStallingCmds = updateTag || !this->isEnginePrologueSent; const bool relaxedOrderingAllowed = bcsRelaxedOrderingAllowed(blitPropertiesContainer, hasStallingCmds); - auto estimatedCsSize = BlitCommandsHelper::estimateBlitCommandsSize(blitPropertiesContainer, profilingEnabled, debugPauseEnabled, blitterDirectSubmission, + auto estimatedCsSize = BlitCommandsHelper::estimateBlitCommandsSize(blitPropertiesContainer, blitPropertiesContainer[0].blitSyncProperties.isTimestampMode(), debugPauseEnabled, blitterDirectSubmission, relaxedOrderingAllowed, *rootDeviceEnvironment.get()); auto &commandStream = getCS(estimatedCsSize); @@ -1037,8 +1037,8 @@ TaskCountType CommandStreamReceiverHw::flushBcsTask(const 
BlitPropert BlitCommandsHelper::encodeWa(commandStream, blitProperties, latestSentBcsWaValue); - if (blitProperties.outputTimestampPacket && profilingEnabled) { - BlitCommandsHelper::encodeProfilingStartMmios(commandStream, *blitProperties.outputTimestampPacket); + if (blitProperties.blitSyncProperties.outputTimestampPacket && blitProperties.blitSyncProperties.isTimestampMode()) { + BlitCommandsHelper::encodeProfilingStartMmios(commandStream, *blitProperties.blitSyncProperties.outputTimestampPacket); } if (debugManager.flags.FlushTlbBeforeCopy.get() == 1) { @@ -1051,7 +1051,7 @@ TaskCountType CommandStreamReceiverHw::flushBcsTask(const BlitPropert BlitCommandsHelper::dispatchBlitCommands(blitProperties, commandStream, *waArgs.rootDeviceEnvironment); - if (blitProperties.outputTimestampPacket) { + if (blitProperties.blitSyncProperties.outputTimestampPacket) { bool deviceToHostPostSyncFenceRequired = getProductHelper().isDeviceToHostCopySignalingFenceRequired() && !blitProperties.dstAllocation->isAllocatedInLocalMemoryPool() && blitProperties.srcAllocation->isAllocatedInLocalMemoryPool(); @@ -1060,16 +1060,16 @@ TaskCountType CommandStreamReceiverHw::flushBcsTask(const BlitPropert MemorySynchronizationCommands::addAdditionalSynchronization(commandStream, tagAllocation->getGpuAddress(), false, peekRootDeviceEnvironment()); } - if (profilingEnabled) { + if (blitProperties.blitSyncProperties.isTimestampMode()) { EncodeMiFlushDW::programWithWa(commandStream, 0llu, newTaskCount, args); - BlitCommandsHelper::encodeProfilingEndMmios(commandStream, *blitProperties.outputTimestampPacket); + BlitCommandsHelper::encodeProfilingEndMmios(commandStream, *blitProperties.blitSyncProperties.outputTimestampPacket); } else { - auto timestampPacketGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*blitProperties.outputTimestampPacket); + auto timestampPacketGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*blitProperties.blitSyncProperties.outputTimestampPacket); 
args.commandWithPostSync = true; EncodeMiFlushDW::programWithWa(commandStream, timestampPacketGpuAddress, 0, args); } - makeResident(*blitProperties.outputTimestampPacket->getBaseGraphicsAllocation()); + makeResident(*blitProperties.blitSyncProperties.outputTimestampPacket->getBaseGraphicsAllocation()); } blitProperties.csrDependencies.makeResident(*this); diff --git a/shared/source/helpers/blit_commands_helper_base.inl b/shared/source/helpers/blit_commands_helper_base.inl index 0c71a89a3f..3b26af2b98 100644 --- a/shared/source/helpers/blit_commands_helper_base.inl +++ b/shared/source/helpers/blit_commands_helper_base.inl @@ -144,7 +144,7 @@ size_t BlitCommandsHelper::estimateBlitCommandsSize(const BlitPropert size_t size = 0; EncodeDummyBlitWaArgs waArgs{false, const_cast(&rootDeviceEnvironment)}; for (auto &blitProperties : blitPropertiesContainer) { - auto updateTimestampPacket = blitProperties.outputTimestampPacket != nullptr; + auto updateTimestampPacket = blitProperties.blitSyncProperties.outputTimestampPacket != nullptr; auto isImage = blitProperties.isImageOperation(); size += BlitCommandsHelper::estimateBlitCommandSize(blitProperties.copySize, blitProperties.csrDependencies, updateTimestampPacket, profilingEnabled, isImage, rootDeviceEnvironment, blitProperties.isSystemMemoryPoolUsed, relaxedOrderingEnabled); diff --git a/shared/source/helpers/blit_helper.cpp b/shared/source/helpers/blit_helper.cpp index 3bd4ccc83f..c3382a376a 100644 --- a/shared/source/helpers/blit_helper.cpp +++ b/shared/source/helpers/blit_helper.cpp @@ -65,7 +65,7 @@ BlitOperationResult BlitHelper::blitMemoryToAllocationBanks(const Device &device (memory->getGpuAddress() + offset), 0, 0, 0, size, 0, 0, 0, 0)); - const auto newTaskCount = bcsEngine->commandStreamReceiver->flushBcsTask(blitPropertiesContainer, true, false, *pDeviceForBlit); + const auto newTaskCount = bcsEngine->commandStreamReceiver->flushBcsTask(blitPropertiesContainer, true, *pDeviceForBlit); if (newTaskCount == 
CompletionStamp::gpuHang) { return BlitOperationResult::gpuHang; } diff --git a/shared/source/helpers/blit_properties.cpp b/shared/source/helpers/blit_properties.cpp index 3e346208c9..2934a53b32 100644 --- a/shared/source/helpers/blit_properties.cpp +++ b/shared/source/helpers/blit_properties.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -42,51 +42,48 @@ BlitProperties BlitProperties::constructPropertiesForReadWrite(BlitterConstants: if (BlitterConstants::BlitDirection::hostPtrToBuffer == blitDirection || BlitterConstants::BlitDirection::hostPtrToImage == blitDirection) { return { - nullptr, // outputTimestampPacket - nullptr, // multiRootDeviceEventSync - blitDirection, // blitDirection - {}, // csrDependencies - AuxTranslationDirection::none, // auxTranslationDirection - memObjAllocation, // dstAllocation - hostAllocation, // srcAllocation - clearColorAllocation, // clearColorAllocation - memObjGpuVa, // dstGpuAddress - hostAllocGpuVa, // srcGpuAddress - copySize, // copySize - copyOffset, // dstOffset - hostPtrOffset, // srcOffset - true, - gpuRowPitch, // dstRowPitch - gpuSlicePitch, // dstSlicePitch - hostRowPitch, // srcRowPitch - hostSlicePitch, // srcSlicePitch - copySize, // dstSize - copySize // srcSize - }; - + .blitSyncProperties = {}, + .csrDependencies = {}, + .multiRootDeviceEventSync = nullptr, + .blitDirection = blitDirection, + .auxTranslationDirection = AuxTranslationDirection::none, + .dstAllocation = memObjAllocation, + .srcAllocation = hostAllocation, + .clearColorAllocation = clearColorAllocation, + .dstGpuAddress = memObjGpuVa, + .srcGpuAddress = hostAllocGpuVa, + .copySize = copySize, + .dstOffset = copyOffset, + .srcOffset = hostPtrOffset, + .dstRowPitch = gpuRowPitch, + .dstSlicePitch = gpuSlicePitch, + .srcRowPitch = hostRowPitch, + .srcSlicePitch = hostSlicePitch, + .dstSize = copySize, + .srcSize = copySize, + .isSystemMemoryPoolUsed = 
true}; } else { return { - nullptr, // outputTimestampPacket - nullptr, // multiRootDeviceEventSync - blitDirection, // blitDirection - {}, // csrDependencies - AuxTranslationDirection::none, // auxTranslationDirection - hostAllocation, // dstAllocation - memObjAllocation, // srcAllocation - clearColorAllocation, // clearColorAllocation - hostAllocGpuVa, // dstGpuAddress - memObjGpuVa, // srcGpuAddress - copySize, // copySize - hostPtrOffset, // dstOffset - copyOffset, // srcOffset - true, - hostRowPitch, // dstRowPitch - hostSlicePitch, // dstSlicePitch - gpuRowPitch, // srcRowPitch - gpuSlicePitch, // srcSlicePitch - copySize, // dstSize - copySize // srcSize - }; + .blitSyncProperties = {}, + .csrDependencies = {}, + .multiRootDeviceEventSync = nullptr, + .blitDirection = blitDirection, + .auxTranslationDirection = AuxTranslationDirection::none, + .dstAllocation = hostAllocation, + .srcAllocation = memObjAllocation, + .clearColorAllocation = clearColorAllocation, + .dstGpuAddress = hostAllocGpuVa, + .srcGpuAddress = memObjGpuVa, + .copySize = copySize, + .dstOffset = hostPtrOffset, + .srcOffset = copyOffset, + .dstRowPitch = hostRowPitch, + .dstSlicePitch = hostSlicePitch, + .srcRowPitch = gpuRowPitch, + .srcSlicePitch = gpuSlicePitch, + .dstSize = copySize, + .srcSize = copySize, + .isSystemMemoryPoolUsed = true}; }; } @@ -98,24 +95,24 @@ BlitProperties BlitProperties::constructPropertiesForCopy(GraphicsAllocation *ds copySize.z = copySize.z ? 
copySize.z : 1; return { - nullptr, // outputTimestampPacket - nullptr, // multiRootDeviceEventSync - BlitterConstants::BlitDirection::bufferToBuffer, // blitDirection - {}, // csrDependencies - AuxTranslationDirection::none, // auxTranslationDirection - dstAllocation, // dstAllocation - srcAllocation, // srcAllocation - clearColorAllocation, // clearColorAllocation - dstAllocation->getGpuAddress(), // dstGpuAddress - srcAllocation->getGpuAddress(), // srcGpuAddress - copySize, // copySize - dstOffset, // dstOffset - srcOffset, // srcOffset - MemoryPoolHelper::isSystemMemoryPool(dstAllocation->getMemoryPool(), srcAllocation->getMemoryPool()), - dstRowPitch, // dstRowPitch - dstSlicePitch, // dstSlicePitch - srcRowPitch, // srcRowPitch - srcSlicePitch}; // srcSlicePitch + .blitSyncProperties = {}, + .csrDependencies = {}, + .multiRootDeviceEventSync = nullptr, + .blitDirection = BlitterConstants::BlitDirection::bufferToBuffer, + .auxTranslationDirection = AuxTranslationDirection::none, + .dstAllocation = dstAllocation, + .srcAllocation = srcAllocation, + .clearColorAllocation = clearColorAllocation, + .dstGpuAddress = dstAllocation->getGpuAddress(), + .srcGpuAddress = srcAllocation->getGpuAddress(), + .copySize = copySize, + .dstOffset = dstOffset, + .srcOffset = srcOffset, + .dstRowPitch = dstRowPitch, + .dstSlicePitch = dstSlicePitch, + .srcRowPitch = srcRowPitch, + .srcSlicePitch = srcSlicePitch, + .isSystemMemoryPoolUsed = MemoryPoolHelper::isSystemMemoryPool(dstAllocation->getMemoryPool(), srcAllocation->getMemoryPool())}; } BlitProperties BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection auxTranslationDirection, @@ -123,20 +120,18 @@ BlitProperties BlitProperties::constructPropertiesForAuxTranslation(AuxTranslati auto allocationSize = allocation->getUnderlyingBufferSize(); return { - nullptr, // outputTimestampPacket - nullptr, // multiRootDeviceEventSync - BlitterConstants::BlitDirection::bufferToBuffer, // blitDirection - {}, // 
csrDependencies - auxTranslationDirection, // auxTranslationDirection - allocation, // dstAllocation - allocation, // srcAllocation - clearColorAllocation, // clearColorAllocation - allocation->getGpuAddress(), // dstGpuAddress - allocation->getGpuAddress(), // srcGpuAddress - {allocationSize, 1, 1}, // copySize - 0, // dstOffset - 0, // srcOffset - MemoryPoolHelper::isSystemMemoryPool(allocation->getMemoryPool())}; + .blitSyncProperties = {}, + .csrDependencies = {}, + .multiRootDeviceEventSync = nullptr, + .blitDirection = BlitterConstants::BlitDirection::bufferToBuffer, + .auxTranslationDirection = auxTranslationDirection, + .dstAllocation = allocation, + .srcAllocation = allocation, + .clearColorAllocation = clearColorAllocation, + .dstGpuAddress = allocation->getGpuAddress(), + .srcGpuAddress = allocation->getGpuAddress(), + .copySize = {allocationSize, 1, 1}, + .isSystemMemoryPoolUsed = MemoryPoolHelper::isSystemMemoryPool(allocation->getMemoryPool())}; } void BlitProperties::setupDependenciesForAuxTranslation(BlitPropertiesContainer &blitPropertiesContainer, TimestampPacketDependencies ×tampPacketDependencies, @@ -145,8 +140,11 @@ void BlitProperties::setupDependenciesForAuxTranslation(BlitPropertiesContainer auto numObjects = blitPropertiesContainer.size() / 2; for (size_t i = 0; i < numObjects; i++) { - blitPropertiesContainer[i].outputTimestampPacket = timestampPacketDependencies.auxToNonAuxNodes.peekNodes()[i]; - blitPropertiesContainer[i + numObjects].outputTimestampPacket = timestampPacketDependencies.nonAuxToAuxNodes.peekNodes()[i]; + blitPropertiesContainer[i].blitSyncProperties.outputTimestampPacket = timestampPacketDependencies.auxToNonAuxNodes.peekNodes()[i]; + blitPropertiesContainer[i].blitSyncProperties.syncMode = BlitSyncMode::immediate; + + blitPropertiesContainer[i + numObjects].blitSyncProperties.outputTimestampPacket = timestampPacketDependencies.nonAuxToAuxNodes.peekNodes()[i]; + blitPropertiesContainer[i + 
numObjects].blitSyncProperties.syncMode = BlitSyncMode::immediate; } auto nodesAllocator = gpguCsr.getTimestampPacketAllocator(); diff --git a/shared/source/helpers/blit_properties.h b/shared/source/helpers/blit_properties.h index d245ab6288..5c21ae4c79 100644 --- a/shared/source/helpers/blit_properties.h +++ b/shared/source/helpers/blit_properties.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -22,6 +22,26 @@ class TimestampPacketContainer; class GraphicsAllocation; class CommandStreamReceiver; +enum class BlitSyncMode { + none = 0, + timestamp, + immediate, + timestampAndImmediate +}; + +struct BlitSyncProperties { + TagNodeBase *outputTimestampPacket = nullptr; + BlitSyncMode syncMode = BlitSyncMode::none; + uint64_t deviceGpuWriteAddress = 0; + uint64_t hostGpuWriteAddress = 0; + uint64_t timestampGpuWriteAddress = 0; + uint64_t writeValue = 0; + + bool isTimestampMode() const { + return (syncMode == BlitSyncMode::timestamp) || (syncMode == BlitSyncMode::timestampAndImmediate); + } +}; + struct BlitProperties { static BlitProperties constructPropertiesForReadWrite(BlitterConstants::BlitDirection blitDirection, CommandStreamReceiver &commandStreamReceiver, @@ -45,10 +65,11 @@ struct BlitProperties { TimestampPacketContainer &kernelTimestamps, const CsrDependencies &depsFromEvents, CommandStreamReceiver &gpguCsr, CommandStreamReceiver &bcsCsr); - TagNodeBase *outputTimestampPacket = nullptr; - TagNodeBase *multiRootDeviceEventSync = nullptr; - BlitterConstants::BlitDirection blitDirection = BlitterConstants::BlitDirection::bufferToHostPtr; + BlitSyncProperties blitSyncProperties = {}; CsrDependencies csrDependencies; + TagNodeBase *multiRootDeviceEventSync = nullptr; + + BlitterConstants::BlitDirection blitDirection = BlitterConstants::BlitDirection::bufferToHostPtr; AuxTranslationDirection auxTranslationDirection = AuxTranslationDirection::none; GraphicsAllocation 
*dstAllocation = nullptr; @@ -60,7 +81,6 @@ struct BlitProperties { Vec3 copySize = 0; Vec3 dstOffset = 0; Vec3 srcOffset = 0; - bool isSystemMemoryPoolUsed = false; size_t dstRowPitch = 0; size_t dstSlicePitch = 0; @@ -71,6 +91,7 @@ struct BlitProperties { size_t bytesPerPixel = 1; GMM_YUV_PLANE_ENUM dstPlane = GMM_YUV_PLANE_ENUM::GMM_NO_PLANE; GMM_YUV_PLANE_ENUM srcPlane = GMM_YUV_PLANE_ENUM::GMM_NO_PLANE; + bool isSystemMemoryPoolUsed = false; bool isImageOperation() const; }; diff --git a/shared/test/common/libult/ult_aub_command_stream_receiver.h b/shared/test/common/libult/ult_aub_command_stream_receiver.h index 61400b8cd2..1c74ce5a98 100644 --- a/shared/test/common/libult/ult_aub_command_stream_receiver.h +++ b/shared/test/common/libult/ult_aub_command_stream_receiver.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2019-2024 Intel Corporation + * Copyright (C) 2019-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -48,9 +48,9 @@ class UltAubCommandStreamReceiver : public AUBCommandStreamReceiverHw return csr; } - TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { + TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, Device &device) override { blitBufferCalled++; - return BaseClass::flushBcsTask(blitPropertiesContainer, blocking, profilingEnabled, device); + return BaseClass::flushBcsTask(blitPropertiesContainer, blocking, device); } void pollForCompletion(bool skipTaskCountCheck) override { diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index 4cebbdf182..dff887b104 100644 --- a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2024 Intel Corporation + * Copyright (C) 2018-2025 Intel Corporation * * SPDX-License-Identifier: MIT * 
@@ -415,12 +415,12 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ return CommandStreamReceiverHw::obtainUniqueOwnership(); } - TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { + TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, Device &device) override { blitBufferCalled++; receivedBlitProperties = blitPropertiesContainer; if (callBaseFlushBcsTask) { - return CommandStreamReceiverHw::flushBcsTask(blitPropertiesContainer, blocking, profilingEnabled, device); + return CommandStreamReceiverHw::flushBcsTask(blitPropertiesContainer, blocking, device); } else { return flushBcsTaskReturnValue; } diff --git a/shared/test/common/mocks/mock_command_stream_receiver.h b/shared/test/common/mocks/mock_command_stream_receiver.h index 43ad5dee41..c9817203e7 100644 --- a/shared/test/common/mocks/mock_command_stream_receiver.h +++ b/shared/test/common/mocks/mock_command_stream_receiver.h @@ -175,7 +175,7 @@ class MockCommandStreamReceiver : public CommandStreamReceiver { return WaitStatus::ready; } - TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; }; + TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, Device &device) override { return taskCount; }; CommandStreamReceiverType getType() const override { return commandStreamReceiverType; @@ -414,9 +414,9 @@ class MockCsrHw2 : public CommandStreamReceiverHw { return completionStamp; } - TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { + TaskCountType flushBcsTask(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, Device &device) override { if (!skipBlitCalls) { - return 
CommandStreamReceiverHw::flushBcsTask(blitPropertiesContainer, blocking, profilingEnabled, device); + return CommandStreamReceiverHw::flushBcsTask(blitPropertiesContainer, blocking, device); } return taskCount; } diff --git a/shared/test/unit_test/command_stream/aub_command_stream_receiver_1_tests.cpp b/shared/test/unit_test/command_stream/aub_command_stream_receiver_1_tests.cpp index cae653169b..f35b02f3b7 100644 --- a/shared/test/unit_test/command_stream/aub_command_stream_receiver_1_tests.cpp +++ b/shared/test/unit_test/command_stream/aub_command_stream_receiver_1_tests.cpp @@ -1224,7 +1224,7 @@ HWTEST_F(AubCommandStreamReceiverTests, WhenBlitBufferIsCalledThenCounterIsCorre BlitProperties blitProperties = BlitProperties::constructPropertiesForCopy(&allocation, &allocation, 0, 0, 0, 0, 0, 0, 0, aubCsr->getClearColorAllocation()); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); - aubCsr->flushBcsTask(blitPropertiesContainer, true, false, *pDevice); + aubCsr->flushBcsTask(blitPropertiesContainer, true, *pDevice); EXPECT_EQ(1u, aubCsr->blitBufferCalled); } diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 408ce3ab37..a0b552be80 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -2975,11 +2975,11 @@ HWTEST_F(CommandStreamReceiverHwTest, givenFailureOnFlushWhenFlushingBcsTaskThen container.push_back(blitProperties); commandStreamReceiver.flushReturnValue = SubmissionStatus::outOfHostMemory; - EXPECT_EQ(CompletionStamp::outOfHostMemory, commandStreamReceiver.flushBcsTask(container, true, false, *pDevice)); + EXPECT_EQ(CompletionStamp::outOfHostMemory, commandStreamReceiver.flushBcsTask(container, true, *pDevice)); commandStreamReceiver.flushReturnValue = SubmissionStatus::outOfMemory; - 
EXPECT_EQ(CompletionStamp::outOfDeviceMemory, commandStreamReceiver.flushBcsTask(container, true, false, *pDevice)); + EXPECT_EQ(CompletionStamp::outOfDeviceMemory, commandStreamReceiver.flushBcsTask(container, true, *pDevice)); commandStreamReceiver.flushReturnValue = SubmissionStatus::failed; - EXPECT_EQ(CompletionStamp::failed, commandStreamReceiver.flushBcsTask(container, true, false, *pDevice)); + EXPECT_EQ(CompletionStamp::failed, commandStreamReceiver.flushBcsTask(container, true, *pDevice)); } HWTEST_F(CommandStreamReceiverHwTest, givenFlushBcsTaskVerifyLatestSentTaskCountUpdated) { @@ -3055,13 +3055,13 @@ HWTEST2_F(CommandStreamReceiverHwTest, givenDeviceToHostCopyWhenFenceIsRequiredT bcsCsr, &mockAllocation, nullptr, hostPtr, mockAllocation.getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); - blitProperties.outputTimestampPacket = timestamp.getNode(0); + blitProperties.blitSyncProperties.outputTimestampPacket = timestamp.getNode(0); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); offset = bcsCsr.commandStream.getUsed(); - bcsCsr.flushBcsTask(blitPropertiesContainer, true, false, *pDevice); + bcsCsr.flushBcsTask(blitPropertiesContainer, true, *pDevice); EXPECT_TRUE(verify(true)); } @@ -3079,7 +3079,7 @@ HWTEST2_F(CommandStreamReceiverHwTest, givenDeviceToHostCopyWhenFenceIsRequiredT blitPropertiesContainer.push_back(blitProperties); offset = bcsCsr.commandStream.getUsed(); - bcsCsr.flushBcsTask(blitPropertiesContainer, true, false, *pDevice); + bcsCsr.flushBcsTask(blitPropertiesContainer, true, *pDevice); EXPECT_TRUE(verify(false)); } @@ -3092,13 +3092,13 @@ HWTEST2_F(CommandStreamReceiverHwTest, givenDeviceToHostCopyWhenFenceIsRequiredT bcsCsr, &mockAllocation, nullptr, hostPtr, mockAllocation.getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); - blitProperties.outputTimestampPacket = timestamp.getNode(0); + blitProperties.blitSyncProperties.outputTimestampPacket = timestamp.getNode(0); 
BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); offset = bcsCsr.commandStream.getUsed(); - bcsCsr.flushBcsTask(blitPropertiesContainer, true, false, *pDevice); + bcsCsr.flushBcsTask(blitPropertiesContainer, true, *pDevice); EXPECT_TRUE(verify(false)); } @@ -3111,13 +3111,13 @@ HWTEST2_F(CommandStreamReceiverHwTest, givenDeviceToHostCopyWhenFenceIsRequiredT bcsCsr, &mockAllocation, nullptr, hostPtr, mockAllocation.getGpuAddress(), 0, 0, 0, {1, 1, 1}, 0, 0, 0, 0); - blitProperties.outputTimestampPacket = timestamp.getNode(0); + blitProperties.blitSyncProperties.outputTimestampPacket = timestamp.getNode(0); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); offset = bcsCsr.commandStream.getUsed(); - bcsCsr.flushBcsTask(blitPropertiesContainer, true, false, *pDevice); + bcsCsr.flushBcsTask(blitPropertiesContainer, true, *pDevice); EXPECT_TRUE(verify(false)); } @@ -3127,13 +3127,13 @@ HWTEST2_F(CommandStreamReceiverHwTest, givenDeviceToHostCopyWhenFenceIsRequiredT mockAllocation.memoryPool = MemoryPool::localMemory; auto blitProperties = BlitProperties::constructPropertiesForCopy(&mockAllocation, &mockAllocation, {0, 0, 0}, {0, 0, 0}, {1, 1, 1}, 0, 0, 0, 0, nullptr); - blitProperties.outputTimestampPacket = timestamp.getNode(0); + blitProperties.blitSyncProperties.outputTimestampPacket = timestamp.getNode(0); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties); offset = bcsCsr.commandStream.getUsed(); - bcsCsr.flushBcsTask(blitPropertiesContainer, true, false, *pDevice); + bcsCsr.flushBcsTask(blitPropertiesContainer, true, *pDevice); EXPECT_TRUE(verify(false)); } @@ -3362,7 +3362,7 @@ HWTEST_F(CommandStreamReceiverHwTest, givenMultiRootDeviceSyncNodeWhenFlushBcsTa BlitPropertiesContainer container; container.push_back(blitProperties); - commandStreamReceiver.flushBcsTask(container, true, false, *pDevice); + 
commandStreamReceiver.flushBcsTask(container, true, *pDevice); HardwareParse hwParser; hwParser.parseCommands(commandStreamReceiver.commandStream, 0); @@ -3392,7 +3392,7 @@ HWTEST_F(CommandStreamReceiverHwTest, givenNullPtrAsMultiRootDeviceSyncNodeWhenF BlitPropertiesContainer container; container.push_back(blitProperties); - commandStreamReceiver.flushBcsTask(container, true, false, *pDevice); + commandStreamReceiver.flushBcsTask(container, true, *pDevice); HardwareParse hwParser; hwParser.parseCommands(commandStreamReceiver.commandStream, 0);