From 5fe9d70066dd7789351fa9a2f177f9c69d925cd3 Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Tue, 6 Jun 2023 15:11:09 +0000 Subject: [PATCH] feature: new multitile post sync layout for immediate write [1/n] No functional changes in this commit. This is prework. Related-To: NEO-7966 Signed-off-by: Dunajski, Bartosz --- .../source/cmdlist/cmdlist_hw_xehp_and_later.inl | 2 +- ...st_cmdlist_append_multipartition_prologue.cpp | 2 +- .../sources/cmdqueue/test_cmdqueue_2.cpp | 8 ++++---- .../test/unit_tests/sources/fence/test_fence.cpp | 2 +- .../command_queue/command_queue_hw_1_tests.cpp | 2 +- ..._receiver_flush_task_tests_xehp_and_later.cpp | 2 +- .../helpers/timestamp_packet_2_tests.cpp | 4 ++-- .../source/command_container/implicit_scaling.h | 3 ++- .../implicit_scaling_xehp_and_later.inl | 7 ++++++- .../aub_command_stream_receiver_hw_base.inl | 4 ++-- .../command_stream/command_stream_receiver.cpp | 12 ++++++------ .../command_stream/command_stream_receiver.h | 11 ++++++++--- .../command_stream_receiver_hw_base.inl | 2 +- ...command_stream_receiver_hw_xehp_and_later.inl | 5 +++-- .../command_stream_receiver_with_aub_dump.inl | 2 +- .../tbx_command_stream_receiver_hw.inl | 4 ++-- .../direct_submission_bdw_and_later.inl | 4 ++-- .../direct_submission/direct_submission_hw.h | 4 ++-- .../direct_submission/direct_submission_hw.inl | 2 +- .../direct_submission_xe_hp_core_and_later.inl | 8 ++++---- .../linux/drm_direct_submission.inl | 8 ++++---- .../source/memory_manager/allocations_list.cpp | 2 +- .../os_interface/linux/drm_command_stream.inl | 2 +- .../linux/drm_command_stream_xehp_and_later.inl | 4 ++-- .../os_interface/linux/drm_memory_manager.cpp | 2 +- .../common/libult/ult_command_stream_receiver.h | 3 ++- .../linux/mock_drm_command_stream_receiver.h | 3 ++- .../common/mocks/mock_command_stream_receiver.h | 6 ++++-- .../common/mocks/mock_direct_submission_hw.h | 2 +- .../command_stream_receiver_tests.cpp | 16 ++++++++-------- ...mmand_stream_receiver_with_aub_dump_tests.cpp | 2 +- .../direct_submission_tests_2.cpp | 6 +++--- .../linux/drm_direct_submission_tests.cpp | 8 ++++---- .../deferrable_allocation_deletion_tests.cpp | 2 +- .../internal_allocation_storage_tests.cpp | 2 +- ...ommand_stream_xehp_and_later_prelim_tests.cpp | 4 ++-- .../drm_command_stream_xehp_and_later_tests.cpp | 8 ++++---- 37 files changed, 93 insertions(+), 77 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index d3aa35740f..2d6f04bebd 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -386,7 +386,7 @@ void CommandListCoreFamily::appendMultiPartitionPrologue(uint32_t template void CommandListCoreFamily::appendMultiPartitionEpilogue() { NEO::ImplicitScalingDispatch::dispatchOffsetRegister(*commandContainer.getCommandStream(), - NEO::ImplicitScalingDispatch::getPostSyncOffset()); + NEO::ImplicitScalingDispatch::getImmediateWritePostSyncOffset()); } template diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_multipartition_prologue.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_multipartition_prologue.cpp index d0d586d8f3..2ed8d59a8b 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_multipartition_prologue.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_multipartition_prologue.cpp @@ -98,7 +98,7 @@ HWTEST2_F(MultiPartitionEpilogueTest, whenAppendMultiPartitionEpilogueIsCalledTh auto lriCmd = genCmdCast(*itorLri); EXPECT_EQ(NEO::PartitionRegisters::addressOffsetCCSOffset, static_cast(lriCmd->getRegisterOffset())); - EXPECT_EQ(NEO::ImplicitScalingDispatch::getPostSyncOffset(), static_cast(lriCmd->getDataDword())); + EXPECT_EQ(NEO::ImplicitScalingDispatch::getImmediateWritePostSyncOffset(), static_cast(lriCmd->getDataDword())); EXPECT_EQ(true, lriCmd->getMmioRemapEnable()); auto result = commandList->close(); diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp index 385823d5e1..52ae9a32bc 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp @@ -302,11 +302,11 @@ HWTEST2_F(MultiTileCommandQueueSynchronizeTest, givenMultiplePartitionCountWhenC if (device->getNEODevice()->getPreemptionMode() == PreemptionMode::MidThread || device->getNEODevice()->isDebuggerActive()) { csr->createPreemptionAllocation(); } - EXPECT_NE(0u, csr->getPostSyncWriteOffset()); + EXPECT_NE(0u, csr->getImmWritePostSyncWriteOffset()); volatile TagAddressType *tagAddress = csr->getTagAddress(); for (uint32_t i = 0; i < 2; i++) { *tagAddress = 0xFF; - tagAddress = ptrOffset(tagAddress, csr->getPostSyncWriteOffset()); + tagAddress = ptrOffset(tagAddress, csr->getImmWritePostSyncWriteOffset()); } csr->activePartitions = 2u; auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, @@ -345,11 +345,11 @@ HWTEST2_F(MultiTileCommandQueueSynchronizeTest, givenCsrHasMultipleActivePartiti if (device->getNEODevice()->getPreemptionMode() == PreemptionMode::MidThread || device->getNEODevice()->isDebuggerActive()) { csr->createPreemptionAllocation(); } - EXPECT_NE(0u, csr->getPostSyncWriteOffset()); + EXPECT_NE(0u, csr->getImmWritePostSyncWriteOffset()); volatile TagAddressType *tagAddress = csr->getTagAddress(); for (uint32_t i = 0; i < 2; i++) { *tagAddress = 0xFF; - tagAddress = ptrOffset(tagAddress, csr->getPostSyncWriteOffset()); + tagAddress = ptrOffset(tagAddress, csr->getImmWritePostSyncWriteOffset()); } csr->activePartitions = 2u; auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, diff --git a/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp b/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp index 9408515cc3..a0fb1fe098 100644 --- a/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp +++ b/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp @@ -239,7 +239,7 @@ TEST_F(FenceSynchronizeTest, givenInfiniteTimeoutWhenWaitingForFenceCompletionTh const auto csr = std::make_unique(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); ASSERT_NE(nullptr, csr->getTagAddress()); - csr->postSyncWriteOffset = postSyncOffset; + csr->immWritePostSyncWriteOffset = postSyncOffset; csr->activePartitions = activePartitions; Mock cmdqueue(device, csr.get()); diff --git a/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp index 9e36b620e9..2dc6a60046 100644 --- a/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_hw_1_tests.cpp @@ -850,7 +850,7 @@ HWTEST_F(CommandQueueHwTest, GivenMultiTileQueueWhenEventNotCompletedAndFinishIs auto &csr = this->pCmdQ->getGpgpuCommandStreamReceiver(); csr.setActivePartitions(2u); auto ultCsr = reinterpret_cast *>(&csr); - ultCsr->postSyncWriteOffset = 32; + ultCsr->immWritePostSyncWriteOffset = 32; auto tagAddress = csr.getTagAddress(); *ptrOffset(tagAddress, 32) = *tagAddress; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp index e6a5bcfb3c..14cdb3475b 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp @@ -856,7 +856,7 @@ struct CommandStreamReceiverFlushTaskXeHPAndLaterMultiTileTests : public Command expectedWparidData = commandStreamReceiver.getWorkPartitionAllocationGpuAddress(); } uint32_t expectedWparidRegister = 0x221C; - uint32_t expectedAddressOffsetData = commandStreamReceiver.getPostSyncWriteOffset(); + uint32_t expectedAddressOffsetData = commandStreamReceiver.getImmWritePostSyncWriteOffset(); uint32_t expectedAddressOffsetRegister = 0x23B4; bool wparidConfiguration = false; diff --git a/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp b/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp index dbd7073c44..a4c1f44bca 100644 --- a/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp +++ b/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp @@ -114,7 +114,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, TimestampPacketTests, givenEmptyWaitlistAndEventWhe commandStreamReceiver->staticWorkPartitioningEnabled = true; device->resetCommandStreamReceiver(commandStreamReceiver.release()); - *ptrOffset(commandStreamReceiverPtr->tagAddress, commandStreamReceiverPtr->postSyncWriteOffset) = *commandStreamReceiverPtr->tagAddress; + *ptrOffset(commandStreamReceiverPtr->tagAddress, commandStreamReceiverPtr->immWritePostSyncWriteOffset) = *commandStreamReceiverPtr->tagAddress; auto cmdQ = clUniquePtr(new MockCommandQueueHw(context, device.get(), nullptr)); cmdQ->setProfilingEnabled(); @@ -254,7 +254,7 @@ HWTEST_F(TimestampPacketTests, givenMultiTileConfigWhenProgrammingNonStallingBar csr.activePartitionsConfig = 2; csr.staticWorkPartitioningEnabled = true; - *ptrOffset(csr.tagAddress, csr.postSyncWriteOffset) = *csr.tagAddress; + *ptrOffset(csr.tagAddress, csr.immWritePostSyncWriteOffset) = *csr.tagAddress; MockKernelWithInternals mockKernel(*device, context); diff --git a/shared/source/command_container/implicit_scaling.h b/shared/source/command_container/implicit_scaling.h index 13a9684349..8dc062ea57 100644 --- a/shared/source/command_container/implicit_scaling.h +++ b/shared/source/command_container/implicit_scaling.h @@ -81,7 +81,8 @@ struct ImplicitScalingDispatch { static void dispatchOffsetRegister(LinearStream &commandStream, uint32_t addressOffset); - static uint32_t getPostSyncOffset(); + static uint32_t getImmediateWritePostSyncOffset(); + static uint32_t getTimeStampPostSyncOffset(); static bool platformSupportsImplicitScaling(const RootDeviceEnvironment &rootDeviceEnvironment); diff --git a/shared/source/command_container/implicit_scaling_xehp_and_later.inl b/shared/source/command_container/implicit_scaling_xehp_and_later.inl index 5d266f24d1..37d23003f8 100644 --- a/shared/source/command_container/implicit_scaling_xehp_and_later.inl +++ b/shared/source/command_container/implicit_scaling_xehp_and_later.inl @@ -234,7 +234,12 @@ inline void ImplicitScalingDispatch::dispatchOffsetRegister(LinearStr } template -inline uint32_t ImplicitScalingDispatch::getPostSyncOffset() { +inline uint32_t ImplicitScalingDispatch::getImmediateWritePostSyncOffset() { + return static_cast(GfxCoreHelperHw::getSingleTimestampPacketSizeHw()); +} + +template +inline uint32_t ImplicitScalingDispatch::getTimeStampPostSyncOffset() { return static_cast(GfxCoreHelperHw::getSingleTimestampPacketSizeHw()); } diff --git a/shared/source/command_stream/aub_command_stream_receiver_hw_base.inl b/shared/source/command_stream/aub_command_stream_receiver_hw_base.inl index 2f098d120b..abc3041db8 100644 --- a/shared/source/command_stream/aub_command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/aub_command_stream_receiver_hw_base.inl @@ -320,7 +320,7 @@ SubmissionStatus AUBCommandStreamReceiverHw::flush(BatchBuffer &batch volatile TagAddressType *pollAddress = this->tagAddress; for (uint32_t i = 0; i < this->activePartitions; i++) { *pollAddress = this->peekLatestSentTaskCount(); - pollAddress = ptrOffset(pollAddress, this->postSyncWriteOffset); + pollAddress = ptrOffset(pollAddress, this->immWritePostSyncWriteOffset); } } return SubmissionStatus::SUCCESS; @@ -361,7 +361,7 @@ SubmissionStatus AUBCommandStreamReceiverHw::flush(BatchBuffer &batch volatile TagAddressType *pollAddress = this->tagAddress; for (uint32_t i = 0; i < this->activePartitions; i++) { *pollAddress = this->peekLatestSentTaskCount(); - pollAddress = ptrOffset(pollAddress, this->postSyncWriteOffset); + pollAddress = ptrOffset(pollAddress, this->immWritePostSyncWriteOffset); } } diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index 11077fe347..0f7c67f3e1 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -456,7 +456,7 @@ WaitStatus CommandStreamReceiver::baseWaitFunction(volatile TagAddressType *poll } } - partitionAddress = ptrOffset(partitionAddress, this->postSyncWriteOffset); + partitionAddress = ptrOffset(partitionAddress, this->immWritePostSyncWriteOffset); } partitionAddress = pollAddress; @@ -464,7 +464,7 @@ WaitStatus CommandStreamReceiver::baseWaitFunction(volatile TagAddressType *poll if (*partitionAddress < taskCountToWait) { return WaitStatus::NotReady; } - partitionAddress = ptrOffset(partitionAddress, this->postSyncWriteOffset); + partitionAddress = ptrOffset(partitionAddress, this->immWritePostSyncWriteOffset); } return WaitStatus::Ready; @@ -748,9 +748,9 @@ bool CommandStreamReceiver::initializeTagAllocation() { uint32_t subDevices = static_cast(this->deviceBitfield.count()); for (uint32_t i = 0; i < subDevices; i++) { *tagAddress = initValue; - tagAddress = ptrOffset(tagAddress, this->postSyncWriteOffset); + tagAddress = ptrOffset(tagAddress, this->immWritePostSyncWriteOffset); *completionFence = 0; - completionFence = ptrOffset(completionFence, this->postSyncWriteOffset); + completionFence = ptrOffset(completionFence, this->immWritePostSyncWriteOffset); } *this->debugPauseStateAddress = DebugManager.flags.EnableNullHardware.get() ? DebugPauseState::disabled : DebugPauseState::waitingForFirstSemaphore; @@ -957,7 +957,7 @@ bool CommandStreamReceiver::testTaskCountReady(volatile TagAddressType *pollAddr return false; } - pollAddress = ptrOffset(pollAddress, this->postSyncWriteOffset); + pollAddress = ptrOffset(pollAddress, this->immWritePostSyncWriteOffset); } return true; } @@ -1011,7 +1011,7 @@ void CommandStreamReceiver::printTagAddressContent(TaskCountType taskCountToWait } for (uint32_t i = 0; i < activePartitions; i++) { PRINT_DEBUG_STRING(true, stdout, " %u", *postSyncAddress); - postSyncAddress = ptrOffset(postSyncAddress, this->postSyncWriteOffset); + postSyncAddress = ptrOffset(postSyncAddress, this->immWritePostSyncWriteOffset); } PRINT_DEBUG_STRING(true, stdout, "%s", "\n"); } diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index e80d6b37a1..bb2f1716cf 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -337,8 +337,12 @@ class CommandStreamReceiver { std::unique_ptr pageTableManager; - inline uint32_t getPostSyncWriteOffset() const { - return postSyncWriteOffset; + inline uint32_t getImmWritePostSyncWriteOffset() const { + return immWritePostSyncWriteOffset; + } + + inline uint32_t getTimeStampPostSyncWriteOffset() const { + return timeStampPostSyncWriteOffset; } inline bool isMultiTileOperationEnabled() const { @@ -504,7 +508,8 @@ class CommandStreamReceiver { MemoryCompressionState lastMemoryCompressionState = MemoryCompressionState::NotApplicable; uint32_t activePartitions = 1; uint32_t activePartitionsConfig = 1; - uint32_t postSyncWriteOffset = 0; + uint32_t immWritePostSyncWriteOffset = 0; + uint32_t timeStampPostSyncWriteOffset = 0; TaskCountType completionFenceValue = 0; const uint32_t rootDeviceIndex; diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 3c0028fe71..34c8fcd781 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -945,7 +945,7 @@ inline WaitStatus CommandStreamReceiverHw::waitForTaskCountWithKmdNot } for (uint32_t i = 0; i < this->activePartitions; i++) { - UNRECOVERABLE_IF(*(ptrOffset(getTagAddress(), (i * this->postSyncWriteOffset))) < taskCountToWait); + UNRECOVERABLE_IF(*(ptrOffset(getTagAddress(), (i * this->immWritePostSyncWriteOffset))) < taskCountToWait); } if (kmdNotifyHelper->quickKmdSleepForSporadicWaitsEnabled()) { diff --git a/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl b/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl index 6332679bc2..3462e9f604 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl @@ -127,7 +127,7 @@ template inline void CommandStreamReceiverHw::programActivePartitionConfig(LinearStream &csr) { if (this->staticWorkPartitioningEnabled) { uint64_t workPartitionAddress = getWorkPartitionAllocationGpuAddress(); - ImplicitScalingDispatch::dispatchRegisterConfiguration(csr, workPartitionAddress, this->postSyncWriteOffset); + ImplicitScalingDispatch::dispatchRegisterConfiguration(csr, workPartitionAddress, this->immWritePostSyncWriteOffset); } this->activePartitionsConfig = this->activePartitions; } @@ -217,7 +217,8 @@ inline void CommandStreamReceiverHw::programStallingPostSyncCommandsF template inline void CommandStreamReceiverHw::configurePostSyncWriteOffset() { - this->postSyncWriteOffset = ImplicitScalingDispatch::getPostSyncOffset(); + this->immWritePostSyncWriteOffset = ImplicitScalingDispatch::getImmediateWritePostSyncOffset(); + this->timeStampPostSyncWriteOffset = ImplicitScalingDispatch::getTimeStampPostSyncOffset(); } } // namespace NEO diff --git a/shared/source/command_stream/command_stream_receiver_with_aub_dump.inl b/shared/source/command_stream/command_stream_receiver_with_aub_dump.inl index aad4009904..9d9c6c8ee0 100644 --- a/shared/source/command_stream/command_stream_receiver_with_aub_dump.inl +++ b/shared/source/command_stream/command_stream_receiver_with_aub_dump.inl @@ -36,7 +36,7 @@ CommandStreamReceiverWithAUBDump::CommandStreamReceiverWithAUBDump(cons for (uint32_t i = 0; i < subDevices; i++) { *tagAddressToInitialize = std::numeric_limits::max(); - tagAddressToInitialize = ptrOffset(tagAddressToInitialize, this->postSyncWriteOffset); + tagAddressToInitialize = ptrOffset(tagAddressToInitialize, this->immWritePostSyncWriteOffset); } } } diff --git a/shared/source/command_stream/tbx_command_stream_receiver_hw.inl b/shared/source/command_stream/tbx_command_stream_receiver_hw.inl index e90ec8da32..f3746e6237 100644 --- a/shared/source/command_stream/tbx_command_stream_receiver_hw.inl +++ b/shared/source/command_stream/tbx_command_stream_receiver_hw.inl @@ -489,7 +489,7 @@ void TbxCommandStreamReceiverHw::flushSubmissionsAndDownloadAllocatio while (*pollAddress < this->latestFlushedTaskCount) { this->downloadAllocation(*this->getTagAllocation()); } - pollAddress = ptrOffset(pollAddress, this->postSyncWriteOffset); + pollAddress = ptrOffset(pollAddress, this->immWritePostSyncWriteOffset); } auto lockCSR = this->obtainUniqueOwnership(); @@ -562,7 +562,7 @@ void TbxCommandStreamReceiverHw::downloadAllocations() { while (*pollAddress < this->latestFlushedTaskCount) { this->downloadAllocation(*this->getTagAllocation()); } - pollAddress = ptrOffset(pollAddress, this->postSyncWriteOffset); + pollAddress = ptrOffset(pollAddress, this->immWritePostSyncWriteOffset); } auto lockCSR = this->obtainUniqueOwnership(); for (GraphicsAllocation *graphicsAllocation : this->allocationsForDownload) { diff --git a/shared/source/direct_submission/direct_submission_bdw_and_later.inl b/shared/source/direct_submission/direct_submission_bdw_and_later.inl index 28100ac00d..40857a5ccf 100644 --- a/shared/source/direct_submission/direct_submission_bdw_and_later.inl +++ b/shared/source/direct_submission/direct_submission_bdw_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -19,7 +19,7 @@ inline size_t DirectSubmissionHw::getSizePartitionRegiste } template -inline void DirectSubmissionHw::setPostSyncOffset() { +inline void DirectSubmissionHw::setImmWritePostSyncOffset() { } } // namespace NEO diff --git a/shared/source/direct_submission/direct_submission_hw.h b/shared/source/direct_submission/direct_submission_hw.h index 544c02dd5a..ad6d591b60 100644 --- a/shared/source/direct_submission/direct_submission_hw.h +++ b/shared/source/direct_submission/direct_submission_hw.h @@ -165,7 +165,7 @@ class DirectSubmissionHw { MOCKABLE_VIRTUAL void performDiagnosticMode(); void dispatchDiagnosticModeSection(); size_t getDiagnosticModeSection(); - void setPostSyncOffset(); + void setImmWritePostSyncOffset(); virtual bool isCompleted(uint32_t ringBufferIndex) = 0; @@ -217,7 +217,7 @@ class DirectSubmissionHw { uint32_t workloadMode = 0; uint32_t workloadModeOneExpectedValue = 0u; uint32_t activeTiles = 1u; - uint32_t postSyncOffset = 0u; + uint32_t immWritePostSyncOffset = 0u; uint32_t currentRelaxedOrderingQueueSize = 0; DirectSubmissionSfenceMode sfenceMode = DirectSubmissionSfenceMode::BeforeAndAfterSemaphore; volatile uint32_t reserved = 0u; diff --git a/shared/source/direct_submission/direct_submission_hw.inl b/shared/source/direct_submission/direct_submission_hw.inl index f3d9ed4916..2b54316134 100644 --- a/shared/source/direct_submission/direct_submission_hw.inl +++ b/shared/source/direct_submission/direct_submission_hw.inl @@ -81,7 +81,7 @@ DirectSubmissionHw::DirectSubmissionHw(const DirectSubmis UNRECOVERABLE_IF(!CpuInfo::getInstance().isFeatureSupported(CpuInfo::featureClflush) && !disableCpuCacheFlush); createDiagnostic(); - setPostSyncOffset(); + setImmWritePostSyncOffset(); dcFlushRequired = MemorySynchronizationCommands::getDcFlushEnable(true, inputParams.rootDeviceEnvironment); auto &gfxCoreHelper = inputParams.rootDeviceEnvironment.getHelper(); diff --git a/shared/source/direct_submission/direct_submission_xe_hp_core_and_later.inl b/shared/source/direct_submission/direct_submission_xe_hp_core_and_later.inl index 7ca1e6f838..5ab3f3c436 100644 --- a/shared/source/direct_submission/direct_submission_xe_hp_core_and_later.inl +++ b/shared/source/direct_submission/direct_submission_xe_hp_core_and_later.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2022 Intel Corporation + * Copyright (C) 2021-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -15,7 +15,7 @@ template inline void DirectSubmissionHw::dispatchPartitionRegisterConfiguration() { ImplicitScalingDispatch::dispatchRegisterConfiguration(ringCommandStream, this->workPartitionAllocation->getGpuAddress(), - this->postSyncOffset); + this->immWritePostSyncOffset); } template @@ -24,8 +24,8 @@ inline size_t DirectSubmissionHw::getSizePartitionRegiste } template -inline void DirectSubmissionHw::setPostSyncOffset() { - this->postSyncOffset = ImplicitScalingDispatch::getPostSyncOffset(); +inline void DirectSubmissionHw::setImmWritePostSyncOffset() { + this->immWritePostSyncOffset = ImplicitScalingDispatch::getImmediateWritePostSyncOffset(); } } // namespace NEO diff --git a/shared/source/direct_submission/linux/drm_direct_submission.inl b/shared/source/direct_submission/linux/drm_direct_submission.inl index 84875578cc..44a7923436 100644 --- a/shared/source/direct_submission/linux/drm_direct_submission.inl +++ b/shared/source/direct_submission/linux/drm_direct_submission.inl @@ -100,7 +100,7 @@ inline DrmDirectSubmission::~DrmDirectSubmission() { auto osContextLinux = static_cast(&this->osContext); auto &drm = osContextLinux->getDrm(); auto completionFenceCpuAddress = reinterpret_cast(this->completionFenceAllocation->getUnderlyingBuffer()) + TagAllocationLayout::completionFenceOffset; - drm.waitOnUserFences(*osContextLinux, completionFenceCpuAddress, this->completionFenceValue, this->activeTiles, this->postSyncOffset); + drm.waitOnUserFences(*osContextLinux, completionFenceCpuAddress, this->completionFenceValue, this->activeTiles, this->immWritePostSyncOffset); } this->deallocateResources(); if (this->pciBarrierPtr) { @@ -170,7 +170,7 @@ bool DrmDirectSubmission::submit(uint64_t gpuAddress, siz } drmContextId++; if (completionFenceGpuAddress) { - completionFenceGpuAddress += this->postSyncOffset; + completionFenceGpuAddress += this->immWritePostSyncOffset; } } } @@ -264,7 +264,7 @@ inline bool DrmDirectSubmission::isCompleted(uint32_t rin if (*pollAddress < taskCount) { return false; } - pollAddress = ptrOffset(pollAddress, this->postSyncOffset); + pollAddress = ptrOffset(pollAddress, this->immWritePostSyncOffset); } return true; } @@ -280,7 +280,7 @@ void DrmDirectSubmission::wait(TaskCountType taskCountToW for (uint32_t i = 0; i < this->activeTiles; i++) { while (!WaitUtils::waitFunction(pollAddress, taskCountToWait)) { } - pollAddress = ptrOffset(pollAddress, this->postSyncOffset); + pollAddress = ptrOffset(pollAddress, this->immWritePostSyncOffset); } } diff --git a/shared/source/memory_manager/allocations_list.cpp b/shared/source/memory_manager/allocations_list.cpp index 55b9d578ba..5c7587457b 100644 --- a/shared/source/memory_manager/allocations_list.cpp +++ b/shared/source/memory_manager/allocations_list.cpp @@ -54,7 +54,7 @@ std::unique_ptr AllocationsList::detachAllocation(size_t req req.contextId = (commandStreamReceiver == nullptr) ? UINT32_MAX : commandStreamReceiver->getOsContext().getContextId(); req.requiredPtr = requiredPtr; req.activeTileCount = (commandStreamReceiver == nullptr) ? 1u : commandStreamReceiver->getActivePartitions(); - req.tagOffset = (commandStreamReceiver == nullptr) ? 0u : commandStreamReceiver->getPostSyncWriteOffset(); + req.tagOffset = (commandStreamReceiver == nullptr) ? 0u : commandStreamReceiver->getImmWritePostSyncWriteOffset(); req.forceSystemMemoryFlag = forceSystemMemoryFlag; GraphicsAllocation *a = nullptr; GraphicsAllocation *retAlloc = processLocked(a, static_cast(&req)); diff --git a/shared/source/os_interface/linux/drm_command_stream.inl b/shared/source/os_interface/linux/drm_command_stream.inl index 6ba7340668..d8a92e8cb9 100644 --- a/shared/source/os_interface/linux/drm_command_stream.inl +++ b/shared/source/os_interface/linux/drm_command_stream.inl @@ -238,7 +238,7 @@ int DrmCommandStreamReceiver::exec(const BatchBuffer &batchBuffer, ui uint64_t completionGpuAddress = 0; TaskCountType completionValue = 0; if (this->drm->isVmBindAvailable() && this->drm->completionFenceSupport()) { - completionGpuAddress = getTagAllocation()->getGpuAddress() + (index * this->postSyncWriteOffset) + TagAllocationLayout::completionFenceOffset; + completionGpuAddress = getTagAllocation()->getGpuAddress() + (index * this->immWritePostSyncWriteOffset) + TagAllocationLayout::completionFenceOffset; completionValue = this->latestSentTaskCount; } diff --git a/shared/source/os_interface/linux/drm_command_stream_xehp_and_later.inl b/shared/source/os_interface/linux/drm_command_stream_xehp_and_later.inl index deb0d36efd..72fcea3cbc 100644 --- a/shared/source/os_interface/linux/drm_command_stream_xehp_and_later.inl +++ b/shared/source/os_interface/linux/drm_command_stream_xehp_and_later.inl @@ -70,12 +70,12 @@ int DrmCommandStreamReceiver::waitUserFence(TaskCountType waitValue) UNRECOVERABLE_IF(ctxIds.size() != this->activePartitions); for (uint32_t i = 0; i < this->activePartitions; i++) { ret |= this->drm->waitUserFence(ctxIds[i], tagAddress, waitValue, Drm::ValueWidth::U64, kmdWaitTimeout, 0u); - tagAddress += this->postSyncWriteOffset; + tagAddress += this->immWritePostSyncWriteOffset; } } else { for (uint32_t i = 0; i < this->activePartitions; i++) { ret |= this->drm->waitUserFence(0u, tagAddress, waitValue, Drm::ValueWidth::U64, kmdWaitTimeout, 0u); - tagAddress += this->postSyncWriteOffset; + tagAddress += this->immWritePostSyncWriteOffset; } } diff --git a/shared/source/os_interface/linux/drm_memory_manager.cpp b/shared/source/os_interface/linux/drm_memory_manager.cpp index bfc6592222..9a137f11eb 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager.cpp @@ -2014,7 +2014,7 @@ void DrmMemoryManager::waitOnCompletionFence(GraphicsAllocation *allocation) { if (allocation->isUsedByOsContext(osContextId)) { Drm &drm = getDrm(csr->getRootDeviceIndex()); - drm.waitOnUserFences(static_cast(*osContext), completionFenceAddress, allocationTaskCount, csr->getActivePartitions(), csr->getPostSyncWriteOffset()); + drm.waitOnUserFences(static_cast(*osContext), completionFenceAddress, allocationTaskCount, csr->getActivePartitions(), csr->getImmWritePostSyncWriteOffset()); } } } else { diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index 0b8b94440a..b844c91eb6 100644 --- a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -93,6 +93,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::CommandStreamReceiver::globalFenceAllocation; using BaseClass::CommandStreamReceiver::gpuHangCheckPeriod; using BaseClass::CommandStreamReceiver::gsbaFor32BitProgrammed; + using BaseClass::CommandStreamReceiver::immWritePostSyncWriteOffset; using BaseClass::CommandStreamReceiver::initDirectSubmission; using BaseClass::CommandStreamReceiver::internalAllocationStorage; using BaseClass::CommandStreamReceiver::isBlitterDirectSubmissionEnabled; @@ -118,7 +119,6 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::CommandStreamReceiver::ownershipMutex; using BaseClass::CommandStreamReceiver::perfCounterAllocator; using BaseClass::CommandStreamReceiver::pipelineSupportFlags; - using BaseClass::CommandStreamReceiver::postSyncWriteOffset; using BaseClass::CommandStreamReceiver::profilingTimeStampAllocator; using BaseClass::CommandStreamReceiver::requiredPrivateScratchSize; using BaseClass::CommandStreamReceiver::requiredScratchSize; @@ -133,6 +133,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ using BaseClass::CommandStreamReceiver::taskLevel; using BaseClass::CommandStreamReceiver::timestampPacketAllocator; using BaseClass::CommandStreamReceiver::timestampPacketWriteEnabled; + using BaseClass::CommandStreamReceiver::timeStampPostSyncWriteOffset; using BaseClass::CommandStreamReceiver::useGpuIdleImplicitFlush; using BaseClass::CommandStreamReceiver::useNewResourceImplicitFlush; using BaseClass::CommandStreamReceiver::useNotifyEnableForPostSync; diff --git a/shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h b/shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h index d0dcf745f0..de7621a7fb 100644 --- a/shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h +++ b/shared/test/common/mocks/linux/mock_drm_command_stream_receiver.h @@ -33,12 +33,13 @@ class TestedDrmCommandStreamReceiver : public DrmCommandStreamReceiver { using CommandStreamReceiver::feSupportFlags; using CommandStreamReceiver::globalFenceAllocation; using CommandStreamReceiver::heapStorageRequiresRecyclingTag; + using CommandStreamReceiver::immWritePostSyncWriteOffset; using CommandStreamReceiver::isPreambleSent; using CommandStreamReceiver::latestFlushedTaskCount; using CommandStreamReceiver::mediaVfeStateDirty; using CommandStreamReceiver::nTo1SubmissionModelEnabled; using CommandStreamReceiver::pageTableManagerInitialized; - using CommandStreamReceiver::postSyncWriteOffset; using CommandStreamReceiver::requiredScratchSize; using CommandStreamReceiver::sbaSupportFlags; using CommandStreamReceiver::streamProperties; @@ -274,6 +275,7 @@ class MockCsrHw2 : public CommandStreamReceiverHw { using CommandStreamReceiver::taskCount; using CommandStreamReceiver::taskLevel; using CommandStreamReceiver::timestampPacketWriteEnabled; + using CommandStreamReceiver::timeStampPostSyncWriteOffset; using CommandStreamReceiver::useGpuIdleImplicitFlush; using CommandStreamReceiver::useNewResourceImplicitFlush; diff --git a/shared/test/common/mocks/mock_direct_submission_hw.h b/shared/test/common/mocks/mock_direct_submission_hw.h index cb6452af33..bc72998687 100644 --- a/shared/test/common/mocks/mock_direct_submission_hw.h +++ b/shared/test/common/mocks/mock_direct_submission_hw.h @@ -51,6 +51,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw using BaseClass::getSizeSwitchRingBufferSection; using BaseClass::getSizeSystemMemoryFenceAddress; using BaseClass::hwInfo; + using BaseClass::immWritePostSyncOffset; using BaseClass::isDisablePrefetcherRequired; using BaseClass::miMemFenceRequired; using BaseClass::osContext; @@ -58,7 +59,6 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw using BaseClass::partitionedMode; using BaseClass::pciBarrierPtr; using BaseClass::performDiagnosticMode; - using BaseClass::postSyncOffset; using BaseClass::preinitializedRelaxedOrderingScheduler; using BaseClass::preinitializedTaskStoreSection; using BaseClass::relaxedOrderingEnabled; diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 03aaf0039e..cc2d8d3f26 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -1309,7 +1309,7 @@ TEST(CommandStreamReceiverSimpleTest, givenCommandStreamReceiverWhenInitializeTa MockExecutionEnvironment executionEnvironment(defaultHwInfo.get()); DeviceBitfield devices(0b11); auto csr = std::make_unique(executionEnvironment, 0, devices); - csr->postSyncWriteOffset = 32u; + csr->immWritePostSyncWriteOffset = 32u; executionEnvironment.memoryManager.reset(new OsAgnosticMemoryManager(executionEnvironment)); EXPECT_EQ(nullptr, csr->getTagAllocation()); csr->initializeTagAllocation(); @@ -1319,7 +1319,7 @@ TEST(CommandStreamReceiverSimpleTest, givenCommandStreamReceiverWhenInitializeTa auto tagAddress = csr->getTagAddress(); for (uint32_t i = 0; i < 2; i++) { EXPECT_EQ(*tagAddress, initialHardwareTag); - tagAddress = ptrOffset(tagAddress, csr->getPostSyncWriteOffset()); + tagAddress = ptrOffset(tagAddress, csr->getImmWritePostSyncWriteOffset()); } } @@ -1327,7 +1327,7 @@ TEST(CommandStreamReceiverSimpleTest, givenCommandStreamReceiverWhenEnsureTagAll MockExecutionEnvironment executionEnvironment(defaultHwInfo.get(), true, 10u); DeviceBitfield devices(0b1111); auto csr = std::make_unique(executionEnvironment, 0, devices); - csr->postSyncWriteOffset = 32u; + csr->immWritePostSyncWriteOffset = 32u; executionEnvironment.memoryManager.reset(new OsAgnosticMemoryManager(executionEnvironment)); EXPECT_EQ(nullptr, csr->getTagAllocation()); @@ -1340,7 +1340,7 @@ TEST(CommandStreamReceiverSimpleTest, givenCommandStreamReceiverWhenEnsureTagAll auto tagAddress = csr->getTagAddress(); for (uint32_t i = 0; i < 4; i++) { EXPECT_EQ(*tagAddress, initialHardwareTag); - tagAddress = ptrOffset(tagAddress, csr->getPostSyncWriteOffset()); + tagAddress = ptrOffset(tagAddress, csr->getImmWritePostSyncWriteOffset()); } auto tagsMultiAllocation = csr->getTagsMultiAllocation(); @@ -1594,9 +1594,9 @@ TEST(CommandStreamReceiverSimpleTest, givenMultipleActivePartitionsWhenWaitingFo temporaryAllocation->updateTaskCount(0u, 0u); csr.getInternalAllocationStorage()->storeAllocationWithTaskCount(std::move(temporaryAllocation), TEMPORARY_ALLOCATION, 2u); - csr.postSyncWriteOffset = 32u; + csr.immWritePostSyncWriteOffset = 32u; csr.mockTagAddress[0] = 0u; - auto nextPartitionTagAddress = ptrOffset(&csr.mockTagAddress[0], csr.getPostSyncWriteOffset()); + auto nextPartitionTagAddress = ptrOffset(&csr.mockTagAddress[0], csr.getImmWritePostSyncWriteOffset()); *nextPartitionTagAddress = 0u; csr.taskCount = 3u; @@ -1608,7 +1608,7 @@ TEST(CommandStreamReceiverSimpleTest, givenMultipleActivePartitionsWhenWaitingFo CpuIntrinsicsTests::pauseAddress = &csr.mockTagAddress[0]; CpuIntrinsicsTests::pauseValue = 3u; - CpuIntrinsicsTests::pauseOffset = csr.getPostSyncWriteOffset(); + CpuIntrinsicsTests::pauseOffset = csr.getImmWritePostSyncWriteOffset(); CpuIntrinsicsTests::pauseCounter = 0; @@ -2250,7 +2250,7 @@ HWTEST_F(CommandStreamReceiverTest, givenMultipleActivePartitionsWhenWaitLogIsEn volatile TagAddressType *tagAddress = csr.tagAddress; constexpr TagAddressType tagValue = 2; *tagAddress = tagValue; - tagAddress = ptrOffset(tagAddress, csr.postSyncWriteOffset); + tagAddress = ptrOffset(tagAddress, csr.immWritePostSyncWriteOffset); *tagAddress = tagValue; WaitParams waitParams; diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_with_aub_dump_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_with_aub_dump_tests.cpp index 4460fe975e..ac275cb909 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_with_aub_dump_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_with_aub_dump_tests.cpp @@ -326,7 +326,7 @@ HWTEST_F(CommandStreamReceiverWithAubDumpSimpleTest, givenCsrWithAubDumpWhenCrea for (uint32_t i = 0; i < subDevicesCount; i++) { EXPECT_EQ(std::numeric_limits::max(), *tagAddressToInitialize); - tagAddressToInitialize = ptrOffset(tagAddressToInitialize, csrWithAubDump.aubCSR->getPostSyncWriteOffset()); + tagAddressToInitialize = ptrOffset(tagAddressToInitialize, csrWithAubDump.aubCSR->getImmWritePostSyncWriteOffset()); } } diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp index dfec61d876..8f3c1ca78b 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp @@ -808,8 +808,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionDispatchBufferTest, EXPECT_EQ(1u, directSubmission.handleResidencyCount); EXPECT_EQ(expectedAllocationsCount, directSubmission.makeResourcesResidentVectorSize); - uint32_t expectedOffset = NEO::ImplicitScalingDispatch::getPostSyncOffset(); - EXPECT_EQ(expectedOffset, directSubmission.postSyncOffset); + uint32_t expectedOffset = NEO::ImplicitScalingDispatch::getImmediateWritePostSyncOffset(); + EXPECT_EQ(expectedOffset, directSubmission.immWritePostSyncOffset); HardwareParse hwParse; hwParse.parseCommands(directSubmission.ringCommandStream, 0); @@ -870,7 +870,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionDispatchBufferTest, hwParse.parseCommands(directSubmission.ringCommandStream, 0); hwParse.findHardwareCommands(); - uint32_t expectedOffset = NEO::ImplicitScalingDispatch::getPostSyncOffset(); + uint32_t expectedOffset = NEO::ImplicitScalingDispatch::getImmediateWritePostSyncOffset(); ASSERT_NE(hwParse.lriList.end(), hwParse.lriList.begin()); bool partitionRegisterFound = false; diff --git a/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp b/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp index 9536ebfb27..215aaeb9a5 100644 --- a/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp +++ b/shared/test/unit_test/direct_submission/linux/drm_direct_submission_tests.cpp @@ -80,13 +80,13 @@ struct MockDrmDirectSubmission : public DrmDirectSubmissionwaitUserFenceParams[0].address); EXPECT_EQ(expectedCompletionValueToWait, drm->waitUserFenceParams[1].value); - EXPECT_EQ(completionFenceBaseCpuAddress + commandStreamReceiver.getPostSyncWriteOffset(), drm->waitUserFenceParams[1].address); + EXPECT_EQ(completionFenceBaseCpuAddress + commandStreamReceiver.getImmWritePostSyncWriteOffset(), drm->waitUserFenceParams[1].address); } commandStreamReceiver.setupContext(*osContext); } @@ -613,7 +613,7 @@ HWTEST_F(DrmDirectSubmissionTest, givenTwoTilesAndCompletionFenceSupportWhenSubm EXPECT_EQ(completionFenceBaseGpuAddress, mockBO.passedExecParams[0].completionGpuAddress); EXPECT_EQ(i + 1, mockBO.passedExecParams[0].completionValue); - EXPECT_EQ(completionFenceBaseGpuAddress + commandStreamReceiver.getPostSyncWriteOffset(), mockBO.passedExecParams[1].completionGpuAddress); + EXPECT_EQ(completionFenceBaseGpuAddress + commandStreamReceiver.getImmWritePostSyncWriteOffset(), mockBO.passedExecParams[1].completionGpuAddress); EXPECT_EQ(i + 1, mockBO.passedExecParams[1].completionValue); } ringBuffer->getBufferObjectToModify(0) = initialBO; @@ -790,7 +790,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DrmDirectSubmissionTest, givenMultipleActiveTilesWh MockDrmDirectSubmission directSubmission(*device->getDefaultEngine().commandStreamReceiver); - uint32_t offset = directSubmission.postSyncOffset; + uint32_t offset = directSubmission.immWritePostSyncOffset; EXPECT_NE(0u, offset); bool ret = directSubmission.allocateResources(); EXPECT_TRUE(ret); diff --git a/shared/test/unit_test/memory_manager/deferrable_allocation_deletion_tests.cpp b/shared/test/unit_test/memory_manager/deferrable_allocation_deletion_tests.cpp index 1615e8a7fc..6d96a1fe97 100644 --- a/shared/test/unit_test/memory_manager/deferrable_allocation_deletion_tests.cpp +++ b/shared/test/unit_test/memory_manager/deferrable_allocation_deletion_tests.cpp @@ -235,7 +235,7 @@ TEST_F(DeferrableAllocationDeletionTest, givenAllocationUsedByUnregisteredEngine HWTEST_F(DeferrableAllocationDeletionTest, givenMultiTileWhenTaskCompletedOnSingleTileThenDoNotFreeGraphicsAllocation) { auto csr = reinterpret_cast *>(device->getDefaultEngine().commandStreamReceiver); csr->setActivePartitions(2u); - csr->postSyncWriteOffset = 32; + csr->immWritePostSyncWriteOffset = 32; auto hwTagNextTile = ptrOffset(hwTag, 32); auto allocation = memoryManager->allocateGraphicsMemoryWithProperties(MockAllocationProperties{device->getRootDeviceIndex(), MemoryConstants::pageSize}); diff --git a/shared/test/unit_test/memory_manager/internal_allocation_storage_tests.cpp b/shared/test/unit_test/memory_manager/internal_allocation_storage_tests.cpp index 20e5de78bc..26afe4d4bb 100644 --- a/shared/test/unit_test/memory_manager/internal_allocation_storage_tests.cpp +++ b/shared/test/unit_test/memory_manager/internal_allocation_storage_tests.cpp @@ -281,7 +281,7 @@ TEST_F(InternalAllocationStorageTest, givenAllocationListWhenTwoThreadsCleanConc HWTEST_F(InternalAllocationStorageTest, givenMultipleActivePartitionsWhenDetachingReusableAllocationThenCheckTaskCountFinishedOnAllTiles) { auto ultCsr = reinterpret_cast *>(csr); csr->setActivePartitions(2u); - ultCsr->postSyncWriteOffset = 32; + ultCsr->immWritePostSyncWriteOffset = 32; auto tagAddress = csr->getTagAddress(); *tagAddress = 0xFF; diff --git a/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_prelim_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_prelim_tests.cpp index 2a868b5b99..3db8e8849a 100644 --- a/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_prelim_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_prelim_tests.cpp @@ -208,7 +208,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTestDrmPrelim, givenWaitUserFenceEnab testDrmCsr->useUserFenceWait = true; testDrmCsr->useContextForUserFenceWait = false; testDrmCsr->activePartitions = 2u; - EXPECT_NE(0u, testDrmCsr->postSyncWriteOffset); + EXPECT_NE(0u, testDrmCsr->immWritePostSyncWriteOffset); uint64_t tagAddress = castToUint64(const_cast(testDrmCsr->getTagAddress())); FlushStamp handleToWait = 123; @@ -218,7 +218,7 @@ HWTEST_TEMPLATED_F(DrmCommandStreamEnhancedTestDrmPrelim, givenWaitUserFenceEnab EXPECT_EQ(123u, testDrmCsr->waitUserFenceResult.waitValue); EXPECT_EQ(2u, mock->context.gemWaitUserFenceCalled); - EXPECT_EQ(tagAddress + testDrmCsr->postSyncWriteOffset, mock->context.receivedGemWaitUserFence.addr); + EXPECT_EQ(tagAddress + testDrmCsr->immWritePostSyncWriteOffset, mock->context.receivedGemWaitUserFence.addr); EXPECT_EQ(handleToWait, mock->context.receivedGemWaitUserFence.value); EXPECT_EQ(0u, mock->context.receivedGemWaitUserFence.ctxId); EXPECT_EQ(DrmPrelimHelper::getGTEWaitUserFenceFlag(), mock->context.receivedGemWaitUserFence.op); diff --git a/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_tests.cpp b/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_tests.cpp index ffa966ff8f..3b3ed5904a 100644 --- a/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_tests.cpp +++ b/shared/test/unit_test/os_interface/linux/drm_command_stream_xehp_and_later_tests.cpp @@ -99,9 +99,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DrmCommandStreamMultiTileMemExecTest, GivenDrmSuppo testCsr->makeResident(*testCsr->getTagAllocation()); testCsr->latestSentTaskCount = 2; - testCsr->postSyncWriteOffset = 16; + testCsr->immWritePostSyncWriteOffset = 16; - uint64_t expectedCompletionGpuAddress = testCsr->getTagAllocation()->getGpuAddress() + TagAllocationLayout::completionFenceOffset + testCsr->postSyncWriteOffset; + uint64_t expectedCompletionGpuAddress = testCsr->getTagAllocation()->getGpuAddress() + TagAllocationLayout::completionFenceOffset + testCsr->immWritePostSyncWriteOffset; SubmissionStatus ret = testCsr->flushInternal(batchBuffer, testCsr->getResidencyAllocations()); EXPECT_EQ(SubmissionStatus::SUCCESS, ret); @@ -117,7 +117,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DrmCommandStreamMultiTileMemExecTest, GivenDrmSuppo EngineControl &defaultEngine = device->getDefaultEngine(); EXPECT_EQ(2u, defaultEngine.commandStreamReceiver->getActivePartitions()); - uint32_t postSyncOffset = defaultEngine.commandStreamReceiver->getPostSyncWriteOffset(); + uint32_t postSyncOffset = defaultEngine.commandStreamReceiver->getImmWritePostSyncWriteOffset(); EXPECT_NE(0u, postSyncOffset); mock->completionFenceSupported = true; @@ -151,7 +151,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DrmCommandStreamMultiTileMemExecTest, GivenDrmSuppo EngineControl &defaultEngine = device->getDefaultEngine(); EXPECT_EQ(2u, defaultEngine.commandStreamReceiver->getActivePartitions()); - uint32_t postSyncOffset = defaultEngine.commandStreamReceiver->getPostSyncWriteOffset(); + uint32_t postSyncOffset = defaultEngine.commandStreamReceiver->getImmWritePostSyncWriteOffset(); EXPECT_NE(0u, postSyncOffset); mock->completionFenceSupported = true;