diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index b8da7ca135..030a43dcbe 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -994,7 +994,7 @@ ze_result_t CommandListCoreFamilyImmediate::hostSynchronize(uint6 } else { const int64_t timeoutInMicroSeconds = timeout / 1000; const auto indefinitelyPoll = timeout == std::numeric_limits::max(); - const auto waitStatus = waitCsr->waitForCompletionWithTimeout(NEO::WaitParams{indefinitelyPoll, !indefinitelyPoll, timeoutInMicroSeconds}, waitTaskCount); + const auto waitStatus = waitCsr->waitForCompletionWithTimeout(NEO::WaitParams{indefinitelyPoll, !indefinitelyPoll, false, timeoutInMicroSeconds}, waitTaskCount); if (waitStatus == NEO::WaitStatus::gpuHang) { status = ZE_RESULT_ERROR_DEVICE_LOST; } else if (waitStatus == NEO::WaitStatus::notReady) { diff --git a/level_zero/core/source/cmdlist/cmdlist_imp.cpp b/level_zero/core/source/cmdlist/cmdlist_imp.cpp index ea51f8a3f1..a143bf2157 100644 --- a/level_zero/core/source/cmdlist/cmdlist_imp.cpp +++ b/level_zero/core/source/cmdlist/cmdlist_imp.cpp @@ -51,7 +51,7 @@ ze_result_t CommandListImp::destroy() { if (isImmediateType() && this->isFlushTaskSubmissionEnabled && !this->isSyncModeQueue) { auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout; - getCsr(false)->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, getCsr(false)->peekTaskCount()); + getCsr(false)->waitForCompletionWithTimeout(NEO::WaitParams{false, false, false, timeoutMicroseconds}, getCsr(false)->peekTaskCount()); } if (!isImmediateType() && diff --git a/level_zero/core/source/cmdqueue/cmdqueue.cpp b/level_zero/core/source/cmdqueue/cmdqueue.cpp index e67c62bbdc..dd37f369fc 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue.cpp +++ b/level_zero/core/source/cmdqueue/cmdqueue.cpp @@ -177,7 +177,7 @@ ze_result_t CommandQueueImp::synchronizeByPollingForTaskCount(uint64_t timeoutNa timeoutMicroseconds = NEO::TimeoutControls::maxTimeout; } - const auto waitStatus = csr->waitForCompletionWithTimeout(NEO::WaitParams{false, enableTimeout, timeoutMicroseconds}, taskCountToWait); + const auto waitStatus = csr->waitForCompletionWithTimeout(NEO::WaitParams{false, enableTimeout, false, timeoutMicroseconds}, taskCountToWait); if (waitStatus == NEO::WaitStatus::notReady) { return ZE_RESULT_NOT_READY; } diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp index bcb269f6f7..3b201da6dc 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -784,7 +784,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenWaitForT auto cmdBuffer = cmdBufferList.peekHead(); EXPECT_EQ(1u, cmdBuffer->taskCount); - mockCsr->waitForCompletionWithTimeout(WaitParams{false, false, 1}, 1); + mockCsr->waitForCompletionWithTimeout(WaitParams{false, false, false, 1}, 1); EXPECT_EQ(1u, mockCsr->peekLatestFlushedTaskCount()); @@ -2090,5 +2090,5 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenWaitForCompletionWithTimeoutI mockCsr.latestSentTaskCount = 1; auto cmdBuffer = std::make_unique(*pDevice); mockCsr.submissionAggregator->recordCommandBuffer(cmdBuffer.release()); - EXPECT_EQ(NEO::WaitStatus::notReady, mockCsr.waitForCompletionWithTimeout(WaitParams{false, false, 0}, 1)); + EXPECT_EQ(NEO::WaitStatus::notReady, mockCsr.waitForCompletionWithTimeout(WaitParams{false, false, false, 0}, 1)); } diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp index 846d8be0a7..b88220830d 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp @@ -816,7 +816,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTagValueNotMeetingTaskCountTo CpuIntrinsicsTests::pauseAddress = mockCsr->tagAddress; CpuIntrinsicsTests::pauseValue = taskCountToWait; - const auto ret = mockCsr->waitForCompletionWithTimeout(WaitParams{false, false, 1}, taskCountToWait); + const auto ret = mockCsr->waitForCompletionWithTimeout(WaitParams{false, false, false, 1}, taskCountToWait); EXPECT_EQ(NEO::WaitStatus::ready, ret); } @@ -836,7 +836,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenTagValueNotMeetingTaskCountTo CpuIntrinsicsTests::pauseAddress = mockCsr->tagAddress; CpuIntrinsicsTests::pauseValue = taskCountToWait; - const auto ret = mockCsr->waitForCompletionWithTimeout(WaitParams{true, true, 10}, taskCountToWait); + const auto ret = mockCsr->waitForCompletionWithTimeout(WaitParams{true, true, false, 10}, taskCountToWait); EXPECT_EQ(NEO::WaitStatus::notReady, ret); } diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index 0dcbac8060..dfda24716d 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -215,7 +215,7 @@ WaitStatus CommandStreamReceiver::waitForTaskCount(TaskCountType requiredTaskCou auto address = getTagAddress(); if (!skipResourceCleanup() && address) { this->downloadTagAllocation(requiredTaskCount); - return baseWaitFunction(address, WaitParams{false, false, 0}, requiredTaskCount); + return baseWaitFunction(address, WaitParams{false, false, false, 0}, requiredTaskCount); } return WaitStatus::ready; diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index d26ebb8793..118e3893e7 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -828,7 +828,7 @@ inline WaitStatus CommandStreamReceiverHw::waitForTaskCountWithKmdNot if (status == WaitStatus::notReady) { waitForFlushStamp(flushStampToWait); // now call blocking wait, this is to ensure that task count is reached - status = waitForCompletionWithTimeout(WaitParams{false, false, 0}, taskCountToWait); + status = waitForCompletionWithTimeout(WaitParams{false, false, false, 0}, taskCountToWait); } // If GPU hang occured, then propagate it to the caller. diff --git a/shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl b/shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl index 6f223d5883..6260ba8055 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_heap_addressing.inl @@ -17,7 +17,7 @@ SubmissionStatus CommandStreamReceiverHw::initializeDeviceWithFirstSu auto status = flushTagUpdate(); if (isTbxMode() && (status == SubmissionStatus::success)) { - waitForTaskCountWithKmdNotifyFallback(this->taskCount, 0, false, QueueThrottle::MEDIUM); + waitForCompletionWithTimeout({true, false, true, TimeoutControls::maxTimeout}, this->taskCount); } return status; diff --git a/shared/source/command_stream/tbx_command_stream_receiver_hw.h b/shared/source/command_stream/tbx_command_stream_receiver_hw.h index 62631ccc56..56ac1643c6 100644 --- a/shared/source/command_stream/tbx_command_stream_receiver_hw.h +++ b/shared/source/command_stream/tbx_command_stream_receiver_hw.h @@ -30,7 +30,7 @@ class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw::initAdditionalMMIO; diff --git a/shared/source/command_stream/tbx_command_stream_receiver_hw.inl b/shared/source/command_stream/tbx_command_stream_receiver_hw.inl index 0d836ca2ef..6cc89f9b13 100644 --- a/shared/source/command_stream/tbx_command_stream_receiver_hw.inl +++ b/shared/source/command_stream/tbx_command_stream_receiver_hw.inl @@ -487,7 +487,7 @@ bool TbxCommandStreamReceiverHw::expectMemory(const void *gfxAddress, } template -void TbxCommandStreamReceiverHw::flushSubmissionsAndDownloadAllocations(TaskCountType taskCountToWait) { +void TbxCommandStreamReceiverHw::flushSubmissionsAndDownloadAllocations(TaskCountType taskCountToWait, bool skipAllocationsDownload) { this->flushBatchedSubmissions(); if (this->latestFlushedTaskCount < taskCountToWait) { @@ -502,6 +502,10 @@ void TbxCommandStreamReceiverHw::flushSubmissionsAndDownloadAllocatio pollAddress = ptrOffset(pollAddress, this->immWritePostSyncWriteOffset); } + if (skipAllocationsDownload) { + return; + } + auto lockCSR = this->obtainUniqueOwnership(); for (GraphicsAllocation *graphicsAllocation : this->allocationsForDownload) { this->downloadAllocation(*graphicsAllocation); @@ -511,13 +515,13 @@ void TbxCommandStreamReceiverHw::flushSubmissionsAndDownloadAllocatio template WaitStatus TbxCommandStreamReceiverHw::waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) { - flushSubmissionsAndDownloadAllocations(taskCountToWait); + flushSubmissionsAndDownloadAllocations(taskCountToWait, false); return BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, throttle); } template WaitStatus TbxCommandStreamReceiverHw::waitForCompletionWithTimeout(const WaitParams ¶ms, TaskCountType taskCountToWait) { - flushSubmissionsAndDownloadAllocations(taskCountToWait); + flushSubmissionsAndDownloadAllocations(taskCountToWait, params.skipTbxDownload); return BaseClass::waitForCompletionWithTimeout(params, taskCountToWait); } diff --git a/shared/source/command_stream/wait_status.h b/shared/source/command_stream/wait_status.h index 4efa97e1fc..0fb8e5ddc7 100644 --- a/shared/source/command_stream/wait_status.h +++ b/shared/source/command_stream/wait_status.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -18,8 +18,13 @@ enum class WaitStatus { }; struct WaitParams { + WaitParams() = default; + WaitParams(bool indefinitelyPoll, bool enableTimeout, bool skipTbxDownload, int64_t waitTimeout) + : indefinitelyPoll(indefinitelyPoll), enableTimeout(enableTimeout), skipTbxDownload(skipTbxDownload), waitTimeout(waitTimeout){}; + bool indefinitelyPoll = false; bool enableTimeout = false; + bool skipTbxDownload = false; int64_t waitTimeout = 0; }; diff --git a/shared/source/helpers/kmd_notify_properties.cpp b/shared/source/helpers/kmd_notify_properties.cpp index 5cd492b6d2..845185295e 100644 --- a/shared/source/helpers/kmd_notify_properties.cpp +++ b/shared/source/helpers/kmd_notify_properties.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2023 Intel Corporation + * Copyright (C) 2018-2024 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -24,7 +24,7 @@ WaitParams KmdNotifyHelper::obtainTimeoutParams(bool quickKmdSleepRequest, bool kmdWaitModeActive, bool directSubmissionEnabled) { if (throttle == QueueThrottle::HIGH) { - return WaitParams{true}; + return WaitParams{true, false, false, 0}; } if (flushStampToWait == 0) { @@ -36,7 +36,7 @@ WaitParams KmdNotifyHelper::obtainTimeoutParams(bool quickKmdSleepRequest, } if (debugManager.flags.PowerSavingMode.get() || throttle == QueueThrottle::LOW) { - return WaitParams{false, true, 1}; + return WaitParams{false, true, false, 1}; } const int64_t taskCountDiff = (currentHwTag < taskCountToWait) ? static_cast(taskCountToWait - currentHwTag) : 1; diff --git a/shared/source/memory_manager/memory_manager.cpp b/shared/source/memory_manager/memory_manager.cpp index 2f86cf12e7..32dd60f58c 100644 --- a/shared/source/memory_manager/memory_manager.cpp +++ b/shared/source/memory_manager/memory_manager.cpp @@ -902,7 +902,7 @@ void MemoryManager::waitForEnginesCompletion(GraphicsAllocation &graphicsAllocat if (graphicsAllocation.isUsedByOsContext(osContextId) && engine.commandStreamReceiver->getTagAllocation() != nullptr && allocationTaskCount > *engine.commandStreamReceiver->getTagAddress()) { - engine.commandStreamReceiver->waitForCompletionWithTimeout(WaitParams{false, false, TimeoutControls::maxTimeout}, allocationTaskCount); + engine.commandStreamReceiver->waitForCompletionWithTimeout(WaitParams{false, false, false, TimeoutControls::maxTimeout}, allocationTaskCount); } } } @@ -925,7 +925,7 @@ void MemoryManager::cleanTemporaryAllocationListOnAllEngines(bool waitForComplet for (auto &engine : engineContainer) { auto csr = engine.commandStreamReceiver; if (waitForCompletion) { - csr->waitForCompletionWithTimeout(WaitParams{false, false, 0}, csr->peekLatestSentTaskCount()); + csr->waitForCompletionWithTimeout(WaitParams{false, false, false, 0}, csr->peekLatestSentTaskCount()); } csr->getInternalAllocationStorage()->cleanAllocationList(*csr->getTagAddress(), AllocationUsage::TEMPORARY_ALLOCATION); } diff --git a/shared/source/os_interface/linux/drm_command_stream.inl b/shared/source/os_interface/linux/drm_command_stream.inl index d60a72acd5..84345d9798 100644 --- a/shared/source/os_interface/linux/drm_command_stream.inl +++ b/shared/source/os_interface/linux/drm_command_stream.inl @@ -78,7 +78,7 @@ DrmCommandStreamReceiver::DrmCommandStreamReceiver(ExecutionEnvironme template inline DrmCommandStreamReceiver::~DrmCommandStreamReceiver() { if (this->isUpdateTagFromWaitEnabled()) { - this->waitForCompletionWithTimeout(WaitParams{false, false, 0}, this->peekTaskCount()); + this->waitForCompletionWithTimeout(WaitParams{false, false, false, 0}, this->peekTaskCount()); } } diff --git a/shared/source/os_interface/linux/drm_memory_operations_handler_bind.cpp b/shared/source/os_interface/linux/drm_memory_operations_handler_bind.cpp index a2da9ddfe9..82fccb58b1 100644 --- a/shared/source/os_interface/linux/drm_memory_operations_handler_bind.cpp +++ b/shared/source/os_interface/linux/drm_memory_operations_handler_bind.cpp @@ -198,7 +198,7 @@ MemoryOperationsStatus DrmMemoryOperationsHandlerBind::evictUnusedAllocationsImp break; } if (waitForCompletion) { - const auto waitStatus = engine.commandStreamReceiver->waitForCompletionWithTimeout(WaitParams{false, false, 0}, engine.commandStreamReceiver->peekLatestFlushedTaskCount()); + const auto waitStatus = engine.commandStreamReceiver->waitForCompletionWithTimeout(WaitParams{false, false, false, 0}, engine.commandStreamReceiver->peekLatestFlushedTaskCount()); if (waitStatus == WaitStatus::gpuHang) { return MemoryOperationsStatus::gpuHangDetectedDuringOperation; } diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index 206a256138..11f3d9f7bf 100644 --- a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -304,7 +304,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ } WaitStatus waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, TaskCountType taskCountToWait) { - return waitForCompletionWithTimeout(WaitParams{false, enableTimeout, timeoutMicroseconds}, taskCountToWait); + return waitForCompletionWithTimeout(WaitParams{false, enableTimeout, false, timeoutMicroseconds}, taskCountToWait); } WaitStatus waitForTaskCountWithKmdNotifyFallback(TaskCountType taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, QueueThrottle throttle) override { @@ -523,7 +523,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw, publ std::atomic latestWaitForCompletionWithTimeoutTaskCount{0}; TaskCountType latestSentTaskCountValueDuringFlush = 0; - WaitParams latestWaitForCompletionWithTimeoutWaitParams{0}; + WaitParams latestWaitForCompletionWithTimeoutWaitParams{}; WaitUserFenceParams waitUserFenecParams; WriteMemoryParams writeMemoryParams; TaskCountType flushBcsTaskReturnValue{}; diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 045b2c0c84..42c7f66470 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -5349,15 +5349,16 @@ HWTEST_F(CommandStreamReceiverTest, givenTbxCsrWhenInitializingThenWaitForComple commandStreamReceiver.initializeTagAllocation(); EXPECT_EQ(0u, commandStreamReceiver.taskCount); - EXPECT_EQ(0u, commandStreamReceiver.waitForTaskCountWithKmdNotifyFallbackCalled); + EXPECT_EQ(0u, commandStreamReceiver.waitForCompletionWithTimeoutTaskCountCalled); EXPECT_EQ(SubmissionStatus::success, commandStreamReceiver.initializeDeviceWithFirstSubmission(*pDevice)); EXPECT_EQ(1u, commandStreamReceiver.taskCount); - EXPECT_EQ(1u, commandStreamReceiver.waitForTaskCountWithKmdNotifyFallbackCalled); + EXPECT_EQ(1u, commandStreamReceiver.waitForCompletionWithTimeoutTaskCountCalled); + EXPECT_TRUE(commandStreamReceiver.latestWaitForCompletionWithTimeoutWaitParams.skipTbxDownload); EXPECT_EQ(SubmissionStatus::success, commandStreamReceiver.initializeDeviceWithFirstSubmission(*pDevice)); EXPECT_EQ(1u, commandStreamReceiver.taskCount); - EXPECT_EQ(1u, commandStreamReceiver.waitForTaskCountWithKmdNotifyFallbackCalled); + EXPECT_EQ(1u, commandStreamReceiver.waitForCompletionWithTimeoutTaskCountCalled); MockCsrHw failingCommandStreamReceiver(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield()); failingCommandStreamReceiver.commandStreamReceiverType = CommandStreamReceiverType::tbx; diff --git a/shared/test/unit_test/command_stream/tbx_command_stream_tests.cpp b/shared/test/unit_test/command_stream/tbx_command_stream_tests.cpp index afbf9fb488..03969f63a0 100644 --- a/shared/test/unit_test/command_stream/tbx_command_stream_tests.cpp +++ b/shared/test/unit_test/command_stream/tbx_command_stream_tests.cpp @@ -437,7 +437,7 @@ HWTEST_F(TbxCommandSteamSimpleTest, givenTbxCsrWhenCallingWaitForCompletionWithT tbxCsr.allocationsForDownload = {&allocation1, &allocation2, &allocation3}; - tbxCsr.waitForCompletionWithTimeout(WaitParams{false, true, 0}, 0); + tbxCsr.waitForCompletionWithTimeout(WaitParams{false, true, false, 0}, 0); std::set expectedDownloadedAllocations = {tbxCsr.getTagAllocation(), &allocation1, &allocation2, &allocation3}; EXPECT_EQ(expectedDownloadedAllocations, tbxCsr.downloadedAllocations); @@ -456,7 +456,7 @@ HWTEST_F(TbxCommandSteamSimpleTest, givenLatestFlushedTaskCountLowerThanTagWhenF EXPECT_FALSE(tbxCsr.flushTagCalled); EXPECT_EQ(0u, tbxCsr.obtainUniqueOwnershipCalled); - tbxCsr.flushSubmissionsAndDownloadAllocations(1u); + tbxCsr.flushSubmissionsAndDownloadAllocations(1u, false); EXPECT_EQ(1u, tbxCsr.obtainUniqueOwnershipCalled); EXPECT_TRUE(tbxCsr.flushTagCalled);