From 814b0ecb872a9526610340d304441b77e1d03325 Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Thu, 17 Mar 2022 09:43:17 +0000 Subject: [PATCH] Improve wait function for Fence object Related-To: NEO-6242 Signed-off-by: Zbigniew Zdanowicz --- level_zero/core/source/fence/fence.cpp | 3 -- .../unit_tests/sources/fence/test_fence.cpp | 54 +++++++++++++++++++ .../command_stream_receiver.cpp | 12 ++++- 3 files changed, 64 insertions(+), 5 deletions(-) diff --git a/level_zero/core/source/fence/fence.cpp b/level_zero/core/source/fence/fence.cpp index 74747a9257..7c76837508 100644 --- a/level_zero/core/source/fence/fence.cpp +++ b/level_zero/core/source/fence/fence.cpp @@ -11,7 +11,6 @@ #include "shared/source/helpers/constants.h" #include "shared/source/helpers/string.h" #include "shared/source/memory_manager/memory_manager.h" -#include "shared/source/utilities/wait_util.h" namespace L0 { @@ -69,8 +68,6 @@ ze_result_t Fence::hostSynchronize(uint64_t timeout) { return ZE_RESULT_SUCCESS; } - NEO::WaitUtils::waitFunction(nullptr, 0u); - currentTime = std::chrono::high_resolution_clock::now(); elapsedTimeSinceGpuHangCheck = std::chrono::duration_cast<std::chrono::microseconds>(currentTime - lastHangCheckTime); diff --git a/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp b/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp index f96a5af132..d5583c2a50 100644 --- a/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp +++ b/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp @@ -22,6 +22,14 @@ using namespace std::chrono_literals; +namespace CpuIntrinsicsTests { +extern std::atomic<uint32_t> pauseCounter; +extern volatile uint32_t *pauseAddress; +extern uint32_t pauseValue; +extern uint32_t pauseOffset; +extern std::function<void()> setupPauseAddress; +} // namespace CpuIntrinsicsTests + namespace L0 { namespace ult { @@ -193,6 +201,52 @@ TEST_F(FenceSynchronizeTest, givenCallToFenceHostSynchronizeWithTimeoutNonZeroAn EXPECT_EQ(ZE_RESULT_SUCCESS, result); } 
+TEST_F(FenceSynchronizeTest, givenInfiniteTimeoutWhenWaitingForFenceCompletionThenReturnOnlyAfterAllCsrPartitionsCompleted) { + constexpr uint32_t activePartitions = 2; + constexpr uint32_t postSyncOffset = 16; + + const auto csr = std::make_unique<MockCommandStreamReceiver>(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield()); + ASSERT_NE(nullptr, csr->getTagAddress()); + csr->postSyncWriteOffset = postSyncOffset; + csr->activePartitions = activePartitions; + + Mock<CommandQueue> cmdqueue(device, csr.get()); + ze_fence_desc_t desc = {}; + + std::unique_ptr<WhiteBox<L0::Fence>> fence; + fence.reset(whitebox_cast(Fence::create(&cmdqueue, &desc))); + ASSERT_NE(nullptr, fence); + + fence->taskCount = 1; + + VariableBackup<volatile uint32_t *> backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); + VariableBackup<uint32_t> backupPauseValue(&CpuIntrinsicsTests::pauseValue, 0); + VariableBackup<uint32_t> backupPauseOffset(&CpuIntrinsicsTests::pauseOffset); + VariableBackup<std::function<void()>> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress); + CpuIntrinsicsTests::pauseCounter = 0u; + CpuIntrinsicsTests::pauseAddress = csr->getTagAddress(); + + volatile uint32_t *hostAddr = csr->getTagAddress(); + for (uint32_t i = 0; i < activePartitions; i++) { + *hostAddr = 0; + hostAddr = ptrOffset(hostAddr, postSyncOffset); + } + + CpuIntrinsicsTests::setupPauseAddress = [&]() { + if (CpuIntrinsicsTests::pauseCounter > 10) { + volatile uint32_t *nextPacket = CpuIntrinsicsTests::pauseAddress; + for (uint32_t i = 0; i < activePartitions; i++) { + *nextPacket = 1; + nextPacket = ptrOffset(nextPacket, postSyncOffset); + } + } + }; + + constexpr uint64_t infiniteTimeout = std::numeric_limits<uint64_t>::max(); + auto result = fence->hostSynchronize(infiniteTimeout); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); +} + using FenceAubCsrTest = Test<DeviceFixture>; HWTEST_F(FenceAubCsrTest, givenCallToFenceHostSynchronizeWithAubModeCsrReturnsSuccess) { diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index 
b6c80ed781..a9c8ecfbcc 100644 --- a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -366,7 +366,15 @@ WaitStatus CommandStreamReceiver::baseWaitFunction(volatile uint32_t *pollAddres partitionAddress = ptrOffset(partitionAddress, this->postSyncWriteOffset); } - return testTaskCountReady(pollAddress, taskCountToWait) ? WaitStatus::Ready : WaitStatus::NotReady; + partitionAddress = pollAddress; + for (uint32_t i = 0; i < activePartitions; i++) { + if (*partitionAddress < taskCountToWait) { + return WaitStatus::NotReady; + } + partitionAddress = ptrOffset(partitionAddress, this->postSyncWriteOffset); + } + + return WaitStatus::Ready; } void CommandStreamReceiver::setTagAllocation(GraphicsAllocation *allocation) { @@ -800,7 +808,7 @@ bool CommandStreamReceiver::checkImplicitFlushForGpuIdle() { bool CommandStreamReceiver::testTaskCountReady(volatile uint32_t *pollAddress, uint32_t taskCountToWait) { for (uint32_t i = 0; i < activePartitions; i++) { - if (*pollAddress < taskCountToWait) { + if (!WaitUtils::waitFunction(pollAddress, taskCountToWait)) { return false; }