diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index a288bf4b16..168daf9665 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -100,7 +100,11 @@ ze_result_t EventImp::queryStatusKernelTimestamp() { for (uint32_t i = 0; i < kernelCount; i++) { uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed(); for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) { - if (kernelEventCompletionData[i].getContextEndValue(packetId) == queryVal) { + bool ready = NEO::WaitUtils::waitFunctionWithPredicate( + static_cast(kernelEventCompletionData[i].getContextEndAddress(packetId)), + queryVal, + std::not_equal_to()); + if (!ready) { return ZE_RESULT_NOT_READY; } } @@ -116,7 +120,11 @@ ze_result_t EventImp::queryStatusNonTimestamp() { for (uint32_t i = 0; i < kernelCount; i++) { uint32_t packetsToCheck = kernelEventCompletionData[i].getPacketsUsed(); for (uint32_t packetId = 0; packetId < packetsToCheck; packetId++) { - if (kernelEventCompletionData[i].getContextStartValue(packetId) == queryVal) { + bool ready = NEO::WaitUtils::waitFunctionWithPredicate( + static_cast(kernelEventCompletionData[i].getContextStartAddress(packetId)), + queryVal, + std::not_equal_to()); + if (!ready) { return ZE_RESULT_NOT_READY; } } @@ -221,8 +229,6 @@ ze_result_t EventImp::hostSynchronize(uint64_t timeout) { return ret; } - NEO::WaitUtils::waitFunction(nullptr, 0u); - currentTime = std::chrono::high_resolution_clock::now(); elapsedTimeSinceGpuHangCheck = std::chrono::duration_cast(currentTime - lastHangCheckTime); @@ -233,7 +239,7 @@ ze_result_t EventImp::hostSynchronize(uint64_t timeout) { } } - if (timeout == std::numeric_limits::max()) { + if (timeout == std::numeric_limits::max()) { continue; } diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index 02b06f130b..764be39d60 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -6,6 +6,7 @@ */ #include "shared/test/common/helpers/debug_manager_state_restore.h" +#include "shared/test/common/helpers/variable_backup.h" #include "shared/test/common/mocks/mock_compilers.h" #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_memory_manager.h" @@ -29,8 +30,11 @@ using namespace std::chrono_literals; namespace CpuIntrinsicsTests { -extern std::atomic lastClFlushedPtr; -extern std::atomic clFlushCounter; +extern std::atomic pauseCounter; +extern volatile uint32_t *pauseAddress; +extern uint32_t pauseValue; +extern uint32_t pauseOffset; +extern std::function setupPauseAddress; } // namespace CpuIntrinsicsTests namespace L0 { @@ -627,8 +631,8 @@ TEST_F(EventSynchronizeTest, GivenGpuHangWhenHostSynchronizeIsCalledThenDeviceLo event->csr = csr.get(); event->gpuHangCheckPeriod = 0ms; - const auto timeout = std::numeric_limits::max(); - const auto result = event->hostSynchronize(timeout); + constexpr uint64_t timeout = std::numeric_limits::max(); + auto result = event->hostSynchronize(timeout); EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, result); } @@ -640,8 +644,8 @@ TEST_F(EventSynchronizeTest, GivenNoGpuHangAndOneNanosecondTimeoutWhenHostSynchr event->csr = csr.get(); event->gpuHangCheckPeriod = 0ms; - const auto timeoutNanoseconds = 1; - const auto result = event->hostSynchronize(timeoutNanoseconds); + constexpr uint64_t timeoutNanoseconds = 1; + auto result = event->hostSynchronize(timeoutNanoseconds); EXPECT_EQ(ZE_RESULT_NOT_READY, result); } @@ -651,8 +655,8 @@ TEST_F(EventSynchronizeTest, GivenLongPeriodOfGpuCheckAndOneNanosecondTimeoutWhe event->csr = csr.get(); event->gpuHangCheckPeriod = 50000000ms; - const auto timeoutNanoseconds = 1; - const auto result = event->hostSynchronize(timeoutNanoseconds); + constexpr uint64_t timeoutNanoseconds = 1; + auto result = event->hostSynchronize(timeoutNanoseconds); EXPECT_EQ(ZE_RESULT_NOT_READY, result); } @@ -668,19 +672,90 @@ TEST_F(EventSynchronizeTest, givenCallToEventHostSynchronizeWithNonZeroTimeoutAn } TEST_F(EventSynchronizeTest, givenCallToEventHostSynchronizeWithTimeoutZeroAndStateSignaledHostSynchronizeReturnsSuccess) { - uint64_t *hostAddr = static_cast(event->getHostAddress()); + uint32_t *hostAddr = static_cast(event->getHostAddress()); *hostAddr = Event::STATE_SIGNALED; ze_result_t result = event->hostSynchronize(0); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } TEST_F(EventSynchronizeTest, givenCallToEventHostSynchronizeWithTimeoutNonZeroAndStateSignaledHostSynchronizeReturnsSuccess) { - uint64_t *hostAddr = static_cast(event->getHostAddress()); + uint32_t *hostAddr = static_cast(event->getHostAddress()); *hostAddr = Event::STATE_SIGNALED; ze_result_t result = event->hostSynchronize(10); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } +TEST_F(EventSynchronizeTest, givenInfiniteTimeoutWhenWaitingForNonTimestampEventCompletionThenReturnOnlyAfterAllEventPacketsAreCompleted) { + constexpr uint32_t packetsInUse = 2; + event->setPacketsInUse(packetsInUse); + + const size_t eventPacketSize = event->getSinglePacketSize(); + const size_t eventCompletionOffset = event->getContextStartOffset(); + + VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); + VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue, Event::STATE_CLEARED); + VariableBackup backupPauseOffset(&CpuIntrinsicsTests::pauseOffset); + VariableBackup> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress); + CpuIntrinsicsTests::pauseCounter = 0u; + CpuIntrinsicsTests::pauseAddress = static_cast(ptrOffset(event->getHostAddress(), eventCompletionOffset)); + + uint32_t *hostAddr = static_cast(ptrOffset(event->getHostAddress(), eventCompletionOffset)); + for (uint32_t i = 0; i < packetsInUse; i++) { + *hostAddr = Event::STATE_CLEARED; + hostAddr = ptrOffset(hostAddr, eventPacketSize); + } + + CpuIntrinsicsTests::setupPauseAddress = [&]() { + if (CpuIntrinsicsTests::pauseCounter > 10) { + volatile uint32_t *nextPacket = CpuIntrinsicsTests::pauseAddress; + for (uint32_t i = 0; i < packetsInUse; i++) { + *nextPacket = Event::STATE_SIGNALED; + nextPacket = ptrOffset(nextPacket, eventPacketSize); + } + } + }; + + constexpr uint64_t infiniteTimeout = std::numeric_limits::max(); + ze_result_t result = event->hostSynchronize(infiniteTimeout); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); +} + +TEST_F(EventSynchronizeTest, givenInfiniteTimeoutWhenWaitingForTimestampEventCompletionThenReturnOnlyAfterAllEventPacketsAreCompleted) { + constexpr uint32_t packetsInUse = 2; + event->setPacketsInUse(packetsInUse); + event->setEventTimestampFlag(true); + + const size_t eventPacketSize = event->getSinglePacketSize(); + const size_t eventCompletionOffset = event->getContextEndOffset(); + + VariableBackup backupPauseAddress(&CpuIntrinsicsTests::pauseAddress); + VariableBackup backupPauseValue(&CpuIntrinsicsTests::pauseValue, Event::STATE_CLEARED); + VariableBackup backupPauseOffset(&CpuIntrinsicsTests::pauseOffset); + VariableBackup> backupSetupPauseAddress(&CpuIntrinsicsTests::setupPauseAddress); + CpuIntrinsicsTests::pauseCounter = 0u; + CpuIntrinsicsTests::pauseAddress = static_cast(ptrOffset(event->getHostAddress(), eventCompletionOffset)); + + uint32_t *hostAddr = static_cast(ptrOffset(event->getHostAddress(), eventCompletionOffset)); + for (uint32_t i = 0; i < packetsInUse; i++) { + *hostAddr = Event::STATE_CLEARED; + hostAddr = ptrOffset(hostAddr, eventPacketSize); + } + + CpuIntrinsicsTests::setupPauseAddress = [&]() { + if (CpuIntrinsicsTests::pauseCounter > 10) { + volatile uint32_t *nextPacket = CpuIntrinsicsTests::pauseAddress; + for (uint32_t i = 0; i < packetsInUse; i++) { + *nextPacket = Event::STATE_SIGNALED; + nextPacket = ptrOffset(nextPacket, eventPacketSize); + } + } + }; + + constexpr uint64_t infiniteTimeout = std::numeric_limits::max(); + ze_result_t result = event->hostSynchronize(infiniteTimeout); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); +} + using EventPoolIPCEventResetTests = Test; TEST_F(EventPoolIPCEventResetTests, whenOpeningIpcHandleForEventPoolCreateWithIpcFlagThenEventsInNewPoolAreNotReset) { diff --git a/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp b/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp index 5e543d146f..f96a5af132 100644 --- a/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp +++ b/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp @@ -71,8 +71,8 @@ TEST_F(FenceTest, GivenGpuHangWhenHostSynchronizeIsCalledThenDeviceLostIsReturne fence->taskCount = 1; fence->gpuHangCheckPeriod = 0ms; - const auto timeout = std::numeric_limits::max(); - const auto result = fence->hostSynchronize(timeout); + constexpr uint64_t timeout = std::numeric_limits::max(); + auto result = fence->hostSynchronize(timeout); EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, result); } @@ -92,8 +92,8 @@ TEST_F(FenceTest, GivenNoGpuHangAndOneNanosecondTimeoutWhenHostSynchronizeIsCall fence->taskCount = 1; fence->gpuHangCheckPeriod = 0ms; - const auto timeoutNanoseconds = 1; - const auto result = fence->hostSynchronize(timeoutNanoseconds); + constexpr uint64_t timeoutNanoseconds = 1; + auto result = fence->hostSynchronize(timeoutNanoseconds); EXPECT_EQ(ZE_RESULT_NOT_READY, result); } @@ -112,8 +112,8 @@ TEST_F(FenceTest, GivenLongPeriodOfGpuCheckAndOneNanosecondTimeoutWhenHostSynchr fence->taskCount = 1; fence->gpuHangCheckPeriod = 50000000ms; - const auto timeoutNanoseconds = 1; - const auto result = fence->hostSynchronize(timeoutNanoseconds); + constexpr uint64_t timeoutNanoseconds = 1; + auto result = fence->hostSynchronize(timeoutNanoseconds); EXPECT_EQ(ZE_RESULT_NOT_READY, result); } @@ -131,8 +131,8 @@ TEST_F(FenceTest, GivenSuccessfulQueryResultAndNoTimeoutWhenHostSynchronizeIsCal fence->taskCount = 1; - const auto timeout = std::numeric_limits::max(); - const auto result = fence->hostSynchronize(timeout); + constexpr uint64_t timeout = std::numeric_limits::max(); + auto result = fence->hostSynchronize(timeout); EXPECT_EQ(ZE_RESULT_SUCCESS, result); } diff --git a/shared/source/helpers/timestamp_packet.h b/shared/source/helpers/timestamp_packet.h index c7cadf485e..24a24beb98 100644 --- a/shared/source/helpers/timestamp_packet.h +++ b/shared/source/helpers/timestamp_packet.h @@ -71,6 +71,7 @@ class TimestampPackets : public TagTypeBase { uint64_t getGlobalEndValue(uint32_t packetIndex) const { return static_cast(packets[packetIndex].globalEnd); } void const *getContextEndAddress(uint32_t packetIndex) const { return static_cast(&packets[packetIndex].contextEnd); } + void const *getContextStartAddress(uint32_t packetIndex) const { return static_cast(&packets[packetIndex].contextStart); } protected: Packet packets[TimestampPacketSizeControl::preferredPacketCount];