From 7f729b7f89b9566e655b7eb80e80712e73c68931 Mon Sep 17 00:00:00 2001 From: Patryk Wrobel Date: Tue, 22 Feb 2022 12:51:29 +0000 Subject: [PATCH] Detect GPU hang in clWaitForEvents This change: - moves NEO::WaitStatus to a separate file - enables detection of GPU hang in clWaitForEvents - adjusts most of blocking calls in CommandStreamReceiver to return WaitStatus - adds ULTs to cover the new code Related-To: NEO-6681 Signed-off-by: Patryk Wrobel --- level_zero/core/source/cmdqueue/cmdqueue.cpp | 1 + .../sources/cmdqueue/test_cmdqueue_2.cpp | 1 + opencl/source/command_queue/command_queue.cpp | 38 +++- opencl/source/command_queue/command_queue.h | 10 +- opencl/source/event/event.cpp | 33 ++- opencl/source/event/event.h | 11 +- opencl/source/event/user_event.cpp | 15 +- opencl/source/event/user_event.h | 6 +- .../api/cl_enqueue_wait_for_events_tests.inl | 7 +- .../command_queue/command_queue_tests.cpp | 208 +++++++++++++++++- .../command_queue/enqueue_handler_tests.cpp | 5 +- .../command_queue/enqueue_kernel_2_tests.cpp | 5 +- ...and_stream_receiver_flush_task_3_tests.cpp | 1 + ...and_stream_receiver_flush_task_4_tests.cpp | 1 + .../command_stream_receiver_hw_1_tests.cpp | 1 + .../command_stream_receiver_hw_2_tests.cpp | 1 + .../event/async_events_handler_tests.cpp | 5 +- opencl/test/unit_test/event/event_fixture.h | 5 +- opencl/test/unit_test/event/event_tests.cpp | 55 ++++- .../unit_test/event/user_events_tests.cpp | 7 +- .../unit_test/helpers/kmd_notify_tests.cpp | 7 +- opencl/test/unit_test/kernel/kernel_tests.cpp | 1 + .../unit_test/mem_obj/buffer_bcs_tests.cpp | 5 +- .../mem_obj/mem_obj_destruction_tests.cpp | 1 + .../test/unit_test/mocks/mock_command_queue.h | 10 +- opencl/test/unit_test/mocks/mock_event.h | 16 +- .../windows/device_command_stream_tests.cpp | 7 +- shared/source/command_stream/CMakeLists.txt | 1 + .../aub_command_stream_receiver_hw.h | 1 + .../aub_command_stream_receiver_hw_base.inl | 1 + .../command_stream_receiver.cpp | 19 +- 
.../command_stream/command_stream_receiver.h | 13 +- .../command_stream_receiver_hw.h | 1 + .../command_stream_receiver_hw_base.inl | 1 + .../command_stream_receiver_with_aub_dump.h | 1 + .../tbx_command_stream_receiver_hw.h | 1 + shared/source/command_stream/wait_status.h | 18 ++ .../libult/ult_command_stream_receiver.h | 1 + shared/test/common/mocks/mock_aub_csr.h | 1 + .../mocks/mock_command_stream_receiver.h | 1 + .../command_stream_receiver_tests.cpp | 59 ++++- 41 files changed, 487 insertions(+), 95 deletions(-) create mode 100644 shared/source/command_stream/wait_status.h diff --git a/level_zero/core/source/cmdqueue/cmdqueue.cpp b/level_zero/core/source/cmdqueue/cmdqueue.cpp index ad85e80534..85e92ab799 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue.cpp +++ b/level_zero/core/source/cmdqueue/cmdqueue.cpp @@ -8,6 +8,7 @@ #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/csr_definitions.h" #include "shared/source/command_stream/linear_stream.h" +#include "shared/source/command_stream/wait_status.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/memory_manager/memory_manager.h" diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp index 09b409b7e0..1454408368 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/command_stream/scratch_space_controller_xehp_and_later.h" +#include "shared/source/command_stream/wait_status.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" #include "shared/test/common/mocks/mock_command_stream_receiver.h" diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index 
ffe7e3945f..81cb7ba931 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -229,19 +229,25 @@ bool CommandQueue::isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState return false; } -void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) { +WaitStatus CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) { WAIT_ENTER() + WaitStatus waitStatus{WaitStatus::Ready}; + DBG_LOG(LogTaskCounts, __FUNCTION__, "Waiting for taskCount:", gpgpuTaskCountToWait); DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "Current taskCount:", getHwTag()); if (!skipWait) { bool forcePowerSavingMode = this->throttle == QueueThrottle::LOW; - getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(gpgpuTaskCountToWait, - flushStampToWait, - useQuickKmdSleep, - forcePowerSavingMode); + waitStatus = getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(gpgpuTaskCountToWait, + flushStampToWait, + useQuickKmdSleep, + forcePowerSavingMode); + if (waitStatus == WaitStatus::GpuHang) { + return WaitStatus::GpuHang; + } + DEBUG_BREAK_IF(getHwTag() < gpgpuTaskCountToWait); if (gtpinIsGTPinInitialized()) { @@ -251,17 +257,25 @@ void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, RangewaitForTaskCountWithKmdNotifyFallback(copyEngine.taskCount, 0, false, false); - bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(copyEngine.taskCount); + + waitStatus = bcsCsr->waitForTaskCountWithKmdNotifyFallback(copyEngine.taskCount, 0, false, false); + if (waitStatus == WaitStatus::GpuHang) { + return WaitStatus::GpuHang; + } + + waitStatus = bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(copyEngine.taskCount); 
+ if (waitStatus == WaitStatus::GpuHang) { + return WaitStatus::GpuHang; + } } - if (cleanTemporaryAllocationList) { - getGpgpuCommandStreamReceiver().waitForTaskCountAndCleanTemporaryAllocationList(gpgpuTaskCountToWait); - } else { - getGpgpuCommandStreamReceiver().waitForTaskCount(gpgpuTaskCountToWait); - } + waitStatus = cleanTemporaryAllocationList + ? getGpgpuCommandStreamReceiver().waitForTaskCountAndCleanTemporaryAllocationList(gpgpuTaskCountToWait) + : getGpgpuCommandStreamReceiver().waitForTaskCount(gpgpuTaskCountToWait); WAIT_LEAVE() + + return waitStatus; } bool CommandQueue::isQueueBlocked() { diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h index 5428dcfc39..faa4fa5281 100644 --- a/opencl/source/command_queue/command_queue.h +++ b/opencl/source/command_queue/command_queue.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -209,9 +209,9 @@ class CommandQueue : public BaseObject<_cl_command_queue> { MOCKABLE_VIRTUAL bool isQueueBlocked(); - MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait); - MOCKABLE_VIRTUAL void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) { - this->waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, true, false); + MOCKABLE_VIRTUAL WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait); + MOCKABLE_VIRTUAL WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) { + return 
this->waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, true, false); } MOCKABLE_VIRTUAL void waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler, bool cleanTemporaryAllocationsList); MOCKABLE_VIRTUAL void waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler) { @@ -223,7 +223,7 @@ class CommandQueue : public BaseObject<_cl_command_queue> { const cl_event *eventWaitList); MOCKABLE_VIRTUAL CommandStreamReceiver &getGpgpuCommandStreamReceiver() const; - CommandStreamReceiver *getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) const; + MOCKABLE_VIRTUAL CommandStreamReceiver *getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) const; CommandStreamReceiver *getBcsForAuxTranslation() const; MOCKABLE_VIRTUAL CommandStreamReceiver &selectCsrForBuiltinOperation(const CsrSelectionArgs &args) const; Device &getDevice() const noexcept; diff --git a/opencl/source/event/event.cpp b/opencl/source/event/event.cpp index 611606e633..ce0d31e982 100644 --- a/opencl/source/event/event.cpp +++ b/opencl/source/event/event.cpp @@ -27,6 +27,8 @@ #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/mem_obj/mem_obj.h" +#include <algorithm> + namespace NEO { Event::Event( @@ -417,15 +419,18 @@ void Event::getBoundaryTimestampValues(TimestampPacketContainer *timestampContai } } -inline bool Event::wait(bool blocking, bool useQuickKmdSleep) { +inline WaitStatus Event::wait(bool blocking, bool useQuickKmdSleep) { while (this->taskCount == CompletionStamp::notReady) { if (blocking == false) { - return false; + return WaitStatus::NotReady; } } Range states{&bcsState, bcsState.isValid() ?
1u : 0u}; - cmdQueue->waitUntilComplete(taskCount.load(), states, flushStamp->peekStamp(), useQuickKmdSleep); + const auto waitStatus = cmdQueue->waitUntilComplete(taskCount.load(), states, flushStamp->peekStamp(), useQuickKmdSleep); + if (waitStatus == WaitStatus::GpuHang) { + return WaitStatus::GpuHang; + } updateExecutionStatus(); DEBUG_BREAK_IF(this->taskLevel == CompletionStamp::notReady && this->executionStatus >= 0); @@ -433,7 +438,7 @@ inline bool Event::wait(bool blocking, bool useQuickKmdSleep) { auto *allocationStorage = cmdQueue->getGpgpuCommandStreamReceiver().getInternalAllocationStorage(); allocationStorage->cleanAllocationList(this->taskCount, TEMPORARY_ALLOCATION); - return true; + return WaitStatus::Ready; } void Event::updateExecutionStatus() { @@ -630,16 +635,23 @@ cl_int Event::waitForEvents(cl_uint numEvents, // pointers to workerLists - for fast swap operations WorkerListT *currentlyPendingEvents = &workerList1; WorkerListT *pendingEventsLeft = &workerList2; + WaitStatus eventWaitStatus = WaitStatus::NotReady; while (currentlyPendingEvents->size() > 0) { - for (auto &e : *currentlyPendingEvents) { - Event *event = castToObjectOrAbort(e); + for (auto current = currentlyPendingEvents->begin(), end = currentlyPendingEvents->end(); current != end; ++current) { + Event *event = castToObjectOrAbort(*current); if (event->peekExecutionStatus() < CL_COMPLETE) { return CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST; } - if (event->wait(false, false) == false) { + eventWaitStatus = event->wait(false, false); + if (eventWaitStatus == WaitStatus::NotReady) { pendingEventsLeft->push_back(event); + } else if (eventWaitStatus == WaitStatus::GpuHang) { + setExecutionStatusToAbortedDueToGpuHang(pendingEventsLeft->begin(), pendingEventsLeft->end()); + setExecutionStatusToAbortedDueToGpuHang(current, end); + + return CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST; } } @@ -650,6 +662,13 @@ cl_int Event::waitForEvents(cl_uint numEvents, return CL_SUCCESS; } +inline 
void Event::setExecutionStatusToAbortedDueToGpuHang(cl_event *first, cl_event *last) { + std::for_each(first, last, [](cl_event &e) { + Event *event = castToObjectOrAbort(e); + event->transitionExecutionStatus(executionAbortedDueToGpuHang); + }); +} + uint32_t Event::getTaskLevel() { return taskLevel; } diff --git a/opencl/source/event/event.h b/opencl/source/event/event.h index a3d364a8de..7ef9ad275a 100644 --- a/opencl/source/event/event.h +++ b/opencl/source/event/event.h @@ -1,11 +1,12 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once +#include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/flush_stamp.h" #include "shared/source/os_interface/os_time.h" #include "shared/source/os_interface/performance_counters.h" @@ -80,6 +81,7 @@ class Event : public BaseObject<_cl_event>, public IDNode { }; static const cl_ulong objectMagic = 0x80134213A43C981ALL; + static constexpr cl_int executionAbortedDueToGpuHang = -777; Event(CommandQueue *cmdQueue, cl_command_type cmdType, uint32_t taskLevel, uint32_t taskCount); @@ -206,9 +208,8 @@ class Event : public BaseObject<_cl_event>, public IDNode { // adds a callback (execution state change listener) to this event's list of callbacks void addCallback(Callback::ClbFuncT fn, cl_int type, void *data); - //returns true on success - //if(blocking==false), will return with false instead of blocking while waiting for completion - virtual bool wait(bool blocking, bool useQuickKmdSleep); + //if(blocking==false), will return with WaitStatus::NotReady instead of blocking while waiting for completion + virtual WaitStatus wait(bool blocking, bool useQuickKmdSleep); bool isUserEvent() const { return (CL_COMMAND_USER == cmdType); @@ -347,6 +348,8 @@ class Event : public BaseObject<_cl_event>, public IDNode { void unblockEventsBlockedByThis(int32_t transitionStatus); void submitCommand(bool abortBlockedTasks); + 
static void setExecutionStatusToAbortedDueToGpuHang(cl_event *first, cl_event *last); + bool currentCmdQVirtualEvent; std::atomic cmdToSubmit; std::atomic submittedCmd; diff --git a/opencl/source/event/user_event.cpp b/opencl/source/event/user_event.cpp index 06ce2863e5..1631f46228 100644 --- a/opencl/source/event/user_event.cpp +++ b/opencl/source/event/user_event.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -24,13 +24,13 @@ void UserEvent::updateExecutionStatus() { return; } -bool UserEvent::wait(bool blocking, bool useQuickKmdSleep) { +WaitStatus UserEvent::wait(bool blocking, bool useQuickKmdSleep) { while (updateStatusAndCheckCompletion() == false) { if (blocking == false) { - return false; + return WaitStatus::NotReady; } } - return true; + return WaitStatus::Ready; } uint32_t UserEvent::getTaskLevel() { @@ -53,16 +53,15 @@ VirtualEvent::VirtualEvent(CommandQueue *cmdQ, Context *ctx) } void VirtualEvent::updateExecutionStatus() { - ; } -bool VirtualEvent::wait(bool blocking, bool useQuickKmdSleep) { +WaitStatus VirtualEvent::wait(bool blocking, bool useQuickKmdSleep) { while (updateStatusAndCheckCompletion() == false) { if (blocking == false) { - return false; + return WaitStatus::NotReady; } } - return true; + return WaitStatus::Ready; } uint32_t VirtualEvent::getTaskLevel() { diff --git a/opencl/source/event/user_event.h b/opencl/source/event/user_event.h index b6b7216a2c..e5c69027f3 100644 --- a/opencl/source/event/user_event.h +++ b/opencl/source/event/user_event.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -18,7 +18,7 @@ class UserEvent : public Event { ~UserEvent() override = default; - bool wait(bool blocking, bool useQuickKmdSleep) override; + WaitStatus wait(bool blocking, bool useQuickKmdSleep) override; void updateExecutionStatus() 
override; @@ -33,7 +33,7 @@ class VirtualEvent : public Event { ~VirtualEvent() override = default; - bool wait(bool blocking, bool useQuickKmdSleep) override; + WaitStatus wait(bool blocking, bool useQuickKmdSleep) override; bool setStatus(cl_int status) override; diff --git a/opencl/test/unit_test/api/cl_enqueue_wait_for_events_tests.inl b/opencl/test/unit_test/api/cl_enqueue_wait_for_events_tests.inl index 482cc91ae0..e7a44e5a60 100644 --- a/opencl/test/unit_test/api/cl_enqueue_wait_for_events_tests.inl +++ b/opencl/test/unit_test/api/cl_enqueue_wait_for_events_tests.inl @@ -1,10 +1,11 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ +#include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/array_count.h" #include "opencl/source/command_queue/command_queue.h" @@ -60,9 +61,9 @@ TEST_F(clEnqueueWaitForEventsTests, GivenProperParamsWhenClEnqueueWaitForEventsI MyEvent(Context *context) : UserEvent(context) { } - bool wait(bool blocking, bool quickKmdSleep) override { + WaitStatus wait(bool blocking, bool quickKmdSleep) override { wasWaitCalled = true; - return true; + return WaitStatus::Ready; }; bool wasWaitCalled = false; }; diff --git a/opencl/test/unit_test/command_queue/command_queue_tests.cpp b/opencl/test/unit_test/command_queue/command_queue_tests.cpp index d0d0e3f86c..052bb38885 100644 --- a/opencl/test/unit_test/command_queue/command_queue_tests.cpp +++ b/opencl/test/unit_test/command_queue/command_queue_tests.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/command_stream/command_stream_receiver.h" +#include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/basic_math.h" #include "shared/source/helpers/engine_node_helper.h" @@ -903,9 +904,11 @@ struct WaitForQueueCompletionTests : public ::testing::Test { template struct MyCmdQueue : public CommandQueueHw { 
MyCmdQueue(Context *context, ClDevice *device) : CommandQueueHw(context, device, nullptr, false){}; - void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { + WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { requestedUseQuickKmdSleep = useQuickKmdSleep; waitUntilCompleteCounter++; + + return WaitStatus::Ready; } bool isQueueBlocked() override { return false; @@ -957,16 +960,29 @@ class CommandStreamReceiverHwMock : public CommandStreamReceiverHw { uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield) : CommandStreamReceiverHw(executionEnvironment, rootDeviceIndex, deviceBitfield) {} - bool wiatForTaskCountCalled = false; WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override { - return WaitStatus::Ready; + waitForTaskCountWithKmdNotifyFallbackCounter++; + return waitForTaskCountWithKmdNotifyFallbackReturnValue; } - void waitForTaskCount(uint32_t requiredTaskCount) override { - wiatForTaskCountCalled = true; - return; + WaitStatus waitForTaskCount(uint32_t requiredTaskCount) override { + waitForTaskCountCalledCounter++; + return waitForTaskCountReturnValue; } + + WaitStatus waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount) override { + waitForTaskCountAndCleanTemporaryAllocationListCalledCounter++; + return waitForTaskCountAndCleanTemporaryAllocationListReturnValue; + } + + int waitForTaskCountCalledCounter{0}; + int waitForTaskCountWithKmdNotifyFallbackCounter{0}; + int waitForTaskCountAndCleanTemporaryAllocationListCalledCounter{0}; + + WaitStatus waitForTaskCountReturnValue{WaitStatus::Ready}; + WaitStatus 
waitForTaskCountWithKmdNotifyFallbackReturnValue{WaitStatus::Ready}; + WaitStatus waitForTaskCountAndCleanTemporaryAllocationListReturnValue{WaitStatus::Ready}; }; struct WaitUntilCompletionTests : public ::testing::Test { @@ -976,6 +992,12 @@ struct WaitUntilCompletionTests : public ::testing::Test { using CommandQueue::gpgpuEngine; MyCmdQueue(Context *context, ClDevice *device) : CommandQueueHw(context, device, nullptr, false){}; + + CommandStreamReceiver *getBcsCommandStreamReceiver(aub_stream::EngineType bcsEngineType) const override { + return bcsCsrToReturn; + } + + CommandStreamReceiver *bcsCsrToReturn{nullptr}; }; void SetUp() override { @@ -987,20 +1009,182 @@ struct WaitUntilCompletionTests : public ::testing::Test { std::unique_ptr context; }; -HWTEST_F(WaitUntilCompletionTests, givenCommandQueueAndCleanTemporaryAllocationListWhenWaitUntilCompleteThenWaitForTaskCountIsCalled) { +HWTEST_F(WaitUntilCompletionTests, givenCleanTemporaryAllocationListEqualsFalseWhenWaitingUntilCompleteThenWaitForTaskCountIsCalledAndItsReturnValueIsPropagated) { std::unique_ptr> cmdStream(new CommandStreamReceiverHwMock(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield())); cmdStream->initializeTagAllocation(); + cmdStream->waitForTaskCountReturnValue = WaitStatus::Ready; + std::unique_ptr> cmdQ(new MyCmdQueue(context.get(), device.get())); CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver; - cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get(); - uint32_t taskCount = 0u; + + constexpr uint32_t taskCount = 0u; + constexpr bool cleanTemporaryAllocationList = false; StackVec activeBcsStates{}; - cmdQ->waitUntilComplete(taskCount, activeBcsStates, cmdQ->flushStamp->peekStamp(), false, false, false); - auto cmdStreamPtr = &device->getGpgpuCommandStreamReceiver(); + const auto waitStatus = cmdQ->waitUntilComplete(taskCount, activeBcsStates, cmdQ->flushStamp->peekStamp(), false, 
cleanTemporaryAllocationList, false); + EXPECT_EQ(WaitStatus::Ready, waitStatus); + EXPECT_EQ(1, cmdStream->waitForTaskCountCalledCounter); - EXPECT_TRUE(static_cast *>(cmdStreamPtr)->wiatForTaskCountCalled); + cmdQ->gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver; +} + +HWTEST_F(WaitUntilCompletionTests, givenGpuHangAndCleanTemporaryAllocationListEqualsTrueWhenWaitingUntilCompleteThenWaitForTaskCountAndCleanAllocationIsCalledAndGpuHangIsReturned) { + std::unique_ptr> cmdStream(new CommandStreamReceiverHwMock(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield())); + cmdStream->initializeTagAllocation(); + cmdStream->waitForTaskCountAndCleanTemporaryAllocationListReturnValue = WaitStatus::GpuHang; + + std::unique_ptr> cmdQ(new MyCmdQueue(context.get(), device.get())); + CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver; + cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get(); + + constexpr uint32_t taskCount = 0u; + constexpr bool cleanTemporaryAllocationList = true; + StackVec activeBcsStates{}; + + const auto waitStatus = cmdQ->waitUntilComplete(taskCount, activeBcsStates, cmdQ->flushStamp->peekStamp(), false, cleanTemporaryAllocationList, false); + EXPECT_EQ(WaitStatus::GpuHang, waitStatus); + EXPECT_EQ(1, cmdStream->waitForTaskCountAndCleanTemporaryAllocationListCalledCounter); + + cmdQ->gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver; +} + +HWTEST_F(WaitUntilCompletionTests, givenEmptyBcsStatesAndSkipWaitEqualsTrueWhenWaitingUntilCompleteThenWaitForTaskCountWithKmdNotifyFallbackIsNotCalled) { + std::unique_ptr> cmdStream(new CommandStreamReceiverHwMock(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield())); + cmdStream->initializeTagAllocation(); + + std::unique_ptr> cmdQ(new MyCmdQueue(context.get(), device.get())); + CommandStreamReceiver *oldCommandStreamReceiver = 
cmdQ->gpgpuEngine->commandStreamReceiver; + cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get(); + + constexpr uint32_t taskCount = 0u; + constexpr bool skipWait = true; + StackVec activeBcsStates{}; + + cmdQ->waitUntilComplete(taskCount, activeBcsStates, cmdQ->flushStamp->peekStamp(), false, false, skipWait); + EXPECT_EQ(0, cmdStream->waitForTaskCountWithKmdNotifyFallbackCounter); + + cmdQ->gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver; +} + +HWTEST_F(WaitUntilCompletionTests, givenGpuHangAndSkipWaitEqualsFalseWhenWaitingUntilCompleteThenOnlyWaitForTaskCountWithKmdNotifyFallbackIsCalledAndGpuHangIsReturned) { + std::unique_ptr> cmdStream(new CommandStreamReceiverHwMock(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield())); + cmdStream->initializeTagAllocation(); + cmdStream->waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::GpuHang; + + std::unique_ptr> cmdQ(new MyCmdQueue(context.get(), device.get())); + CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver; + cmdQ->gpgpuEngine->commandStreamReceiver = cmdStream.get(); + + constexpr uint32_t taskCount = 0u; + constexpr bool skipWait = false; + StackVec activeBcsStates{}; + + const auto waitStatus = cmdQ->waitUntilComplete(taskCount, activeBcsStates, cmdQ->flushStamp->peekStamp(), false, false, skipWait); + EXPECT_EQ(WaitStatus::GpuHang, waitStatus); + + EXPECT_EQ(0, cmdStream->waitForTaskCountCalledCounter); + EXPECT_EQ(1, cmdStream->waitForTaskCountWithKmdNotifyFallbackCounter); + EXPECT_EQ(0, cmdStream->waitForTaskCountAndCleanTemporaryAllocationListCalledCounter); + + cmdQ->gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver; +} + +HWTEST_F(WaitUntilCompletionTests, givenGpuHangOnBcsCsrWhenWaitingUntilCompleteThenOnlyWaitForTaskCountWithKmdNotifyFallbackIsCalledOnBcsCsrAndGpuHangIsReturned) { + std::unique_ptr> gpgpuCmdStream(new 
CommandStreamReceiverHwMock(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield())); + gpgpuCmdStream->initializeTagAllocation(); + gpgpuCmdStream->waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::Ready; + + std::unique_ptr> bcsCmdStream(new CommandStreamReceiverHwMock(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield())); + bcsCmdStream->initializeTagAllocation(); + bcsCmdStream->waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::GpuHang; + + std::unique_ptr> cmdQ(new MyCmdQueue(context.get(), device.get())); + CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver; + cmdQ->gpgpuEngine->commandStreamReceiver = gpgpuCmdStream.get(); + cmdQ->bcsCsrToReturn = bcsCmdStream.get(); + + constexpr uint32_t taskCount = 0u; + constexpr bool skipWait = false; + StackVec activeBcsStates{CopyEngineState{}}; + + const auto waitStatus = cmdQ->waitUntilComplete(taskCount, activeBcsStates, cmdQ->flushStamp->peekStamp(), false, false, skipWait); + EXPECT_EQ(WaitStatus::GpuHang, waitStatus); + + EXPECT_EQ(0, gpgpuCmdStream->waitForTaskCountCalledCounter); + EXPECT_EQ(1, gpgpuCmdStream->waitForTaskCountWithKmdNotifyFallbackCounter); + EXPECT_EQ(0, gpgpuCmdStream->waitForTaskCountAndCleanTemporaryAllocationListCalledCounter); + + EXPECT_EQ(0, bcsCmdStream->waitForTaskCountCalledCounter); + EXPECT_EQ(1, bcsCmdStream->waitForTaskCountWithKmdNotifyFallbackCounter); + EXPECT_EQ(0, bcsCmdStream->waitForTaskCountAndCleanTemporaryAllocationListCalledCounter); + + cmdQ->gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver; +} + +HWTEST_F(WaitUntilCompletionTests, givenGpuHangOnBcsCsrWhenWaitingUntilCompleteThenWaitForTaskCountAndCleanTemporaryAllocationListIsCalledOnBcsCsrAndGpuHangIsReturned) { + std::unique_ptr> gpgpuCmdStream(new CommandStreamReceiverHwMock(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), 
device->getDeviceBitfield())); + gpgpuCmdStream->initializeTagAllocation(); + gpgpuCmdStream->waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::Ready; + + std::unique_ptr> bcsCmdStream(new CommandStreamReceiverHwMock(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield())); + bcsCmdStream->initializeTagAllocation(); + bcsCmdStream->waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::Ready; + bcsCmdStream->waitForTaskCountAndCleanTemporaryAllocationListReturnValue = WaitStatus::GpuHang; + + std::unique_ptr> cmdQ(new MyCmdQueue(context.get(), device.get())); + CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver; + cmdQ->gpgpuEngine->commandStreamReceiver = gpgpuCmdStream.get(); + cmdQ->bcsCsrToReturn = bcsCmdStream.get(); + + constexpr uint32_t taskCount = 0u; + constexpr bool skipWait = false; + StackVec activeBcsStates{CopyEngineState{}}; + + const auto waitStatus = cmdQ->waitUntilComplete(taskCount, activeBcsStates, cmdQ->flushStamp->peekStamp(), false, false, skipWait); + EXPECT_EQ(WaitStatus::GpuHang, waitStatus); + + EXPECT_EQ(0, gpgpuCmdStream->waitForTaskCountCalledCounter); + EXPECT_EQ(1, gpgpuCmdStream->waitForTaskCountWithKmdNotifyFallbackCounter); + EXPECT_EQ(0, gpgpuCmdStream->waitForTaskCountAndCleanTemporaryAllocationListCalledCounter); + + EXPECT_EQ(0, bcsCmdStream->waitForTaskCountCalledCounter); + EXPECT_EQ(1, bcsCmdStream->waitForTaskCountWithKmdNotifyFallbackCounter); + EXPECT_EQ(1, bcsCmdStream->waitForTaskCountAndCleanTemporaryAllocationListCalledCounter); + + cmdQ->gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver; +} + +HWTEST_F(WaitUntilCompletionTests, givenSuccessOnBcsCsrWhenWaitingUntilCompleteThenGpgpuCsrWaitStatusIsReturned) { + std::unique_ptr> gpgpuCmdStream(new CommandStreamReceiverHwMock(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield())); + 
gpgpuCmdStream->initializeTagAllocation(); + gpgpuCmdStream->waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::Ready; + gpgpuCmdStream->waitForTaskCountReturnValue = WaitStatus::Ready; + + std::unique_ptr> bcsCmdStream(new CommandStreamReceiverHwMock(*device->getExecutionEnvironment(), device->getRootDeviceIndex(), device->getDeviceBitfield())); + bcsCmdStream->initializeTagAllocation(); + bcsCmdStream->waitForTaskCountWithKmdNotifyFallbackReturnValue = WaitStatus::Ready; + bcsCmdStream->waitForTaskCountAndCleanTemporaryAllocationListReturnValue = WaitStatus::Ready; + + std::unique_ptr> cmdQ(new MyCmdQueue(context.get(), device.get())); + CommandStreamReceiver *oldCommandStreamReceiver = cmdQ->gpgpuEngine->commandStreamReceiver; + cmdQ->gpgpuEngine->commandStreamReceiver = gpgpuCmdStream.get(); + cmdQ->bcsCsrToReturn = bcsCmdStream.get(); + + constexpr uint32_t taskCount = 0u; + constexpr bool skipWait = false; + StackVec activeBcsStates{CopyEngineState{}}; + + const auto waitStatus = cmdQ->waitUntilComplete(taskCount, activeBcsStates, cmdQ->flushStamp->peekStamp(), false, false, skipWait); + EXPECT_EQ(WaitStatus::Ready, waitStatus); + + EXPECT_EQ(1, gpgpuCmdStream->waitForTaskCountCalledCounter); + EXPECT_EQ(1, gpgpuCmdStream->waitForTaskCountWithKmdNotifyFallbackCounter); + EXPECT_EQ(0, gpgpuCmdStream->waitForTaskCountAndCleanTemporaryAllocationListCalledCounter); + + EXPECT_EQ(0, bcsCmdStream->waitForTaskCountCalledCounter); + EXPECT_EQ(1, bcsCmdStream->waitForTaskCountWithKmdNotifyFallbackCounter); + EXPECT_EQ(1, bcsCmdStream->waitForTaskCountAndCleanTemporaryAllocationListCalledCounter); cmdQ->gpgpuEngine->commandStreamReceiver = oldCommandStreamReceiver; } diff --git a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp index 9362707ee8..f5d8bcc7da 100644 --- a/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp +++ 
b/opencl/test/unit_test/command_queue/enqueue_handler_tests.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/aub/aub_subcapture.h" +#include "shared/source/command_stream/wait_status.h" #include "shared/source/program/sync_buffer_handler.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" @@ -111,9 +112,9 @@ struct EnqueueHandlerWithAubSubCaptureTests : public EnqueueHandlerTest { public: MockCmdQWithAubSubCapture(Context *context, ClDevice *device) : CommandQueueHw(context, device, nullptr, false) {} - void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { + WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { waitUntilCompleteCalled = true; - CommandQueueHw::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait); + return CommandQueueHw::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait); } void obtainNewTimestampPacketNodes(size_t numberOfNodes, TimestampPacketContainer &previousNodes, bool clearAllDependencies, CommandStreamReceiver &csr) override { diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index ff78ac14b8..3067e2b4ed 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -7,6 +7,7 @@ #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/command_stream/scratch_space_controller.h" +#include 
"shared/source/command_stream/wait_status.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" @@ -809,9 +810,9 @@ class MyCmdQ : public MockCommandQueueHw { auxTranslationDirection); } - void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { + WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { waitCalled++; - MockCommandQueueHw::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait); + return MockCommandQueueHw::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait); } std::vector auxTranslationDirections; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp index acb9cd9151..65b2029a27 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp @@ -5,6 +5,7 @@ * */ +#include "shared/source/command_stream/wait_status.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp index 1273d0bb70..6e667f39c6 100644 --- 
a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp @@ -5,6 +5,7 @@ * */ +#include "shared/source/command_stream/wait_status.h" #include "shared/test/common/mocks/mock_command_stream_receiver.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/test.h" diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp index d0b150d894..39430a9c46 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp @@ -8,6 +8,7 @@ #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/scratch_space_controller.h" #include "shared/source/command_stream/scratch_space_controller_base.h" +#include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/constants.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp index d229964704..12055124c7 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_2_tests.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/command_stream/scratch_space_controller_base.h" +#include "shared/source/command_stream/wait_status.h" #include "shared/source/direct_submission/dispatchers/blitter_dispatcher.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/helpers/engine_descriptor_helper.h" diff --git 
a/opencl/test/unit_test/event/async_events_handler_tests.cpp b/opencl/test/unit_test/event/async_events_handler_tests.cpp index 8ff0daaa3d..e0f5fe2951 100644 --- a/opencl/test/unit_test/event/async_events_handler_tests.cpp +++ b/opencl/test/unit_test/event/async_events_handler_tests.cpp @@ -5,6 +5,7 @@ * */ +#include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/timestamp_packet.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/test_macros/mock_method_macros.h" @@ -38,14 +39,14 @@ class AsyncEventsHandlerTests : public ::testing::Test { this->updateTaskCount(taskCount, 0); } - bool wait(bool blocking, bool quickKmdSleep) override { + WaitStatus wait(bool blocking, bool quickKmdSleep) override { waitCalled++; handler->allowAsyncProcess.store(false); return waitResult; } uint32_t waitCalled = 0u; - bool waitResult = true; + WaitStatus waitResult = WaitStatus::Ready; std::unique_ptr handler; }; diff --git a/opencl/test/unit_test/event/event_fixture.h b/opencl/test/unit_test/event/event_fixture.h index 826859408d..27305824e9 100644 --- a/opencl/test/unit_test/event/event_fixture.h +++ b/opencl/test/unit_test/event/event_fixture.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -7,6 +7,7 @@ #pragma once +#include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/ptr_math.h" #include "shared/test/unit_test/utilities/base_object_utils.h" @@ -67,7 +68,7 @@ struct InternalsEventTest }; struct MyUserEvent : public VirtualEvent { - bool wait(bool blocking, bool quickKmdSleep) override { + WaitStatus wait(bool blocking, bool quickKmdSleep) override { return VirtualEvent::wait(blocking, quickKmdSleep); }; uint32_t getTaskLevel() override { diff --git a/opencl/test/unit_test/event/event_tests.cpp 
b/opencl/test/unit_test/event/event_tests.cpp index 0c6a505474..f55edf55d9 100644 --- a/opencl/test/unit_test/event/event_tests.cpp +++ b/opencl/test/unit_test/event/event_tests.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/command_stream/command_stream_receiver.h" +#include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/os_interface/os_interface.h" @@ -409,7 +410,7 @@ TEST_F(EventTest, GivenInvalidEventWhenGettingEventInfoThenInvalidValueErrorIsRe TEST_F(EventTest, GivenNonBlockingEventWhenWaitingThenFalseIsReturned) { Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 3, CompletionStamp::notReady); auto result = event.wait(false, false); - EXPECT_FALSE(result); + EXPECT_EQ(WaitStatus::NotReady, result); } struct UpdateEventTest : public ::testing::Test { @@ -805,6 +806,38 @@ TEST_F(InternalsEventTest, givenDeviceTimestampBaseEnabledAndGlobalStartTSSmalle event.timeStampNode = nullptr; } +TEST_F(InternalsEventTest, givenGpuHangWhenEventWaitReportsHangThenWaititingIsAbortedAndUnfinishedEventsHaveExecutionStatusEqualsToAbortedDueToGpuHang) { + MockCommandQueue cmdQ(mockContext, pClDevice, nullptr, false); + + MockEvent passingEvent(&cmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); + passingEvent.waitReturnValue = WaitStatus::Ready; + + MockEvent hangingEvent(&cmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); + hangingEvent.waitReturnValue = WaitStatus::GpuHang; + + cl_event eventWaitlist[] = {&passingEvent, &hangingEvent}; + + const auto result = Event::waitForEvents(2, eventWaitlist); + EXPECT_EQ(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST, result); + + EXPECT_NE(Event::executionAbortedDueToGpuHang, passingEvent.peekExecutionStatus()); + EXPECT_EQ(Event::executionAbortedDueToGpuHang, hangingEvent.peekExecutionStatus()); +} + +TEST_F(InternalsEventTest, givenPassingEventWhenWaitingForEventsThenWaititingIsSuccessfulAndEventIsNotAborted) { + 
MockCommandQueue cmdQ(mockContext, pClDevice, nullptr, false); + + MockEvent passingEvent(&cmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); + passingEvent.waitReturnValue = WaitStatus::Ready; + + cl_event eventWaitlist[] = {&passingEvent}; + + const auto result = Event::waitForEvents(1, eventWaitlist); + EXPECT_EQ(CL_SUCCESS, result); + + EXPECT_NE(Event::executionAbortedDueToGpuHang, passingEvent.peekExecutionStatus()); +} + TEST_F(InternalsEventTest, GivenProfilingWHENMapOperationTHENTimesSet) { const cl_queue_properties props[3] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0}; MockCommandQueue *pCmdQ = new MockCommandQueue(mockContext, pClDevice, props, false); @@ -1520,7 +1553,9 @@ HWTEST_F(EventTest, givenQuickKmdSleepRequestWhenWaitIsCalledThenPassRequestToWa Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); event.updateCompletionStamp(1u, 0, 1u, 1u); - event.wait(true, true); + const auto result = event.wait(true, true); + EXPECT_EQ(WaitStatus::Ready, result); + EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(localHwInfo.capabilityTable.kmdNotifyProperties.delayQuickKmdSleepMicroseconds, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); } @@ -1541,11 +1576,25 @@ HWTEST_F(EventTest, givenNonQuickKmdSleepRequestWhenWaitIsCalledThenPassRequestT Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); event.updateCompletionStamp(1u, 0, 1u, 1u); - event.wait(true, false); + const auto result = event.wait(true, false); + EXPECT_EQ(WaitStatus::Ready, result); + EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(localHwInfo.capabilityTable.kmdNotifyProperties.delayKmdNotifyMicroseconds, csr->waitForCompletionWithTimeoutParamsPassed[0].timeoutMs); } +HWTEST_F(EventTest, givenGpuHangWhenWaitIsCalledThenPassRequestToWaitingFunctionAndReturnGpuHang) { + auto csr = new TestEventCsr(*pDevice->executionEnvironment, pDevice->getDeviceBitfield()); + csr->waitForCompletionWithTimeoutResult = WaitStatus::GpuHang; + 
pDevice->resetCommandStreamReceiver(csr); + + Event event(pCmdQ, CL_COMMAND_NDRANGE_KERNEL, 0, 0); + + const auto waitStatus = event.wait(true, false); + EXPECT_EQ(WaitStatus::GpuHang, waitStatus); + EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); +} + HWTEST_F(InternalsEventTest, givenCommandWhenSubmitCalledThenUpdateFlushStamp) { auto pCmdQ = std::unique_ptr(new MockCommandQueue(mockContext, pClDevice, 0, false)); MockEvent *event = new MockEvent(pCmdQ.get(), CL_COMMAND_MARKER, 0, 0); diff --git a/opencl/test/unit_test/event/user_events_tests.cpp b/opencl/test/unit_test/event/user_events_tests.cpp index eda96b1df6..99e1f2a6e1 100644 --- a/opencl/test/unit_test/event/user_events_tests.cpp +++ b/opencl/test/unit_test/event/user_events_tests.cpp @@ -1,10 +1,11 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ +#include "shared/source/command_stream/wait_status.h" #include "shared/source/memory_manager/internal_allocation_storage.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/os_interface/os_context.h" @@ -131,7 +132,7 @@ TEST(UserEvent, GivenInitialUserEventStateWhenCheckingReadyForSubmissionThenFals TEST(UserEvent, GivenUserEventWhenGettingTaskLevelThenZeroIsReturned) { MyUserEvent uEvent; EXPECT_EQ(0U, uEvent.getTaskLevel()); - EXPECT_FALSE(uEvent.wait(false, false)); + EXPECT_EQ(WaitStatus::NotReady, uEvent.wait(false, false)); } TEST(UserEvent, WhenSettingStatusThenReadyForSubmissionisTrue) { @@ -952,7 +953,7 @@ TEST_F(EventTests, WhenWaitingForEventsThenTemporaryAllocationsAreDestroyed) { TEST_F(EventTest, WhenUserEventIsCreatedThenWaitIsNonBlocking) { UserEvent event; auto result = event.wait(false, false); - EXPECT_FALSE(result); + EXPECT_EQ(WaitStatus::NotReady, result); } TEST_F(EventTest, GivenSingleUserEventWhenWaitingForEventsThenSuccessIsReturned) { diff --git a/opencl/test/unit_test/helpers/kmd_notify_tests.cpp 
b/opencl/test/unit_test/helpers/kmd_notify_tests.cpp index f439452f80..1789d5b8fd 100644 --- a/opencl/test/unit_test/helpers/kmd_notify_tests.cpp +++ b/opencl/test/unit_test/helpers/kmd_notify_tests.cpp @@ -5,6 +5,7 @@ * */ +#include "shared/source/command_stream/wait_status.h" #include "shared/source/os_interface/os_context.h" #include "shared/test/common/helpers/debug_manager_state_restore.h" #include "shared/test/common/mocks/mock_device.h" @@ -127,6 +128,7 @@ struct KmdNotifyTests : public ::testing::Test { HWTEST_F(KmdNotifyTests, givenTaskCountWhenWaitUntilCompletionCalledThenAlwaysTryCpuPolling) { auto csr = createMockCsr(); + cmdQ->waitUntilComplete(taskCountToWait, {}, flushStampToWait, false); EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(true, csr->waitForCompletionWithTimeoutParamsPassed[0].enableTimeout); @@ -137,6 +139,7 @@ HWTEST_F(KmdNotifyTests, givenTaskCountWhenWaitUntilCompletionCalledThenAlwaysTr HWTEST_F(KmdNotifyTests, givenTaskCountAndKmdNotifyDisabledWhenWaitUntilCompletionCalledThenTryCpuPollingWithoutTimeout) { overrideKmdNotifyParams(false, 0, false, 0, false, 0, false, 0); auto csr = createMockCsr(); + cmdQ->waitUntilComplete(taskCountToWait, {}, flushStampToWait, false); EXPECT_EQ(0u, csr->waitForFlushStampCalled); EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); @@ -274,6 +277,7 @@ HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModeAndCal auto csr = createMockCsr(); cmdQ->throttle = QueueThrottle::LOW; + cmdQ->waitUntilComplete(1, {}, 1, false); EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(true, csr->waitForCompletionWithTimeoutParamsPassed[0].enableTimeout); @@ -285,6 +289,7 @@ HWTEST_F(KmdNotifyTests, givenKmdNotifyDisabledWhenQueueHasPowerSavingModButTher auto csr = createMockCsr(); cmdQ->throttle = QueueThrottle::LOW; + cmdQ->waitUntilComplete(1, {}, 0, false); EXPECT_EQ(1u, csr->waitForCompletionWithTimeoutCalled); EXPECT_EQ(false, 
csr->waitForCompletionWithTimeoutParamsPassed[0].enableTimeout); @@ -528,4 +533,4 @@ TEST_F(KmdNotifyTests, givenDisabledKmdDirectSubmissionNotifyMechanismWhenDirect bool timeoutEnabled = helper.obtainTimeoutParams(timeout, false, 1, 2, flushStampToWait, false, true, directSubmission); EXPECT_TRUE(timeoutEnabled); EXPECT_EQ(expectedTimeout, timeout); -} +} \ No newline at end of file diff --git a/opencl/test/unit_test/kernel/kernel_tests.cpp b/opencl/test/unit_test/kernel/kernel_tests.cpp index a160f38756..199ed7c91f 100644 --- a/opencl/test/unit_test/kernel/kernel_tests.cpp +++ b/opencl/test/unit_test/kernel/kernel_tests.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/command_stream/command_stream_receiver_hw.h" +#include "shared/source/command_stream/wait_status.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/flush_stamp.h" diff --git a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp index edda8b1e7e..28dc58bad9 100644 --- a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp @@ -5,6 +5,7 @@ * */ +#include "shared/source/command_stream/wait_status.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/test/common/cmd_parse/hw_parse.h" @@ -47,10 +48,12 @@ struct BcsBufferTests : public ::testing::Test { return WaitStatus::Ready; } - void waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount) override { + WaitStatus waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount) override { EXPECT_EQ(1u, waitForTaskCountWithKmdNotifyFallbackCalled); EXPECT_EQ(this->latestFlushedTaskCount, requiredTaskCount); waitForTaskCountAndCleanAllocationListCalled++; + + return WaitStatus::Ready; } uint32_t 
waitForTaskCountAndCleanAllocationListCalled = 0; diff --git a/opencl/test/unit_test/mem_obj/mem_obj_destruction_tests.cpp b/opencl/test/unit_test/mem_obj/mem_obj_destruction_tests.cpp index 085ce08caa..b2f1e7e6f6 100644 --- a/opencl/test/unit_test/mem_obj/mem_obj_destruction_tests.cpp +++ b/opencl/test/unit_test/mem_obj/mem_obj_destruction_tests.cpp @@ -5,6 +5,7 @@ * */ +#include "shared/source/command_stream/wait_status.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/unified_memory_manager.h" #include "shared/source/os_interface/os_context.h" diff --git a/opencl/test/unit_test/mocks/mock_command_queue.h b/opencl/test/unit_test/mocks/mock_command_queue.h index c418b7d3ae..a8b1b2d7a2 100644 --- a/opencl/test/unit_test/mocks/mock_command_queue.h +++ b/opencl/test/unit_test/mocks/mock_command_queue.h @@ -1,11 +1,13 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * */ #pragma once + +#include "shared/source/command_stream/wait_status.h" #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/test/common/libult/ult_command_stream_receiver.h" @@ -90,12 +92,12 @@ class MockCommandQueue : public CommandQueue { return writeBufferRetValue; } - void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { + WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { latestTaskCountWaited = gpgpuTaskCountToWait; return CommandQueue::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait); } - void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, 
FlushStamp flushStampToWait, bool useQuickKmdSleep) override { + WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep) override { latestTaskCountWaited = gpgpuTaskCountToWait; return CommandQueue::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep); } @@ -333,7 +335,7 @@ class MockCommandQueueHw : public CommandQueueHw { useBcsCsrOnNotifyEnabled = notifyBcsCsr; } - void waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { + WaitStatus waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) override { latestTaskCountWaited = gpgpuTaskCountToWait; return BaseClass::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait); } diff --git a/opencl/test/unit_test/mocks/mock_event.h b/opencl/test/unit_test/mocks/mock_event.h index 6b6a4c5af5..56463d3f01 100644 --- a/opencl/test/unit_test/mocks/mock_event.h +++ b/opencl/test/unit_test/mocks/mock_event.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -7,9 +7,13 @@ #pragma once +#include "shared/source/command_stream/wait_status.h" + #include "opencl/source/event/event_builder.h" #include "opencl/source/event/user_event.h" +#include + namespace NEO { #define FORWARD_CONSTRUCTOR(THIS_CLASS, BASE_CLASS) \ @@ -37,6 +41,16 @@ struct MockEvent : public BaseEventType { using Event::queueTimeStamp; using Event::submitTimeStamp; using Event::timestampPacketContainer; + + WaitStatus wait(bool blocking, bool useQuickKmdSleep) override { + if (waitReturnValue.has_value()) { + return 
*waitReturnValue; + } + + return BaseEventType::wait(blocking, useQuickKmdSleep); + } + + std::optional waitReturnValue{}; }; #undef FORWARD_CONSTRUCTOR diff --git a/opencl/test/unit_test/os_interface/windows/device_command_stream_tests.cpp b/opencl/test/unit_test/os_interface/windows/device_command_stream_tests.cpp index 8e550cf59e..635a2d77b9 100644 --- a/opencl/test/unit_test/os_interface/windows/device_command_stream_tests.cpp +++ b/opencl/test/unit_test/os_interface/windows/device_command_stream_tests.cpp @@ -781,7 +781,8 @@ TEST_F(WddmCommandStreamTest, givenTwoTemporaryAllocationsWhenCleanTemporaryAllo graphicsAllocation->updateTaskCount(1, csr->getOsContext().getContextId()); graphicsAllocation2->updateTaskCount(100, csr->getOsContext().getContextId()); - csr->waitForTaskCountAndCleanAllocationList(1, TEMPORARY_ALLOCATION); + const auto firstWaitResult = csr->waitForTaskCountAndCleanAllocationList(1, TEMPORARY_ALLOCATION); + EXPECT_EQ(WaitStatus::Ready, firstWaitResult); // graphicsAllocation2 still lives EXPECT_EQ(host_ptr2, graphicsAllocation2->getUnderlyingBuffer()); @@ -797,8 +798,10 @@ TEST_F(WddmCommandStreamTest, givenTwoTemporaryAllocationsWhenCleanTemporaryAllo auto fragment2 = hostPtrManager->getFragment({alignedPtr, csr->getRootDeviceIndex()}); EXPECT_EQ(nullptr, fragment2); + // destroy remaining allocation - csr->waitForTaskCountAndCleanAllocationList(100, TEMPORARY_ALLOCATION); + const auto secondWaitResult = csr->waitForTaskCountAndCleanAllocationList(100, TEMPORARY_ALLOCATION); + EXPECT_EQ(WaitStatus::Ready, secondWaitResult); } TEST_F(WddmCommandStreamMockGdiTest, WhenFlushingThenWddmMakeResidentIsCalledForResidencyAllocations) { diff --git a/shared/source/command_stream/CMakeLists.txt b/shared/source/command_stream/CMakeLists.txt index 563782155c..bd2a8a309e 100644 --- a/shared/source/command_stream/CMakeLists.txt +++ b/shared/source/command_stream/CMakeLists.txt @@ -59,6 +59,7 @@ set(NEO_CORE_COMMAND_STREAM 
${CMAKE_CURRENT_SOURCE_DIR}/tbx_command_stream_receiver_hw.inl ${CMAKE_CURRENT_SOURCE_DIR}/tbx_stream.cpp ${CMAKE_CURRENT_SOURCE_DIR}/thread_arbitration_policy.h + ${CMAKE_CURRENT_SOURCE_DIR}/wait_status.h ) if(SUPPORT_XEHP_AND_LATER) diff --git a/shared/source/command_stream/aub_command_stream_receiver_hw.h b/shared/source/command_stream/aub_command_stream_receiver_hw.h index c68d93a7a8..cac8ae1b0d 100644 --- a/shared/source/command_stream/aub_command_stream_receiver_hw.h +++ b/shared/source/command_stream/aub_command_stream_receiver_hw.h @@ -10,6 +10,7 @@ #include "shared/source/command_stream/aub_command_stream_receiver.h" #include "shared/source/command_stream/command_stream_receiver_simulated_hw.h" #include "shared/source/command_stream/submission_status.h" +#include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/array_count.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/source/memory_manager/page_table.h" diff --git a/shared/source/command_stream/aub_command_stream_receiver_hw_base.inl b/shared/source/command_stream/aub_command_stream_receiver_hw_base.inl index 16a7466fff..681bc80cf8 100644 --- a/shared/source/command_stream/aub_command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/aub_command_stream_receiver_hw_base.inl @@ -13,6 +13,7 @@ #include "shared/source/aub_mem_dump/page_table_entry_bits.h" #include "shared/source/command_stream/aub_command_stream_receiver_hw.h" #include "shared/source/command_stream/command_stream_receiver.h" +#include "shared/source/command_stream/wait_status.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" diff --git a/shared/source/command_stream/command_stream_receiver.cpp b/shared/source/command_stream/command_stream_receiver.cpp index 5a04f070c3..26cdc04958 100644 --- 
a/shared/source/command_stream/command_stream_receiver.cpp +++ b/shared/source/command_stream/command_stream_receiver.cpp @@ -165,24 +165,29 @@ void CommandStreamReceiver::makeResidentHostPtrAllocation(GraphicsAllocation *gf makeResident(*gfxAllocation); } -void CommandStreamReceiver::waitForTaskCount(uint32_t requiredTaskCount) { +WaitStatus CommandStreamReceiver::waitForTaskCount(uint32_t requiredTaskCount) { auto address = getTagAddress(); if (address) { - baseWaitFunction(address, false, 0, requiredTaskCount); + return baseWaitFunction(address, false, 0, requiredTaskCount); } + + return WaitStatus::Ready; } -void CommandStreamReceiver::waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationUsage) { +WaitStatus CommandStreamReceiver::waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationUsage) { + WaitStatus waitStatus{WaitStatus::Ready}; auto &list = allocationUsage == TEMPORARY_ALLOCATION ? internalAllocationStorage->getTemporaryAllocations() : internalAllocationStorage->getAllocationsForReuse(); if (!list.peekIsEmpty()) { - this->CommandStreamReceiver::waitForTaskCount(requiredTaskCount); + waitStatus = this->CommandStreamReceiver::waitForTaskCount(requiredTaskCount); } internalAllocationStorage->cleanAllocationList(requiredTaskCount, allocationUsage); + + return waitStatus; } -void CommandStreamReceiver::waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount) { - waitForTaskCountAndCleanAllocationList(requiredTaskCount, TEMPORARY_ALLOCATION); -}; +WaitStatus CommandStreamReceiver::waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount) { + return waitForTaskCountAndCleanAllocationList(requiredTaskCount, TEMPORARY_ALLOCATION); +} void CommandStreamReceiver::ensureCommandBufferAllocation(LinearStream &commandStream, size_t minimumRequiredSize, size_t additionalAllocationSize) { if (commandStream.getAvailableSpace() >= minimumRequiredSize) { diff --git 
a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 8e20110b0a..2acc4d0897 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -14,6 +14,7 @@ #include "shared/source/command_stream/submission_status.h" #include "shared/source/command_stream/submissions_aggregator.h" #include "shared/source/command_stream/thread_arbitration_policy.h" +#include "shared/source/command_stream/wait_status.h" #include "shared/source/helpers/aligned_memory.h" #include "shared/source/helpers/blit_commands_helper.h" #include "shared/source/helpers/common_types.h" @@ -64,12 +65,6 @@ enum class DispatchMode { BatchedDispatch // dispatching is batched, explicit clFlush is required }; -enum class WaitStatus { - NotReady = 0, - Ready = 1, - GpuHang = 2, -}; - class CommandStreamReceiver { public: enum class SamplerCacheFlushState { @@ -114,9 +109,9 @@ class CommandStreamReceiver { virtual GmmPageTableMngr *createPageTableManager() { return nullptr; } bool needsPageTableManager() const; - MOCKABLE_VIRTUAL void waitForTaskCount(uint32_t requiredTaskCount); - void waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationUsage); - MOCKABLE_VIRTUAL void waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount); + MOCKABLE_VIRTUAL WaitStatus waitForTaskCount(uint32_t requiredTaskCount); + WaitStatus waitForTaskCountAndCleanAllocationList(uint32_t requiredTaskCount, uint32_t allocationUsage); + MOCKABLE_VIRTUAL WaitStatus waitForTaskCountAndCleanTemporaryAllocationList(uint32_t requiredTaskCount); LinearStream &getCS(size_t minRequiredSize = 1024u); OSInterface *getOSInterface() const; diff --git a/shared/source/command_stream/command_stream_receiver_hw.h b/shared/source/command_stream/command_stream_receiver_hw.h index 736c3f0a12..8f6d688e4e 100644 --- 
a/shared/source/command_stream/command_stream_receiver_hw.h +++ b/shared/source/command_stream/command_stream_receiver_hw.h @@ -8,6 +8,7 @@ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/submission_status.h" +#include "shared/source/command_stream/wait_status.h" #include "shared/source/direct_submission/direct_submission_hw.h" #include "shared/source/direct_submission/dispatchers/blitter_dispatcher.h" #include "shared/source/direct_submission/dispatchers/render_dispatcher.h" diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index ff878e6568..675f3b2b50 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -12,6 +12,7 @@ #include "shared/source/command_stream/preemption.h" #include "shared/source/command_stream/scratch_space_controller_base.h" #include "shared/source/command_stream/stream_properties.h" +#include "shared/source/command_stream/wait_status.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" #include "shared/source/direct_submission/direct_submission_controller.h" diff --git a/shared/source/command_stream/command_stream_receiver_with_aub_dump.h b/shared/source/command_stream/command_stream_receiver_with_aub_dump.h index 2c08bc5dc5..5d87110cae 100644 --- a/shared/source/command_stream/command_stream_receiver_with_aub_dump.h +++ b/shared/source/command_stream/command_stream_receiver_with_aub_dump.h @@ -7,6 +7,7 @@ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" +#include "shared/source/command_stream/wait_status.h" #include diff --git a/shared/source/command_stream/tbx_command_stream_receiver_hw.h b/shared/source/command_stream/tbx_command_stream_receiver_hw.h index 89a0c86ad5..57763a8934 100644 --- 
a/shared/source/command_stream/tbx_command_stream_receiver_hw.h +++ b/shared/source/command_stream/tbx_command_stream_receiver_hw.h @@ -8,6 +8,7 @@ #pragma once #include "shared/source/command_stream/command_stream_receiver_simulated_hw.h" #include "shared/source/command_stream/tbx_command_stream_receiver.h" +#include "shared/source/command_stream/wait_status.h" #include "shared/source/memory_manager/address_mapper.h" #include "shared/source/memory_manager/os_agnostic_memory_manager.h" #include "shared/source/memory_manager/page_table.h" diff --git a/shared/source/command_stream/wait_status.h b/shared/source/command_stream/wait_status.h new file mode 100644 index 0000000000..2692d35242 --- /dev/null +++ b/shared/source/command_stream/wait_status.h @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2018-2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once + +namespace NEO { + +enum class WaitStatus { + NotReady = 0, + Ready = 1, + GpuHang = 2, +}; + +} // namespace NEO \ No newline at end of file diff --git a/shared/test/common/libult/ult_command_stream_receiver.h b/shared/test/common/libult/ult_command_stream_receiver.h index 44fdc73e43..e046ec1561 100644 --- a/shared/test/common/libult/ult_command_stream_receiver.h +++ b/shared/test/common/libult/ult_command_stream_receiver.h @@ -7,6 +7,7 @@ #pragma once #include "shared/source/command_stream/command_stream_receiver_hw.h" +#include "shared/source/command_stream/wait_status.h" #include "shared/source/device/device.h" #include "shared/source/direct_submission/direct_submission_hw.h" #include "shared/source/execution_environment/execution_environment.h" diff --git a/shared/test/common/mocks/mock_aub_csr.h b/shared/test/common/mocks/mock_aub_csr.h index 4b7e43a103..0e6f337729 100644 --- a/shared/test/common/mocks/mock_aub_csr.h +++ b/shared/test/common/mocks/mock_aub_csr.h @@ -9,6 +9,7 @@ #include "shared/source/command_stream/aub_command_stream_receiver_hw.h" #include 
"shared/source/command_stream/preemption.h" +#include "shared/source/command_stream/wait_status.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/helpers/hw_info.h" diff --git a/shared/test/common/mocks/mock_command_stream_receiver.h b/shared/test/common/mocks/mock_command_stream_receiver.h index 0b0a85bb3c..6a3ca68a85 100644 --- a/shared/test/common/mocks/mock_command_stream_receiver.h +++ b/shared/test/common/mocks/mock_command_stream_receiver.h @@ -8,6 +8,7 @@ #pragma once #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/command_stream_receiver_hw.h" +#include "shared/source/command_stream/wait_status.h" #include "shared/source/execution_environment/execution_environment.h" #include "shared/source/helpers/flat_batch_buffer_helper_hw.h" #include "shared/source/helpers/flush_stamp.h" diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 8756651380..be005b2b80 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -7,6 +7,7 @@ #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/command_stream/command_stream_receiver_simulated_hw.h" +#include "shared/source/command_stream/wait_status.h" #include "shared/source/gmm_helper/page_table_mngr.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/memory_manager/internal_allocation_storage.h" @@ -23,6 +24,7 @@ #include "shared/test/common/mocks/mock_csr.h" #include "shared/test/common/mocks/mock_driver_model.h" #include "shared/test/common/mocks/mock_execution_environment.h" +#include "shared/test/common/mocks/mock_internal_allocation_storage.h" #include 
"shared/test/common/mocks/mock_memory_manager.h" #include "shared/test/common/mocks/ult_device_factory.h" #include "shared/test/common/test_macros/matchers.h" @@ -265,6 +267,55 @@ HWTEST_F(CommandStreamReceiverTest, givenFailingFlushSubmissionsAndNoGpuHangWhen EXPECT_EQ(WaitStatus::NotReady, waitStatus); } +HWTEST_F(CommandStreamReceiverTest, givenGpuHangWhenWaititingForTaskCountThenGpuHangIsReturned) { + auto driverModelMock = std::make_unique(); + driverModelMock->isGpuHangDetectedToReturn = true; + + auto osInterface = std::make_unique(); + osInterface->setDriverModel(std::move(driverModelMock)); + + auto &csr = pDevice->getUltCommandStreamReceiver(); + csr.executionEnvironment.rootDeviceEnvironments[csr.rootDeviceIndex]->osInterface = std::move(osInterface); + csr.activePartitions = 1; + csr.gpuHangCheckPeriod = 0us; + + volatile std::uint32_t tasksCount[16] = {}; + csr.tagAddress = tasksCount; + + constexpr auto taskCountToWait = 1; + const auto waitStatus = csr.waitForTaskCount(taskCountToWait); + EXPECT_EQ(WaitStatus::GpuHang, waitStatus); +} + +HWTEST_F(CommandStreamReceiverTest, givenGpuHangAndNonEmptyAllocationsListWhenCallingWaitForTaskCountAndCleanAllocationListThenWaitIsCalledAndGpuHangIsReturned) { + auto driverModelMock = std::make_unique(); + driverModelMock->isGpuHangDetectedToReturn = true; + + auto osInterface = std::make_unique(); + osInterface->setDriverModel(std::move(driverModelMock)); + + auto &csr = pDevice->getUltCommandStreamReceiver(); + csr.executionEnvironment.rootDeviceEnvironments[csr.rootDeviceIndex]->osInterface = std::move(osInterface); + csr.activePartitions = 1; + csr.gpuHangCheckPeriod = 0us; + + volatile std::uint32_t tasksCount[16] = {}; + csr.tagAddress = tasksCount; + + auto hostPtr = reinterpret_cast(0x1234); + size_t size = 100; + + auto temporaryAllocation = std::make_unique(0, AllocationType::EXTERNAL_HOST_PTR, hostPtr, size, 0, MemoryPool::System4KBPages, MemoryManager::maxOsContextCount); + 
temporaryAllocation->updateTaskCount(0u, 0u); + csr.getInternalAllocationStorage()->storeAllocationWithTaskCount(std::move(temporaryAllocation), TEMPORARY_ALLOCATION, 2u); + + constexpr auto taskCountToWait = 1; + constexpr auto allocationUsage = TEMPORARY_ALLOCATION; + const auto waitStatus = csr.waitForTaskCountAndCleanAllocationList(taskCountToWait, allocationUsage); + + EXPECT_EQ(WaitStatus::GpuHang, waitStatus); +} + HWTEST_F(CommandStreamReceiverTest, givenCommandStreamReceiverWhenCheckedForInitialStatusOfStatelessMocsIndexThenUnknownMocsIsReturend) { auto &csr = pDevice->getUltCommandStreamReceiver(); EXPECT_EQ(CacheSettings::unknownMocs, csr.latestSentStatelessMocsConfig); @@ -1237,8 +1288,10 @@ TEST(CommandStreamReceiverSimpleTest, givenMultipleActivePartitionsWhenWaitingFo CpuIntrinsicsTests::pauseOffset = csr.getPostSyncWriteOffset(); CpuIntrinsicsTests::pauseCounter = 0; - csr.waitForTaskCountAndCleanTemporaryAllocationList(3u); + + const auto waitStatus = csr.waitForTaskCountAndCleanTemporaryAllocationList(3u); EXPECT_EQ(2u, CpuIntrinsicsTests::pauseCounter); + EXPECT_EQ(WaitStatus::Ready, waitStatus); CpuIntrinsicsTests::pauseAddress = nullptr; } @@ -1261,8 +1314,10 @@ TEST(CommandStreamReceiverSimpleTest, givenEmptyTemporaryAllocationListWhenWaiti CpuIntrinsicsTests::pauseValue = 3u; CpuIntrinsicsTests::pauseCounter = 0; - csr.waitForTaskCountAndCleanTemporaryAllocationList(3u); + + const auto waitStatus = csr.waitForTaskCountAndCleanTemporaryAllocationList(3u); EXPECT_EQ(0u, CpuIntrinsicsTests::pauseCounter); + EXPECT_EQ(WaitStatus::Ready, waitStatus); CpuIntrinsicsTests::pauseAddress = nullptr; }