diff --git a/level_zero/core/source/cmdqueue/cmdqueue_imp.h b/level_zero/core/source/cmdqueue/cmdqueue_imp.h index 131338a359..cc148de705 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_imp.h +++ b/level_zero/core/source/cmdqueue/cmdqueue_imp.h @@ -87,6 +87,7 @@ struct CommandQueueImp : public CommandQueue { virtual bool getPreemptionCmdProgramming() = 0; void handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock &lockForIndirect, bool performMigration) override; void makeResidentAndMigrate(bool performMigration, const NEO::ResidencyContainer &residencyContainer) override; + void printKernelsPrintfOutput(bool hangDetected); protected: MOCKABLE_VIRTUAL NEO::SubmissionStatus submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr, @@ -94,8 +95,6 @@ struct CommandQueueImp : public CommandQueue { ze_result_t synchronizeByPollingForTaskCount(uint64_t timeout); - void printKernelsPrintfOutput(bool hangDetected); - void postSyncOperations(bool hangDetected); CommandBufferManager buffers; diff --git a/level_zero/core/source/fence/fence.cpp b/level_zero/core/source/fence/fence.cpp index da830fe52a..fa9debf4c9 100644 --- a/level_zero/core/source/fence/fence.cpp +++ b/level_zero/core/source/fence/fence.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2022 Intel Corporation + * Copyright (C) 2020-2023 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -58,29 +58,29 @@ ze_result_t Fence::hostSynchronize(uint64_t timeout) { return ZE_RESULT_NOT_READY; } - if (timeout == 0) { - return queryStatus(); - } - waitStartTime = std::chrono::high_resolution_clock::now(); lastHangCheckTime = waitStartTime; - while (timeDiff < timeout) { + do { ret = queryStatus(); if (ret == ZE_RESULT_SUCCESS) { + cmdQueue->printKernelsPrintfOutput(false); return ZE_RESULT_SUCCESS; } currentTime = std::chrono::high_resolution_clock::now(); if (csr->checkGpuHangDetected(currentTime, lastHangCheckTime)) { + cmdQueue->printKernelsPrintfOutput(true); return ZE_RESULT_ERROR_DEVICE_LOST; } if (timeout == std::numeric_limits::max()) { continue; + } else if (timeout == 0) { + break; } timeDiff = std::chrono::duration_cast(currentTime - waitStartTime).count(); - } + } while (timeDiff < timeout); return ret; } diff --git a/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp b/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp index 54ddfd0ca6..cb3f8ac3a3 100644 --- a/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp +++ b/level_zero/core/test/unit_tests/sources/fence/test_fence.cpp @@ -8,6 +8,7 @@ #include "shared/source/built_ins/sip.h" #include "shared/test/common/mocks/mock_command_stream_receiver.h" #include "shared/test/common/mocks/mock_csr.h" +#include "shared/test/common/mocks/mock_driver_model.h" #include "shared/test/common/test_macros/hw_test.h" #include "level_zero/core/source/fence/fence.h" @@ -15,6 +16,7 @@ #include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/mocks/mock_fence.h" +#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" #include #include @@ -349,5 +351,117 @@ TEST_F(FenceTest, givenFenceWhenResettingThenTaskCountIsReset) { delete fence; } +HWTEST_F(FenceTest, givenPrintfKernelWhenSynchronizingFenceThenPrintPrintfOutputIsCalled) { + const ze_command_queue_desc_t desc{}; + ze_result_t returnValue; + auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, + device, + neoDevice->getDefaultEngine().commandStreamReceiver, + &desc, + false, + false, + returnValue)); + Mock kernel; + TaskCountType currentTaskCount = 33u; + auto &csr = neoDevice->getUltCommandStreamReceiver(); + csr.returnWaitForCompletionWithTimeout = WaitStatus::Ready; + csr.latestWaitForCompletionWithTimeoutTaskCount = currentTaskCount; + *csr.tagAddress = currentTaskCount; + commandQueue->printfKernelContainer.push_back(&kernel); + + ze_fence_desc_t fenceDesc = {ZE_STRUCTURE_TYPE_FENCE_DESC, + nullptr, + 0}; + auto fence = whiteboxCast(Fence::create(commandQueue, &fenceDesc)); + ASSERT_NE(fence, nullptr); + fence->taskCount = currentTaskCount; + ze_result_t result = fence->hostSynchronize(1); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + EXPECT_EQ(0u, commandQueue->printfKernelContainer.size()); + EXPECT_EQ(1u, kernel.printPrintfOutputCalledTimes); + EXPECT_FALSE(kernel.hangDetectedPassedToPrintfOutput); + + delete fence; + + commandQueue->destroy(); +} + +HWTEST_F(FenceTest, givenPrintfKernelAndDetectedHangWhenSynchronizingFenceThenPrintPrintfOutputAfterHangIsCalled) { + auto driverModel = new NEO::MockDriverModel(); + driverModel->isGpuHangDetectedToReturn = true; + neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new NEO::OSInterface); + neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr(driverModel)); + + const ze_command_queue_desc_t desc{}; + ze_result_t returnValue; + auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, + device, + neoDevice->getDefaultEngine().commandStreamReceiver, + &desc, + false, + false, + returnValue)); + + Mock kernel; + TaskCountType currentTaskCount = 33u; + auto &csr = neoDevice->getUltCommandStreamReceiver(); + csr.latestWaitForCompletionWithTimeoutTaskCount = currentTaskCount; + csr.returnWaitForCompletionWithTimeout = WaitStatus::GpuHang; + *csr.tagAddress = 0; + csr.gpuHangCheckPeriod = 0us; + commandQueue->printfKernelContainer.push_back(&kernel); + + ze_fence_desc_t fenceDesc = {ZE_STRUCTURE_TYPE_FENCE_DESC, + nullptr, + 0}; + auto fence = whiteboxCast(Fence::create(commandQueue, &fenceDesc)); + ASSERT_NE(fence, nullptr); + fence->taskCount = 1; + ze_result_t result = fence->hostSynchronize(1); + EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, result); + + EXPECT_EQ(0u, commandQueue->printfKernelContainer.size()); + EXPECT_EQ(1u, kernel.printPrintfOutputCalledTimes); + EXPECT_TRUE(kernel.hangDetectedPassedToPrintfOutput); + + delete fence; + commandQueue->destroy(); +} + +HWTEST_F(FenceTest, givenPrintfKernelNotCompletedWhenSynchronizingFenceWithZeroTimeoutThenPrintfOutputIsNotFlushed) { + const ze_command_queue_desc_t desc{}; + ze_result_t returnValue; + auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, + device, + neoDevice->getDefaultEngine().commandStreamReceiver, + &desc, + false, + false, + returnValue)); + Mock kernel; + TaskCountType currentTaskCount = 33u; + auto &csr = neoDevice->getUltCommandStreamReceiver(); + csr.returnWaitForCompletionWithTimeout = WaitStatus::Ready; + csr.latestWaitForCompletionWithTimeoutTaskCount = currentTaskCount; + *csr.tagAddress = currentTaskCount - 1; + + commandQueue->printfKernelContainer.push_back(&kernel); + + ze_fence_desc_t fenceDesc = {ZE_STRUCTURE_TYPE_FENCE_DESC, + nullptr, + 0}; + auto fence = whiteboxCast(Fence::create(commandQueue, &fenceDesc)); + ASSERT_NE(fence, nullptr); + fence->taskCount = currentTaskCount; + ze_result_t result = fence->hostSynchronize(0); + EXPECT_EQ(ZE_RESULT_NOT_READY, result); + + EXPECT_EQ(1u, commandQueue->printfKernelContainer.size()); + EXPECT_EQ(0u, kernel.printPrintfOutputCalledTimes); + + delete fence; + commandQueue->destroy(); +} } // namespace ult } // namespace L0