mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-30 01:35:20 +08:00
fix: zeFenceHostSynchronize() to flush printf output
- zeFenceHostSynchronize() should flush printf output from GPU kernels Related-To: NEO-7625 Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
1db08cdfa0
commit
87ea9473e4
@@ -87,6 +87,7 @@ struct CommandQueueImp : public CommandQueue {
|
||||
virtual bool getPreemptionCmdProgramming() = 0;
|
||||
void handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock<std::mutex> &lockForIndirect, bool performMigration) override;
|
||||
void makeResidentAndMigrate(bool performMigration, const NEO::ResidencyContainer &residencyContainer) override;
|
||||
void printKernelsPrintfOutput(bool hangDetected);
|
||||
|
||||
protected:
|
||||
MOCKABLE_VIRTUAL NEO::SubmissionStatus submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr,
|
||||
@@ -94,8 +95,6 @@ struct CommandQueueImp : public CommandQueue {
|
||||
|
||||
ze_result_t synchronizeByPollingForTaskCount(uint64_t timeout);
|
||||
|
||||
void printKernelsPrintfOutput(bool hangDetected);
|
||||
|
||||
void postSyncOperations(bool hangDetected);
|
||||
|
||||
CommandBufferManager buffers;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2022 Intel Corporation
|
||||
* Copyright (C) 2020-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -58,29 +58,29 @@ ze_result_t Fence::hostSynchronize(uint64_t timeout) {
|
||||
return ZE_RESULT_NOT_READY;
|
||||
}
|
||||
|
||||
if (timeout == 0) {
|
||||
return queryStatus();
|
||||
}
|
||||
|
||||
waitStartTime = std::chrono::high_resolution_clock::now();
|
||||
lastHangCheckTime = waitStartTime;
|
||||
while (timeDiff < timeout) {
|
||||
do {
|
||||
ret = queryStatus();
|
||||
if (ret == ZE_RESULT_SUCCESS) {
|
||||
cmdQueue->printKernelsPrintfOutput(false);
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
currentTime = std::chrono::high_resolution_clock::now();
|
||||
if (csr->checkGpuHangDetected(currentTime, lastHangCheckTime)) {
|
||||
cmdQueue->printKernelsPrintfOutput(true);
|
||||
return ZE_RESULT_ERROR_DEVICE_LOST;
|
||||
}
|
||||
|
||||
if (timeout == std::numeric_limits<uint64_t>::max()) {
|
||||
continue;
|
||||
} else if (timeout == 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
timeDiff = std::chrono::duration_cast<std::chrono::nanoseconds>(currentTime - waitStartTime).count();
|
||||
}
|
||||
} while (timeDiff < timeout);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include "shared/source/built_ins/sip.h"
|
||||
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
|
||||
#include "shared/test/common/mocks/mock_csr.h"
|
||||
#include "shared/test/common/mocks/mock_driver_model.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
|
||||
#include "level_zero/core/source/fence/fence.h"
|
||||
@@ -15,6 +16,7 @@
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_fence.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
|
||||
|
||||
#include <chrono>
|
||||
#include <cstdint>
|
||||
@@ -349,5 +351,117 @@ TEST_F(FenceTest, givenFenceWhenResettingThenTaskCountIsReset) {
|
||||
delete fence;
|
||||
}
|
||||
|
||||
HWTEST_F(FenceTest, givenPrintfKernelWhenSynchronizingFenceThenPrintPrintfOutputIsCalled) {
|
||||
const ze_command_queue_desc_t desc{};
|
||||
ze_result_t returnValue;
|
||||
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
|
||||
device,
|
||||
neoDevice->getDefaultEngine().commandStreamReceiver,
|
||||
&desc,
|
||||
false,
|
||||
false,
|
||||
returnValue));
|
||||
Mock<Kernel> kernel;
|
||||
TaskCountType currentTaskCount = 33u;
|
||||
auto &csr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.returnWaitForCompletionWithTimeout = WaitStatus::Ready;
|
||||
csr.latestWaitForCompletionWithTimeoutTaskCount = currentTaskCount;
|
||||
*csr.tagAddress = currentTaskCount;
|
||||
commandQueue->printfKernelContainer.push_back(&kernel);
|
||||
|
||||
ze_fence_desc_t fenceDesc = {ZE_STRUCTURE_TYPE_FENCE_DESC,
|
||||
nullptr,
|
||||
0};
|
||||
auto fence = whiteboxCast(Fence::create(commandQueue, &fenceDesc));
|
||||
ASSERT_NE(fence, nullptr);
|
||||
fence->taskCount = currentTaskCount;
|
||||
ze_result_t result = fence->hostSynchronize(1);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(0u, commandQueue->printfKernelContainer.size());
|
||||
EXPECT_EQ(1u, kernel.printPrintfOutputCalledTimes);
|
||||
EXPECT_FALSE(kernel.hangDetectedPassedToPrintfOutput);
|
||||
|
||||
delete fence;
|
||||
|
||||
commandQueue->destroy();
|
||||
}
|
||||
|
||||
HWTEST_F(FenceTest, givenPrintfKernelAndDetectedHangWhenSynchronizingFenceThenPrintPrintfOutputAfterHangIsCalled) {
|
||||
auto driverModel = new NEO::MockDriverModel();
|
||||
driverModel->isGpuHangDetectedToReturn = true;
|
||||
neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new NEO::OSInterface);
|
||||
neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr<DriverModel>(driverModel));
|
||||
|
||||
const ze_command_queue_desc_t desc{};
|
||||
ze_result_t returnValue;
|
||||
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
|
||||
device,
|
||||
neoDevice->getDefaultEngine().commandStreamReceiver,
|
||||
&desc,
|
||||
false,
|
||||
false,
|
||||
returnValue));
|
||||
|
||||
Mock<Kernel> kernel;
|
||||
TaskCountType currentTaskCount = 33u;
|
||||
auto &csr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.latestWaitForCompletionWithTimeoutTaskCount = currentTaskCount;
|
||||
csr.returnWaitForCompletionWithTimeout = WaitStatus::GpuHang;
|
||||
*csr.tagAddress = 0;
|
||||
csr.gpuHangCheckPeriod = 0us;
|
||||
commandQueue->printfKernelContainer.push_back(&kernel);
|
||||
|
||||
ze_fence_desc_t fenceDesc = {ZE_STRUCTURE_TYPE_FENCE_DESC,
|
||||
nullptr,
|
||||
0};
|
||||
auto fence = whiteboxCast(Fence::create(commandQueue, &fenceDesc));
|
||||
ASSERT_NE(fence, nullptr);
|
||||
fence->taskCount = 1;
|
||||
ze_result_t result = fence->hostSynchronize(1);
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, result);
|
||||
|
||||
EXPECT_EQ(0u, commandQueue->printfKernelContainer.size());
|
||||
EXPECT_EQ(1u, kernel.printPrintfOutputCalledTimes);
|
||||
EXPECT_TRUE(kernel.hangDetectedPassedToPrintfOutput);
|
||||
|
||||
delete fence;
|
||||
commandQueue->destroy();
|
||||
}
|
||||
|
||||
HWTEST_F(FenceTest, givenPrintfKernelNotCompletedWhenSynchronizingFenceWithZeroTimeoutThenPrintfOutputIsNotFlushed) {
|
||||
const ze_command_queue_desc_t desc{};
|
||||
ze_result_t returnValue;
|
||||
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
|
||||
device,
|
||||
neoDevice->getDefaultEngine().commandStreamReceiver,
|
||||
&desc,
|
||||
false,
|
||||
false,
|
||||
returnValue));
|
||||
Mock<Kernel> kernel;
|
||||
TaskCountType currentTaskCount = 33u;
|
||||
auto &csr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
csr.returnWaitForCompletionWithTimeout = WaitStatus::Ready;
|
||||
csr.latestWaitForCompletionWithTimeoutTaskCount = currentTaskCount;
|
||||
*csr.tagAddress = currentTaskCount - 1;
|
||||
|
||||
commandQueue->printfKernelContainer.push_back(&kernel);
|
||||
|
||||
ze_fence_desc_t fenceDesc = {ZE_STRUCTURE_TYPE_FENCE_DESC,
|
||||
nullptr,
|
||||
0};
|
||||
auto fence = whiteboxCast(Fence::create(commandQueue, &fenceDesc));
|
||||
ASSERT_NE(fence, nullptr);
|
||||
fence->taskCount = currentTaskCount;
|
||||
ze_result_t result = fence->hostSynchronize(0);
|
||||
EXPECT_EQ(ZE_RESULT_NOT_READY, result);
|
||||
|
||||
EXPECT_EQ(1u, commandQueue->printfKernelContainer.size());
|
||||
EXPECT_EQ(0u, kernel.printPrintfOutputCalledTimes);
|
||||
|
||||
delete fence;
|
||||
commandQueue->destroy();
|
||||
}
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
Reference in New Issue
Block a user