fix: zeFenceHostSynchronize() to flush printf output

- zeFenceHostSynchronize() should flush printf output from GPU kernels

Related-To: NEO-7625

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2023-02-17 19:47:30 +00:00
committed by Compute-Runtime-Automation
parent 1db08cdfa0
commit 87ea9473e4
3 changed files with 122 additions and 9 deletions

View File

@@ -87,6 +87,7 @@ struct CommandQueueImp : public CommandQueue {
virtual bool getPreemptionCmdProgramming() = 0;
void handleIndirectAllocationResidency(UnifiedMemoryControls unifiedMemoryControls, std::unique_lock<std::mutex> &lockForIndirect, bool performMigration) override;
void makeResidentAndMigrate(bool performMigration, const NEO::ResidencyContainer &residencyContainer) override;
void printKernelsPrintfOutput(bool hangDetected);
protected:
MOCKABLE_VIRTUAL NEO::SubmissionStatus submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr,
@@ -94,8 +95,6 @@ struct CommandQueueImp : public CommandQueue {
ze_result_t synchronizeByPollingForTaskCount(uint64_t timeout);
void printKernelsPrintfOutput(bool hangDetected);
void postSyncOperations(bool hangDetected);
CommandBufferManager buffers;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -58,29 +58,29 @@ ze_result_t Fence::hostSynchronize(uint64_t timeout) {
return ZE_RESULT_NOT_READY;
}
if (timeout == 0) {
return queryStatus();
}
waitStartTime = std::chrono::high_resolution_clock::now();
lastHangCheckTime = waitStartTime;
while (timeDiff < timeout) {
do {
ret = queryStatus();
if (ret == ZE_RESULT_SUCCESS) {
cmdQueue->printKernelsPrintfOutput(false);
return ZE_RESULT_SUCCESS;
}
currentTime = std::chrono::high_resolution_clock::now();
if (csr->checkGpuHangDetected(currentTime, lastHangCheckTime)) {
cmdQueue->printKernelsPrintfOutput(true);
return ZE_RESULT_ERROR_DEVICE_LOST;
}
if (timeout == std::numeric_limits<uint64_t>::max()) {
continue;
} else if (timeout == 0) {
break;
}
timeDiff = std::chrono::duration_cast<std::chrono::nanoseconds>(currentTime - waitStartTime).count();
}
} while (timeDiff < timeout);
return ret;
}

View File

@@ -8,6 +8,7 @@
#include "shared/source/built_ins/sip.h"
#include "shared/test/common/mocks/mock_command_stream_receiver.h"
#include "shared/test/common/mocks/mock_csr.h"
#include "shared/test/common/mocks/mock_driver_model.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "level_zero/core/source/fence/fence.h"
@@ -15,6 +16,7 @@
#include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
#include "level_zero/core/test/unit_tests/mocks/mock_fence.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
#include <chrono>
#include <cstdint>
@@ -349,5 +351,117 @@ TEST_F(FenceTest, givenFenceWhenResettingThenTaskCountIsReset) {
delete fence;
}
HWTEST_F(FenceTest, givenPrintfKernelWhenSynchronizingFenceThenPrintPrintfOutputIsCalled) {
const ze_command_queue_desc_t desc{};
ze_result_t returnValue;
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
device,
neoDevice->getDefaultEngine().commandStreamReceiver,
&desc,
false,
false,
returnValue));
Mock<Kernel> kernel;
TaskCountType currentTaskCount = 33u;
auto &csr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
csr.returnWaitForCompletionWithTimeout = WaitStatus::Ready;
csr.latestWaitForCompletionWithTimeoutTaskCount = currentTaskCount;
*csr.tagAddress = currentTaskCount;
commandQueue->printfKernelContainer.push_back(&kernel);
ze_fence_desc_t fenceDesc = {ZE_STRUCTURE_TYPE_FENCE_DESC,
nullptr,
0};
auto fence = whiteboxCast(Fence::create(commandQueue, &fenceDesc));
ASSERT_NE(fence, nullptr);
fence->taskCount = currentTaskCount;
ze_result_t result = fence->hostSynchronize(1);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(0u, commandQueue->printfKernelContainer.size());
EXPECT_EQ(1u, kernel.printPrintfOutputCalledTimes);
EXPECT_FALSE(kernel.hangDetectedPassedToPrintfOutput);
delete fence;
commandQueue->destroy();
}
HWTEST_F(FenceTest, givenPrintfKernelAndDetectedHangWhenSynchronizingFenceThenPrintPrintfOutputAfterHangIsCalled) {
auto driverModel = new NEO::MockDriverModel();
driverModel->isGpuHangDetectedToReturn = true;
neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface.reset(new NEO::OSInterface);
neoDevice->executionEnvironment->rootDeviceEnvironments[0]->osInterface->setDriverModel(std::unique_ptr<DriverModel>(driverModel));
const ze_command_queue_desc_t desc{};
ze_result_t returnValue;
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
device,
neoDevice->getDefaultEngine().commandStreamReceiver,
&desc,
false,
false,
returnValue));
Mock<Kernel> kernel;
TaskCountType currentTaskCount = 33u;
auto &csr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
csr.latestWaitForCompletionWithTimeoutTaskCount = currentTaskCount;
csr.returnWaitForCompletionWithTimeout = WaitStatus::GpuHang;
*csr.tagAddress = 0;
csr.gpuHangCheckPeriod = 0us;
commandQueue->printfKernelContainer.push_back(&kernel);
ze_fence_desc_t fenceDesc = {ZE_STRUCTURE_TYPE_FENCE_DESC,
nullptr,
0};
auto fence = whiteboxCast(Fence::create(commandQueue, &fenceDesc));
ASSERT_NE(fence, nullptr);
fence->taskCount = 1;
ze_result_t result = fence->hostSynchronize(1);
EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, result);
EXPECT_EQ(0u, commandQueue->printfKernelContainer.size());
EXPECT_EQ(1u, kernel.printPrintfOutputCalledTimes);
EXPECT_TRUE(kernel.hangDetectedPassedToPrintfOutput);
delete fence;
commandQueue->destroy();
}
HWTEST_F(FenceTest, givenPrintfKernelNotCompletedWhenSynchronizingFenceWithZeroTimeoutThenPrintfOutputIsNotFlushed) {
const ze_command_queue_desc_t desc{};
ze_result_t returnValue;
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
device,
neoDevice->getDefaultEngine().commandStreamReceiver,
&desc,
false,
false,
returnValue));
Mock<Kernel> kernel;
TaskCountType currentTaskCount = 33u;
auto &csr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
csr.returnWaitForCompletionWithTimeout = WaitStatus::Ready;
csr.latestWaitForCompletionWithTimeoutTaskCount = currentTaskCount;
*csr.tagAddress = currentTaskCount - 1;
commandQueue->printfKernelContainer.push_back(&kernel);
ze_fence_desc_t fenceDesc = {ZE_STRUCTURE_TYPE_FENCE_DESC,
nullptr,
0};
auto fence = whiteboxCast(Fence::create(commandQueue, &fenceDesc));
ASSERT_NE(fence, nullptr);
fence->taskCount = currentTaskCount;
ze_result_t result = fence->hostSynchronize(0);
EXPECT_EQ(ZE_RESULT_NOT_READY, result);
EXPECT_EQ(1u, commandQueue->printfKernelContainer.size());
EXPECT_EQ(0u, kernel.printPrintfOutputCalledTimes);
delete fence;
commandQueue->destroy();
}
} // namespace ult
} // namespace L0