fix: Print printf output on append to imm synchronous cmd lists

Related-To: NEO-7625

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2023-01-20 10:19:15 +00:00
committed by Compute-Runtime-Automation
parent 28bf57959e
commit 53971a2d28
3 changed files with 69 additions and 1 deletions

View File

@@ -149,6 +149,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
void checkWaitEventsState(uint32_t numWaitEvents, ze_event_handle_t *waitEventList);
protected:
void printKernelsPrintfOutput(bool hangDetected);
std::atomic<bool> dependenciesPresent{false};
};

View File

@@ -240,14 +240,15 @@ inline ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommand
auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout;
const auto waitStatus = csr->waitForCompletionWithTimeout(NEO::WaitParams{false, false, timeoutMicroseconds}, completionStamp.taskCount);
if (waitStatus == NEO::WaitStatus::GpuHang) {
this->printKernelsPrintfOutput(true);
return ZE_RESULT_ERROR_DEVICE_LOST;
}
csr->getInternalAllocationStorage()->cleanAllocationList(completionStamp.taskCount, NEO::AllocationUsage::TEMPORARY_ALLOCATION);
this->printKernelsPrintfOutput(false);
}
this->cmdListCurrentStartOffset = commandStream->getUsed();
this->containsAnyKernel = false;
this->handlePostSubmissionState();
if (NEO::DebugManager.flags.PauseOnEnqueue.get() != -1) {
@@ -740,4 +741,12 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::checkWaitEventsState(uint32_
}
}
// Calls printPrintfOutput(hangDetected) on every entry of printfKernelContainer, in index order.
// The entry count is sampled once before iterating, so the number of calls is fixed up front.
// The container itself is not modified here.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamilyImmediate<gfxCoreFamily>::printKernelsPrintfOutput(bool hangDetected) {
    const size_t kernelCount = this->printfKernelContainer.size();
    size_t idx = 0;
    while (idx < kernelCount) {
        this->printfKernelContainer[idx]->printPrintfOutput(hangDetected);
        ++idx;
    }
}
} // namespace L0

View File

@@ -14,6 +14,7 @@
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/libult/ult_command_stream_receiver.h"
#include "shared/test/common/test_macros/hw_test.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
@@ -218,6 +219,63 @@ HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfUsedWhenAppendedToC
EXPECT_EQ(1u, commandList->getPrintfKernelContainer().size());
}
// Appends a mock kernel twice to an immediate command list created with a synchronous queue
// descriptor and with flush-task submission enabled. After each successful append the kernel's
// printPrintfOutput must have been called once more, with hangDetected == false, and the printf
// kernel container must still hold its single entry.
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfWhenAppendedToSynchronousImmCommandListThenPrintfBufferIsPrinted) {
    ze_group_count_t dispatchSize{1, 1, 1};
    CmdListKernelLaunchParams kernelLaunchParams = {};

    ze_command_queue_desc_t syncQueueDesc = {};
    syncQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;

    ze_result_t createResult;
    std::unique_ptr<L0::CommandList> immCmdList(CommandList::createImmediate(productFamily, device, &syncQueueDesc, false, NEO::EngineGroupType::RenderCompute, createResult));
    immCmdList->isFlushTaskSubmissionEnabled = true;

    // Register the mock kernel by hand so the command list treats it as a printf kernel.
    Mock<Kernel> printfKernel;
    immCmdList->getPrintfKernelContainer().push_back(&printfKernel);

    // First append: printf output is printed once, without the hang flag.
    auto appendResult = immCmdList->appendLaunchKernel(printfKernel.toHandle(), &dispatchSize, nullptr, 0, nullptr, kernelLaunchParams);
    EXPECT_EQ(ZE_RESULT_SUCCESS, appendResult);
    EXPECT_EQ(1u, printfKernel.printPrintfOutputCalledTimes);
    EXPECT_FALSE(printfKernel.hangDetectedPassedToPrintfOutput);
    EXPECT_EQ(1u, immCmdList->getPrintfKernelContainer().size());
    EXPECT_EQ(&printfKernel, immCmdList->getPrintfKernelContainer()[0]);

    // Second append: the same kernel is printed again and the container is unchanged in size.
    appendResult = immCmdList->appendLaunchKernel(printfKernel.toHandle(), &dispatchSize, nullptr, 0, nullptr, kernelLaunchParams);
    EXPECT_EQ(ZE_RESULT_SUCCESS, appendResult);
    EXPECT_EQ(2u, printfKernel.printPrintfOutputCalledTimes);
    EXPECT_FALSE(printfKernel.hangDetectedPassedToPrintfOutput);
    EXPECT_EQ(1u, immCmdList->getPrintfKernelContainer().size());
}
// Same scenario as the non-hang test, but the ULT command stream receiver is configured to report
// WaitStatus::GpuHang from waitForCompletionWithTimeout. Each append must then return
// ZE_RESULT_ERROR_DEVICE_LOST while printPrintfOutput is still called once per append, this time
// with hangDetected == true, and the printf kernel container keeps its single entry.
HWTEST_F(CommandListAppendLaunchKernel, givenKernelWithPrintfWhenAppendToSynchronousImmCommandListHangsThenPrintfBufferIsPrinted) {
    ze_group_count_t dispatchSize{1, 1, 1};
    CmdListKernelLaunchParams kernelLaunchParams = {};

    ze_command_queue_desc_t syncQueueDesc = {};
    syncQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;

    // Configure the mock CSR before the command list is created: skip the base wait
    // implementation and return a GPU hang instead.
    const TaskCountType hangTaskCount = 33u;
    auto &ultCsr = neoDevice->getUltCommandStreamReceiver<FamilyType>();
    ultCsr.latestWaitForCompletionWithTimeoutTaskCount = hangTaskCount;
    ultCsr.callBaseWaitForCompletionWithTimeout = false;
    ultCsr.returnWaitForCompletionWithTimeout = WaitStatus::GpuHang;

    ze_result_t createResult;
    std::unique_ptr<L0::CommandList> immCmdList(CommandList::createImmediate(productFamily, device, &syncQueueDesc, false, NEO::EngineGroupType::RenderCompute, createResult));
    immCmdList->isFlushTaskSubmissionEnabled = true;

    // Register the mock kernel by hand so the command list treats it as a printf kernel.
    Mock<Kernel> printfKernel;
    immCmdList->getPrintfKernelContainer().push_back(&printfKernel);

    // First append: device lost is reported, yet printf output is printed with the hang flag set.
    auto appendResult = immCmdList->appendLaunchKernel(printfKernel.toHandle(), &dispatchSize, nullptr, 0, nullptr, kernelLaunchParams);
    EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, appendResult);
    EXPECT_EQ(1u, printfKernel.printPrintfOutputCalledTimes);
    EXPECT_TRUE(printfKernel.hangDetectedPassedToPrintfOutput);
    EXPECT_EQ(1u, immCmdList->getPrintfKernelContainer().size());
    EXPECT_EQ(&printfKernel, immCmdList->getPrintfKernelContainer()[0]);

    // Second append: same outcome, the call counter advances and the container size is unchanged.
    appendResult = immCmdList->appendLaunchKernel(printfKernel.toHandle(), &dispatchSize, nullptr, 0, nullptr, kernelLaunchParams);
    EXPECT_EQ(ZE_RESULT_ERROR_DEVICE_LOST, appendResult);
    EXPECT_EQ(2u, printfKernel.printPrintfOutputCalledTimes);
    EXPECT_TRUE(printfKernel.hangDetectedPassedToPrintfOutput);
    EXPECT_EQ(1u, immCmdList->getPrintfKernelContainer().size());
}
HWTEST_F(CommandListAppendLaunchKernel, WhenAppendingMultipleTimesThenSshIsNotDepletedButReallocated) {
createKernel();
ze_result_t returnValue;