mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Detect GPU hangs in clFinish
This change introduces detection of GPU hangs in the clFinish function, as well as unit tests to cover the new code. Signed-off-by: Patryk Wrobel <patryk.wrobel@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
cf1bc3a2ba
commit
0ecc7c5e3b
@ -5,6 +5,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/wait_status.h"
|
||||
#include "shared/test/common/cmd_parse/hw_parse.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
@ -1497,6 +1498,28 @@ HWTEST_F(CommandQueueHwTest, givenFinishWhenFlushBatchedSubmissionsFailsThenErro
|
||||
EXPECT_EQ(CL_OUT_OF_RESOURCES, errorCode);
|
||||
}
|
||||
|
||||
// Checks that finish() calls waitForAllEngines() exactly once and returns
// CL_OUT_OF_RESOURCES when that wait reports WaitStatus::GpuHang.
HWTEST_F(CommandQueueHwTest, givenGpuHangWhenFinishingCommandQueueHwThenWaitForEnginesIsCalledAndOutOfResourcesIsReturned) {
    MockCommandQueueHw<FamilyType> queue{context, pClDevice, nullptr};

    // Presumably keeps the batched-submission flush from failing first, so the
    // wait status is what determines the result of finish().
    queue.getUltCommandStreamReceiver().shouldFlushBatchedSubmissionsReturnSuccess = true;
    // The mock returns this value instead of delegating to the real wait.
    queue.waitForAllEnginesReturnValue = WaitStatus::GpuHang;

    const auto status = queue.finish();

    EXPECT_EQ(1, queue.waitForAllEnginesCalledCount);
    EXPECT_EQ(CL_OUT_OF_RESOURCES, status);
}
|
||||
|
||||
// Checks that finish() calls waitForAllEngines() exactly once and returns
// CL_SUCCESS when that wait reports WaitStatus::Ready.
HWTEST_F(CommandQueueHwTest, givenNoGpuHangWhenFinishingCommandQueueHwThenWaitForEnginesIsCalledAndSuccessIsReturned) {
    MockCommandQueueHw<FamilyType> queue{context, pClDevice, nullptr};

    // Presumably keeps the batched-submission flush from failing first, so the
    // wait status is what determines the result of finish().
    queue.getUltCommandStreamReceiver().shouldFlushBatchedSubmissionsReturnSuccess = true;
    // The mock returns this value instead of delegating to the real wait.
    queue.waitForAllEnginesReturnValue = WaitStatus::Ready;

    const auto status = queue.finish();

    EXPECT_EQ(1, queue.waitForAllEnginesCalledCount);
    EXPECT_EQ(CL_SUCCESS, status);
}
|
||||
|
||||
HWTEST_F(IoqCommandQueueHwBlitTest, givenGpgpuCsrWhenEnqueueingSubsequentBlitsThenGpgpuCommandStreamIsNotObtained) {
|
||||
auto &gpgpuCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
auto srcBuffer = std::unique_ptr<Buffer>{BufferHelper<>::create(pContext)};
|
||||
|
@ -1079,7 +1079,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitSetWhe
|
||||
mockCsr->taskCount.store(10);
|
||||
mockCsr->latestFlushedTaskCount.store(5);
|
||||
|
||||
commandQueue.waitForAllEngines(false, nullptr);
|
||||
const auto waitStatus = commandQueue.waitForAllEngines(false, nullptr);
|
||||
EXPECT_EQ(WaitStatus::Ready, waitStatus);
|
||||
|
||||
parseCommands<FamilyType>(mockCsr->getCS(4096u));
|
||||
auto itorPipeControl = find<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||
@ -1110,7 +1111,8 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEnabledDirectSubmissionUpdate
|
||||
mockCsr->taskCount.store(10);
|
||||
mockCsr->latestFlushedTaskCount.store(5);
|
||||
|
||||
commandQueue.waitForAllEngines(false, nullptr);
|
||||
const auto waitStatus = commandQueue.waitForAllEngines(false, nullptr);
|
||||
EXPECT_EQ(WaitStatus::Ready, waitStatus);
|
||||
|
||||
parseCommands<FamilyType>(mockCsr->getCS(4096u));
|
||||
auto itorPipeControl = find<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
|
||||
|
@ -13,6 +13,8 @@
|
||||
|
||||
#include "opencl/source/command_queue/command_queue_hw.h"
|
||||
|
||||
#include <optional>
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// MockCommandQueue - Core implementation
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
@ -340,6 +342,16 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
return BaseClass::waitUntilComplete(gpgpuTaskCountToWait, copyEnginesToWait, flushStampToWait, useQuickKmdSleep, cleanTemporaryAllocationList, skipWait);
|
||||
}
|
||||
|
||||
// Mock override of waitForAllEngines(): counts every invocation and, when a
// forced result has been stored in waitForAllEnginesReturnValue, returns it
// without calling the base-class implementation.
WaitStatus waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler) override {
    ++waitForAllEnginesCalledCount;

    if (!waitForAllEnginesReturnValue) {
        // No forced result configured: delegate to the base class.
        return BaseClass::waitForAllEngines(blockedQueue, printfHandler);
    }

    return *waitForAllEnginesReturnValue;
}
|
||||
|
||||
bool isCacheFlushForBcsRequired() const override {
|
||||
if (overrideIsCacheFlushForBcsRequired.enabled) {
|
||||
return overrideIsCacheFlushForBcsRequired.returnValue;
|
||||
@ -373,6 +385,8 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
|
||||
BuiltinOpParams kernelParams;
|
||||
std::atomic<uint32_t> latestTaskCountWaited{std::numeric_limits<uint32_t>::max()};
|
||||
bool flushCalled = false;
|
||||
std::optional<WaitStatus> waitForAllEnginesReturnValue{};
|
||||
int waitForAllEnginesCalledCount{0};
|
||||
|
||||
LinearStream *peekCommandStream() {
|
||||
return this->commandStream;
|
||||
|
@ -5,6 +5,7 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/command_stream/wait_status.h"
|
||||
#include "shared/source/helpers/local_memory_access_modes.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/libult/ult_command_stream_receiver.h"
|
||||
@ -220,13 +221,16 @@ HWTEST_F(PrintfHandlerTests, givenPrintfHandlerWhenEnqueueIsBlockedThenDontUsePr
|
||||
using CommandQueueHw<FamilyType>::CommandQueueHw;
|
||||
using CommandQueueHw<FamilyType>::enqueueKernel;
|
||||
|
||||
void waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler, bool cleanTemporaryAllocationsList) override {
|
||||
// Test double for waitForAllEngines(): records that a wait was requested and
// which printf handler was supplied, then returns the preconfigured status.
// This override performs no waiting itself.
WaitStatus waitForAllEngines(bool blockedQueue, PrintfHandler *printfHandler, bool cleanTemporaryAllocationsList) override {
    printfHandlerUsedForWait = printfHandler;
    waitCalled = true;
    return waitForAllEnginesReturnValue;
}
|
||||
|
||||
bool waitCalled = false;
|
||||
PrintfHandler *printfHandlerUsedForWait = nullptr;
|
||||
WaitStatus waitForAllEnginesReturnValue = WaitStatus::Ready;
|
||||
};
|
||||
|
||||
auto device = std::make_unique<MockClDevice>(MockDevice::createWithNewExecutionEnvironment<MockDevice>(nullptr));
|
||||
|
Reference in New Issue
Block a user