mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
Detect GPU hangs in CommandMapUnmap::submit()
This change introduces detection of GPU hangs in CommandMapUnmap::submit() as well as in Event::submitCommand(). ULTs have been added to cover the new code.

Related-To: NEO-6681
Signed-off-by: Patryk Wrobel <patryk.wrobel@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
8c4b2aafa1
commit
4cde6ea1ce
@@ -587,10 +587,17 @@ void Event::submitCommand(bool abortTasks) {
|
||||
this->cmdQueue->getGpgpuCommandStreamReceiver().makeResident(*perfCounterNode->getBaseGraphicsAllocation());
|
||||
}
|
||||
}
|
||||
|
||||
auto &complStamp = cmdToProcess->submit(taskLevel, abortTasks);
|
||||
if (profilingCpuPath && this->isProfilingEnabled()) {
|
||||
setEndTimeStamp();
|
||||
}
|
||||
|
||||
if (complStamp.taskCount == CompletionStamp::gpuHang) {
|
||||
abortExecutionDueToGpuHang();
|
||||
return;
|
||||
}
|
||||
|
||||
updateTaskCount(complStamp.taskCount, peekBcsTaskCountFromCommandQueue());
|
||||
flushStamp->setStamp(complStamp.flushStamp);
|
||||
submittedCmd.exchange(cmdToProcess.release());
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "shared/source/command_stream/csr_deps.h"
|
||||
#include "shared/source/command_stream/linear_stream.h"
|
||||
#include "shared/source/command_stream/preemption.h"
|
||||
#include "shared/source/command_stream/wait_status.h"
|
||||
#include "shared/source/helpers/aligned_memory.h"
|
||||
#include "shared/source/helpers/engine_node_helper.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
@@ -38,9 +39,10 @@ CommandMapUnmap::CommandMapUnmap(MapOperationType operationType, MemObj &memObj,
|
||||
}
|
||||
|
||||
CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
|
||||
DecRefInternalAtScopeEnd decRefInternalAtScopeEnd{memObj};
|
||||
|
||||
if (terminated) {
|
||||
this->terminated = true;
|
||||
memObj.decRefInternal();
|
||||
return completionStamp;
|
||||
}
|
||||
|
||||
@@ -98,7 +100,12 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
|
||||
commandQueue.updateLatestSentEnqueueType(EnqueueProperties::Operation::DependencyResolveOnGpu);
|
||||
|
||||
if (!memObj.isMemObjZeroCopy()) {
|
||||
commandQueue.waitUntilComplete(completionStamp.taskCount, {}, completionStamp.flushStamp, false);
|
||||
const auto waitStatus = commandQueue.waitUntilComplete(completionStamp.taskCount, {}, completionStamp.flushStamp, false);
|
||||
if (waitStatus == WaitStatus::GpuHang) {
|
||||
completionStamp.taskCount = CompletionStamp::gpuHang;
|
||||
return completionStamp;
|
||||
}
|
||||
|
||||
if (operationType == MAP) {
|
||||
memObj.transferDataToHostPtr(copySize, copyOffset);
|
||||
} else if (!readOnly) {
|
||||
@@ -107,8 +114,6 @@ CompletionStamp &CommandMapUnmap::submit(uint32_t taskLevel, bool terminated) {
|
||||
}
|
||||
}
|
||||
|
||||
memObj.decRefInternal();
|
||||
|
||||
return completionStamp;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user