mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 21:18:24 +08:00
Detect GPU hang in clWaitForEvents
This change: - moves NEO::WaitStatus to a separate file - enables detection of GPU hang in clWaitForEvents - adjusts most of blocking calls in CommandStreamReceiver to return WaitStatus - adds ULTs to cover the new code Related-To: NEO-6681 Signed-off-by: Patryk Wrobel <patryk.wrobel@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
f2e1361541
commit
7f729b7f89
@@ -229,19 +229,25 @@ bool CommandQueue::isCompleted(uint32_t gpgpuTaskCount, CopyEngineState bcsState
|
||||
return false;
|
||||
}
|
||||
|
||||
void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) {
|
||||
WaitStatus CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEngineState> copyEnginesToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool cleanTemporaryAllocationList, bool skipWait) {
|
||||
WAIT_ENTER()
|
||||
|
||||
WaitStatus waitStatus{WaitStatus::Ready};
|
||||
|
||||
DBG_LOG(LogTaskCounts, __FUNCTION__, "Waiting for taskCount:", gpgpuTaskCountToWait);
|
||||
DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "Current taskCount:", getHwTag());
|
||||
|
||||
if (!skipWait) {
|
||||
bool forcePowerSavingMode = this->throttle == QueueThrottle::LOW;
|
||||
|
||||
getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(gpgpuTaskCountToWait,
|
||||
flushStampToWait,
|
||||
useQuickKmdSleep,
|
||||
forcePowerSavingMode);
|
||||
waitStatus = getGpgpuCommandStreamReceiver().waitForTaskCountWithKmdNotifyFallback(gpgpuTaskCountToWait,
|
||||
flushStampToWait,
|
||||
useQuickKmdSleep,
|
||||
forcePowerSavingMode);
|
||||
if (waitStatus == WaitStatus::GpuHang) {
|
||||
return WaitStatus::GpuHang;
|
||||
}
|
||||
|
||||
DEBUG_BREAK_IF(getHwTag() < gpgpuTaskCountToWait);
|
||||
|
||||
if (gtpinIsGTPinInitialized()) {
|
||||
@@ -251,17 +257,25 @@ void CommandQueue::waitUntilComplete(uint32_t gpgpuTaskCountToWait, Range<CopyEn
|
||||
|
||||
for (const CopyEngineState ©Engine : copyEnginesToWait) {
|
||||
auto bcsCsr = getBcsCommandStreamReceiver(copyEngine.engineType);
|
||||
bcsCsr->waitForTaskCountWithKmdNotifyFallback(copyEngine.taskCount, 0, false, false);
|
||||
bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(copyEngine.taskCount);
|
||||
|
||||
waitStatus = bcsCsr->waitForTaskCountWithKmdNotifyFallback(copyEngine.taskCount, 0, false, false);
|
||||
if (waitStatus == WaitStatus::GpuHang) {
|
||||
return WaitStatus::GpuHang;
|
||||
}
|
||||
|
||||
waitStatus = bcsCsr->waitForTaskCountAndCleanTemporaryAllocationList(copyEngine.taskCount);
|
||||
if (waitStatus == WaitStatus::GpuHang) {
|
||||
return WaitStatus::GpuHang;
|
||||
}
|
||||
}
|
||||
|
||||
if (cleanTemporaryAllocationList) {
|
||||
getGpgpuCommandStreamReceiver().waitForTaskCountAndCleanTemporaryAllocationList(gpgpuTaskCountToWait);
|
||||
} else {
|
||||
getGpgpuCommandStreamReceiver().waitForTaskCount(gpgpuTaskCountToWait);
|
||||
}
|
||||
waitStatus = cleanTemporaryAllocationList
|
||||
? getGpgpuCommandStreamReceiver().waitForTaskCountAndCleanTemporaryAllocationList(gpgpuTaskCountToWait)
|
||||
: getGpgpuCommandStreamReceiver().waitForTaskCount(gpgpuTaskCountToWait);
|
||||
|
||||
WAIT_LEAVE()
|
||||
|
||||
return waitStatus;
|
||||
}
|
||||
|
||||
bool CommandQueue::isQueueBlocked() {
|
||||
|
||||
Reference in New Issue
Block a user