Check for GPU hang in the wait-for-timestamps path

Related-To: NEO-6868

Signed-off-by: Milczarek, Slawomir <slawomir.milczarek@intel.com>
This commit is contained in:
Milczarek, Slawomir
2022-09-19 10:20:14 +00:00
committed by Compute-Runtime-Automation
parent 1ca5d57ab0
commit 0192e8038f
10 changed files with 65 additions and 26 deletions

View File

@@ -45,7 +45,6 @@ ze_result_t Fence::reset(bool signaled) {
}
ze_result_t Fence::hostSynchronize(uint64_t timeout) {
std::chrono::microseconds elapsedTimeSinceGpuHangCheck{0};
std::chrono::high_resolution_clock::time_point waitStartTime, lastHangCheckTime, currentTime;
uint64_t timeDiff = 0;
ze_result_t ret = ZE_RESULT_NOT_READY;
@@ -72,13 +71,8 @@ ze_result_t Fence::hostSynchronize(uint64_t timeout) {
}
currentTime = std::chrono::high_resolution_clock::now();
elapsedTimeSinceGpuHangCheck = std::chrono::duration_cast<std::chrono::microseconds>(currentTime - lastHangCheckTime);
if (elapsedTimeSinceGpuHangCheck.count() >= gpuHangCheckPeriod.count()) {
lastHangCheckTime = currentTime;
if (csr->isGpuHangDetected()) {
return ZE_RESULT_ERROR_DEVICE_LOST;
}
if (csr->checkGpuHangDetected(currentTime, lastHangCheckTime)) {
return ZE_RESULT_ERROR_DEVICE_LOST;
}
if (timeout == std::numeric_limits<uint64_t>::max()) {