feature: gpu assert implementation

- allocate the assert buffer when a kernel contains an assert
- track kernels that contain asserts in cmdlists and cmdqueues
- check for and print asserts at sync calls: cmdqueue synchronize(), fence
synchronize(), event hostSynchronize(), and synchronous immediate cmdlist
append()

Related-To: NEO-5753

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2023-03-13 14:14:35 +00:00
committed by Compute-Runtime-Automation
parent f57ff2913c
commit 0204761add
27 changed files with 665 additions and 13 deletions

View File

@@ -5,6 +5,7 @@
*
*/
#include "shared/source/assert_handler/assert_handler.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/device/sub_device.h"
#include "shared/source/memory_manager/internal_allocation_storage.h"
@@ -309,6 +310,9 @@ ze_result_t EventImp<TagSizeT>::hostSynchronize(uint64_t timeout) {
static_cast<Kernel *>(this->getKernelForPrintf())->printPrintfOutput(true);
this->setKernelForPrintf(nullptr);
}
if (device->getNEODevice()->getRootDeviceEnvironment().assertHandler.get()) {
device->getNEODevice()->getRootDeviceEnvironment().assertHandler->printAssertAndAbort();
}
return ret;
}
@@ -318,6 +322,9 @@ ze_result_t EventImp<TagSizeT>::hostSynchronize(uint64_t timeout) {
if (elapsedTimeSinceGpuHangCheck.count() >= this->gpuHangCheckPeriod.count()) {
lastHangCheckTime = currentTime;
if (this->csr->isGpuHangDetected()) {
if (device->getNEODevice()->getRootDeviceEnvironment().assertHandler.get()) {
device->getNEODevice()->getRootDeviceEnvironment().assertHandler->printAssertAndAbort();
}
return ZE_RESULT_ERROR_DEVICE_LOST;
}
}
@@ -332,6 +339,9 @@ ze_result_t EventImp<TagSizeT>::hostSynchronize(uint64_t timeout) {
} while (timeDiff < timeout);
if (device->getNEODevice()->getRootDeviceEnvironment().assertHandler.get()) {
device->getNEODevice()->getRootDeviceEnvironment().assertHandler->printAssertAndAbort();
}
return ret;
}