feature: enable event synchronization GPU status check by default

Related-To: GSD-10187

Signed-off-by: Wenbin Lu <wenbin.lu@intel.com>
This commit is contained in:
Wenbin Lu
2024-12-13 00:29:31 +00:00
committed by Compute-Runtime-Automation
parent e27a6dc280
commit 17e537b694
2 changed files with 15 additions and 4 deletions

View File

@@ -686,7 +686,7 @@ ze_result_t EventImp<TagSizeT>::hostSynchronize(uint64_t timeout) {
if (device->getNEODevice()->getRootDeviceEnvironment().assertHandler.get()) {
device->getNEODevice()->getRootDeviceEnvironment().assertHandler->printAssertAndAbort();
}
if (NEO::debugManager.flags.ForceGpuStatusCheckOnSuccessfulEventHostSynchronize.get() == 1) {
if (NEO::debugManager.flags.ForceGpuStatusCheckOnSuccessfulEventHostSynchronize.get() != 0) {
const bool hangDetected = this->csrs[0]->isGpuHangDetected();
if (hangDetected) {
return ZE_RESULT_ERROR_DEVICE_LOST;

View File

@@ -1627,8 +1627,6 @@ TEST_F(EventSynchronizeTest, GivenGpuHangWhenHostSynchronizeIsCalledThenDeviceLo
}
TEST_F(EventSynchronizeTest, GivenHangHappenedBeforePeriodicHangCheckAndForceGpuStatusCheckDuringHostSynchronizeThenHangIsDetected) {
NEO::debugManager.flags.ForceGpuStatusCheckOnSuccessfulEventHostSynchronize.set(1);
const auto csr = std::make_unique<MockCommandStreamReceiver>(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
csr->isGpuHangDetectedReturnValue = true;
@@ -1642,9 +1640,22 @@ TEST_F(EventSynchronizeTest, GivenHangHappenedBeforePeriodicHangCheckAndForceGpu
}
// Verifies that hostSynchronize() reports success for an event whose host-visible
// state is already signaled while the GPU status check debug flag is enabled.
TEST_F(EventSynchronizeTest, GivenEventCompletedAndForceGpuStatusCheckThenHostSynchronizeReturnsSuccess) {
// Explicitly enable the debug flag that gates the post-synchronization GPU status check.
// NOTE(review): diff markers are not visible in this view; confirm whether this line is part of the final test.
NEO::debugManager.flags.ForceGpuStatusCheckOnSuccessfulEventHostSynchronize.set(1);
// The mock CSR's hang-detection result is left untouched here.
// NOTE(review): presumably it defaults to "no hang" - confirm against MockCommandStreamReceiver.
const auto csr = std::make_unique<MockCommandStreamReceiver>(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
// Install the mock in slot 0 of the event's CSR list; csr keeps ownership for the test's duration.
event->csrs[0] = csr.get();
// Write the signaled state directly through the event's host address.
uint32_t *hostAddr = static_cast<uint32_t *>(event->getHostAddress());
*hostAddr = Event::STATE_SIGNALED;
// Synchronize with a timeout argument of 0.
auto result = event->hostSynchronize(0);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
}
TEST_F(EventSynchronizeTest, GivenHangHappenedBeforePeriodicHangCheckAndForceGpuStatusCheckDuringHostSynchronizeDisabledThenSuccessIsReturned) {
NEO::debugManager.flags.ForceGpuStatusCheckOnSuccessfulEventHostSynchronize.set(0);
const auto csr = std::make_unique<MockCommandStreamReceiver>(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
csr->isGpuHangDetectedReturnValue = true;
event->csrs[0] = csr.get();
uint32_t *hostAddr = static_cast<uint32_t *>(event->getHostAddress());