mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Add debug flag to disable GPU hang detection
This change introduces a new debug flag called DisableGpuHangDetection. The flag is off (false) by default, so GPU hang detection stays enabled. To disable hang checking, set the flag to true; CommandStreamReceiver::isGpuHangDetected() then returns false without querying the driver model.
Related-To: NEO-6681
Signed-off-by: Patryk Wrobel <patryk.wrobel@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
9d4dacacca
commit
835b344968
@@ -382,6 +382,7 @@ ReuseKernelBinaries = -1
|
|||||||
EnableChipsetUniqueUUID = -1
|
EnableChipsetUniqueUUID = -1
|
||||||
ForceSimdMessageSizeInWalker = -1
|
ForceSimdMessageSizeInWalker = -1
|
||||||
UseNewQueryTopoIoctl = 1
|
UseNewQueryTopoIoctl = 1
|
||||||
|
DisableGpuHangDetection = 0
|
||||||
EnableRecoverablePageFaults = -1
|
EnableRecoverablePageFaults = -1
|
||||||
EnableImplicitMigrationOnFaultableHardware = -1
|
EnableImplicitMigrationOnFaultableHardware = -1
|
||||||
UseDrmVirtualEnginesForCcs = -1
|
UseDrmVirtualEnginesForCcs = -1
|
||||||
|
@@ -12,6 +12,7 @@
|
|||||||
#include "shared/source/command_stream/experimental_command_buffer.h"
|
#include "shared/source/command_stream/experimental_command_buffer.h"
|
||||||
#include "shared/source/command_stream/preemption.h"
|
#include "shared/source/command_stream/preemption.h"
|
||||||
#include "shared/source/command_stream/scratch_space_controller.h"
|
#include "shared/source/command_stream/scratch_space_controller.h"
|
||||||
|
#include "shared/source/debug_settings/debug_settings_manager.h"
|
||||||
#include "shared/source/device/device.h"
|
#include "shared/source/device/device.h"
|
||||||
#include "shared/source/direct_submission/direct_submission_controller.h"
|
#include "shared/source/direct_submission/direct_submission_controller.h"
|
||||||
#include "shared/source/execution_environment/root_device_environment.h"
|
#include "shared/source/execution_environment/root_device_environment.h"
|
||||||
@@ -249,6 +250,10 @@ bool CommandStreamReceiver::skipResourceCleanup() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool CommandStreamReceiver::isGpuHangDetected() const {
|
bool CommandStreamReceiver::isGpuHangDetected() const {
|
||||||
|
if (DebugManager.flags.DisableGpuHangDetection.get()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
return this->osContext && this->getOSInterface() && this->getOSInterface()->getDriverModel() && this->getOSInterface()->getDriverModel()->isGpuHangDetected(*osContext);
|
return this->osContext && this->getOSInterface() && this->getOSInterface()->getDriverModel() && this->getOSInterface()->getDriverModel()->isGpuHangDetected(*osContext);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -73,6 +73,7 @@ DECLARE_DEBUG_VARIABLE(bool, AllowPatchingVfeStateInCommandLists, false, "true:
|
|||||||
DECLARE_DEBUG_VARIABLE(bool, PrintMemoryRegionSizes, false, "print memory bank type, instance and it's size")
|
DECLARE_DEBUG_VARIABLE(bool, PrintMemoryRegionSizes, false, "print memory bank type, instance and it's size")
|
||||||
DECLARE_DEBUG_VARIABLE(bool, UpdateCrossThreadDataSize, false, "Turn on cross thread data size calculation for PATCH TOKEN binary")
|
DECLARE_DEBUG_VARIABLE(bool, UpdateCrossThreadDataSize, false, "Turn on cross thread data size calculation for PATCH TOKEN binary")
|
||||||
DECLARE_DEBUG_VARIABLE(bool, UseNewQueryTopoIoctl, true, "Use DRM_I915_QUERY_COMPUTE_SLICES")
|
DECLARE_DEBUG_VARIABLE(bool, UseNewQueryTopoIoctl, true, "Use DRM_I915_QUERY_COMPUTE_SLICES")
|
||||||
|
DECLARE_DEBUG_VARIABLE(bool, DisableGpuHangDetection, false, "Disable GPU hang detection")
|
||||||
DECLARE_DEBUG_VARIABLE(std::string, ForceDeviceId, std::string("unk"), "DeviceId selected for testing")
|
DECLARE_DEBUG_VARIABLE(std::string, ForceDeviceId, std::string("unk"), "DeviceId selected for testing")
|
||||||
DECLARE_DEBUG_VARIABLE(std::string, FilterDeviceId, std::string("unk"), "Device id filter, adapter matching device id will be opened. Ignored when unk.")
|
DECLARE_DEBUG_VARIABLE(std::string, FilterDeviceId, std::string("unk"), "Device id filter, adapter matching device id will be opened. Ignored when unk.")
|
||||||
DECLARE_DEBUG_VARIABLE(std::string, FilterBdfPath, std::string("unk"), "Linux-only, BDF path filter, only matching paths will be opened. Ignored when unk.")
|
DECLARE_DEBUG_VARIABLE(std::string, FilterBdfPath, std::string("unk"), "Linux-only, BDF path filter, only matching paths will be opened. Ignored when unk.")
|
||||||
|
@@ -174,6 +174,22 @@ HWTEST_F(CommandStreamReceiverTest, whenStoreAllocationThenStoredAllocationHasTa
|
|||||||
EXPECT_EQ(csr.peekTaskCount(), allocation->getTaskCount(csr.getOsContext().getContextId()));
|
EXPECT_EQ(csr.peekTaskCount(), allocation->getTaskCount(csr.getOsContext().getContextId()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
HWTEST_F(CommandStreamReceiverTest, givenDisableGpuHangDetectionFlagWhenCheckingGpuHangThenDriverModelIsNotCalledAndFalseIsReturned) {
|
||||||
|
DebugManagerStateRestore stateRestore;
|
||||||
|
DebugManager.flags.DisableGpuHangDetection.set(true);
|
||||||
|
|
||||||
|
auto driverModelMock = std::make_unique<MockDriverModel>();
|
||||||
|
driverModelMock->isGpuHangDetectedToReturn = true;
|
||||||
|
|
||||||
|
auto osInterface = std::make_unique<OSInterface>();
|
||||||
|
osInterface->setDriverModel(std::move(driverModelMock));
|
||||||
|
|
||||||
|
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
|
csr.executionEnvironment.rootDeviceEnvironments[csr.rootDeviceIndex]->osInterface = std::move(osInterface);
|
||||||
|
|
||||||
|
EXPECT_FALSE(csr.isGpuHangDetected());
|
||||||
|
}
|
||||||
|
|
||||||
HWTEST_F(CommandStreamReceiverTest, givenGpuHangWhenWaititingForCompletionWithTimeoutThenGpuHangIsReturned) {
|
HWTEST_F(CommandStreamReceiverTest, givenGpuHangWhenWaititingForCompletionWithTimeoutThenGpuHangIsReturned) {
|
||||||
auto driverModelMock = std::make_unique<MockDriverModel>();
|
auto driverModelMock = std::make_unique<MockDriverModel>();
|
||||||
driverModelMock->isGpuHangDetectedToReturn = true;
|
driverModelMock->isGpuHangDetectedToReturn = true;
|
||||||
|
Reference in New Issue
Block a user