mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-15 13:01:45 +08:00
Implement GPU hang detection on Windows
This change uses the value of cpuAddress from the monitored fence to detect a GPU hang.
Related-To: NEO-5313
Signed-off-by: Patryk Wrobel <patryk.wrobel@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
61ca84e94b
commit
18cafd3a52
@ -38,6 +38,10 @@ struct MockDriverModel : NEO::DriverModel {
|
|||||||
size_t getMaxMemAllocSize() const override {
|
size_t getMaxMemAllocSize() const override {
|
||||||
return maxAllocSize;
|
return maxAllocSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool isGpuHangDetected(NEO::OsContext &osContext) override {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct MockDriverModelWDDM : NEO::DriverModel {
|
struct MockDriverModelWDDM : NEO::DriverModel {
|
||||||
@ -51,6 +55,10 @@ struct MockDriverModelWDDM : NEO::DriverModel {
|
|||||||
size_t getMaxMemAllocSize() const override {
|
size_t getMaxMemAllocSize() const override {
|
||||||
return maxAllocSize;
|
return maxAllocSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool isGpuHangDetected(NEO::OsContext &osContext) override {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct MockDriverModelDRM : NEO::DriverModel {
|
struct MockDriverModelDRM : NEO::DriverModel {
|
||||||
@ -64,6 +72,10 @@ struct MockDriverModelDRM : NEO::DriverModel {
|
|||||||
size_t getMaxMemAllocSize() const override {
|
size_t getMaxMemAllocSize() const override {
|
||||||
return maxAllocSize;
|
return maxAllocSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool isGpuHangDetected(NEO::OsContext &osContext) override {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ContextShareableMock : public L0::ContextImp {
|
struct ContextShareableMock : public L0::ContextImp {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (C) 2020-2021 Intel Corporation
|
* Copyright (C) 2020-2022 Intel Corporation
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
@ -366,6 +366,10 @@ class UnknownDriverModel : public DriverModel {
|
|||||||
PhysicalDevicePciBusInfo pciBusInfo(PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue);
|
PhysicalDevicePciBusInfo pciBusInfo(PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue, PhysicalDevicePciBusInfo::InvalidValue);
|
||||||
return pciBusInfo;
|
return pciBusInfo;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool isGpuHangDetected(OsContext &osContext) override {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
using SysmanUnknownDriverModelTest = Test<DeviceFixture>;
|
using SysmanUnknownDriverModelTest = Test<DeviceFixture>;
|
||||||
|
@ -1038,30 +1038,6 @@ TEST(DrmTest, GivenCompletionFenceDebugFlagWhenCreatingDrmObjectThenExpectCorrec
|
|||||||
EXPECT_FALSE(drmDisabled.completionFenceSupport());
|
EXPECT_FALSE(drmDisabled.completionFenceSupport());
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(DrmTest, GivenInvalidContextIdWhenIsGpuHangIsCalledThenErrorIsThrown) {
|
|
||||||
ExecutionEnvironment executionEnvironment{};
|
|
||||||
executionEnvironment.prepareRootDeviceEnvironments(1);
|
|
||||||
|
|
||||||
DrmMock drm{*executionEnvironment.rootDeviceEnvironments[0]};
|
|
||||||
uint32_t contextId{0};
|
|
||||||
EngineDescriptor engineDescriptor{EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular})};
|
|
||||||
|
|
||||||
CommandStreamReceiver *csr{nullptr};
|
|
||||||
MockOsContextLinux mockOsContextLinux{drm, contextId, engineDescriptor};
|
|
||||||
EngineControlContainer engines{EngineControl{csr, &mockOsContextLinux}};
|
|
||||||
|
|
||||||
auto memoryManager = std::make_unique<MockMemoryManager>();
|
|
||||||
auto memoryManagerRaw = memoryManager.get();
|
|
||||||
|
|
||||||
memoryManagerRaw->registeredEngines = std::move(engines);
|
|
||||||
executionEnvironment.memoryManager = std::move(memoryManager);
|
|
||||||
|
|
||||||
const auto invalidContextId = 1;
|
|
||||||
EXPECT_THROW(drm.isGpuHangDetected(invalidContextId), std::runtime_error);
|
|
||||||
|
|
||||||
memoryManagerRaw->registeredEngines.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
TEST(DrmTest, GivenIoctlErrorWhenIsGpuHangIsCalledThenErrorIsThrown) {
|
TEST(DrmTest, GivenIoctlErrorWhenIsGpuHangIsCalledThenErrorIsThrown) {
|
||||||
ExecutionEnvironment executionEnvironment{};
|
ExecutionEnvironment executionEnvironment{};
|
||||||
executionEnvironment.prepareRootDeviceEnvironments(1);
|
executionEnvironment.prepareRootDeviceEnvironments(1);
|
||||||
@ -1070,22 +1046,11 @@ TEST(DrmTest, GivenIoctlErrorWhenIsGpuHangIsCalledThenErrorIsThrown) {
|
|||||||
uint32_t contextId{0};
|
uint32_t contextId{0};
|
||||||
EngineDescriptor engineDescriptor{EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular})};
|
EngineDescriptor engineDescriptor{EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular})};
|
||||||
|
|
||||||
CommandStreamReceiver *csr{nullptr};
|
|
||||||
MockOsContextLinux mockOsContextLinux{drm, contextId, engineDescriptor};
|
MockOsContextLinux mockOsContextLinux{drm, contextId, engineDescriptor};
|
||||||
EngineControlContainer engines{EngineControl{csr, &mockOsContextLinux}};
|
|
||||||
|
|
||||||
auto memoryManager = std::make_unique<MockMemoryManager>();
|
|
||||||
auto memoryManagerRaw = memoryManager.get();
|
|
||||||
|
|
||||||
memoryManagerRaw->registeredEngines = std::move(engines);
|
|
||||||
executionEnvironment.memoryManager = std::move(memoryManager);
|
|
||||||
|
|
||||||
mockOsContextLinux.drmContextIds.push_back(0);
|
mockOsContextLinux.drmContextIds.push_back(0);
|
||||||
mockOsContextLinux.drmContextIds.push_back(3);
|
mockOsContextLinux.drmContextIds.push_back(3);
|
||||||
|
|
||||||
EXPECT_THROW(drm.isGpuHangDetected(0), std::runtime_error);
|
EXPECT_THROW(drm.isGpuHangDetected(mockOsContextLinux), std::runtime_error);
|
||||||
|
|
||||||
memoryManagerRaw->registeredEngines.clear();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(DrmTest, GivenZeroBatchActiveAndZeroBatchPendingResetStatsWhenIsGpuHangIsCalledThenNoHangIsReported) {
|
TEST(DrmTest, GivenZeroBatchActiveAndZeroBatchPendingResetStatsWhenIsGpuHangIsCalledThenNoHangIsReported) {
|
||||||
@ -1096,30 +1061,20 @@ TEST(DrmTest, GivenZeroBatchActiveAndZeroBatchPendingResetStatsWhenIsGpuHangIsCa
|
|||||||
uint32_t contextId{0};
|
uint32_t contextId{0};
|
||||||
EngineDescriptor engineDescriptor{EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular})};
|
EngineDescriptor engineDescriptor{EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular})};
|
||||||
|
|
||||||
CommandStreamReceiver *csr{nullptr};
|
|
||||||
MockOsContextLinux mockOsContextLinux{drm, contextId, engineDescriptor};
|
MockOsContextLinux mockOsContextLinux{drm, contextId, engineDescriptor};
|
||||||
EngineControlContainer engines{EngineControl{csr, &mockOsContextLinux}};
|
mockOsContextLinux.drmContextIds.push_back(0);
|
||||||
|
mockOsContextLinux.drmContextIds.push_back(3);
|
||||||
auto memoryManager = std::make_unique<MockMemoryManager>();
|
|
||||||
auto memoryManagerRaw = memoryManager.get();
|
|
||||||
|
|
||||||
memoryManagerRaw->registeredEngines = std::move(engines);
|
|
||||||
executionEnvironment.memoryManager = std::move(memoryManager);
|
|
||||||
|
|
||||||
drm_i915_reset_stats resetStats{};
|
drm_i915_reset_stats resetStats{};
|
||||||
resetStats.ctx_id = 0;
|
resetStats.ctx_id = 0;
|
||||||
mockOsContextLinux.drmContextIds.push_back(0);
|
|
||||||
drm.resetStatsToReturn.push_back(resetStats);
|
drm.resetStatsToReturn.push_back(resetStats);
|
||||||
|
|
||||||
resetStats.ctx_id = 3;
|
resetStats.ctx_id = 3;
|
||||||
mockOsContextLinux.drmContextIds.push_back(3);
|
|
||||||
drm.resetStatsToReturn.push_back(resetStats);
|
drm.resetStatsToReturn.push_back(resetStats);
|
||||||
|
|
||||||
bool isGpuHangDetected{};
|
bool isGpuHangDetected{};
|
||||||
EXPECT_NO_THROW(isGpuHangDetected = drm.isGpuHangDetected(0));
|
EXPECT_NO_THROW(isGpuHangDetected = drm.isGpuHangDetected(mockOsContextLinux));
|
||||||
EXPECT_FALSE(isGpuHangDetected);
|
EXPECT_FALSE(isGpuHangDetected);
|
||||||
|
|
||||||
memoryManagerRaw->registeredEngines.clear();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(DrmTest, GivenBatchActiveGreaterThanZeroResetStatsWhenIsGpuHangIsCalledThenHangIsReported) {
|
TEST(DrmTest, GivenBatchActiveGreaterThanZeroResetStatsWhenIsGpuHangIsCalledThenHangIsReported) {
|
||||||
@ -1130,31 +1085,21 @@ TEST(DrmTest, GivenBatchActiveGreaterThanZeroResetStatsWhenIsGpuHangIsCalledThen
|
|||||||
uint32_t contextId{0};
|
uint32_t contextId{0};
|
||||||
EngineDescriptor engineDescriptor{EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular})};
|
EngineDescriptor engineDescriptor{EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular})};
|
||||||
|
|
||||||
CommandStreamReceiver *csr{nullptr};
|
|
||||||
MockOsContextLinux mockOsContextLinux{drm, contextId, engineDescriptor};
|
MockOsContextLinux mockOsContextLinux{drm, contextId, engineDescriptor};
|
||||||
EngineControlContainer engines{EngineControl{csr, &mockOsContextLinux}};
|
mockOsContextLinux.drmContextIds.push_back(0);
|
||||||
|
mockOsContextLinux.drmContextIds.push_back(3);
|
||||||
auto memoryManager = std::make_unique<MockMemoryManager>();
|
|
||||||
auto memoryManagerRaw = memoryManager.get();
|
|
||||||
|
|
||||||
memoryManagerRaw->registeredEngines = std::move(engines);
|
|
||||||
executionEnvironment.memoryManager = std::move(memoryManager);
|
|
||||||
|
|
||||||
drm_i915_reset_stats resetStats{};
|
drm_i915_reset_stats resetStats{};
|
||||||
resetStats.ctx_id = 0;
|
resetStats.ctx_id = 0;
|
||||||
mockOsContextLinux.drmContextIds.push_back(0);
|
|
||||||
drm.resetStatsToReturn.push_back(resetStats);
|
drm.resetStatsToReturn.push_back(resetStats);
|
||||||
|
|
||||||
resetStats.ctx_id = 3;
|
resetStats.ctx_id = 3;
|
||||||
resetStats.batch_active = 2;
|
resetStats.batch_active = 2;
|
||||||
mockOsContextLinux.drmContextIds.push_back(3);
|
|
||||||
drm.resetStatsToReturn.push_back(resetStats);
|
drm.resetStatsToReturn.push_back(resetStats);
|
||||||
|
|
||||||
bool isGpuHangDetected{};
|
bool isGpuHangDetected{};
|
||||||
EXPECT_NO_THROW(isGpuHangDetected = drm.isGpuHangDetected(0));
|
EXPECT_NO_THROW(isGpuHangDetected = drm.isGpuHangDetected(mockOsContextLinux));
|
||||||
EXPECT_TRUE(isGpuHangDetected);
|
EXPECT_TRUE(isGpuHangDetected);
|
||||||
|
|
||||||
memoryManagerRaw->registeredEngines.clear();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(DrmTest, GivenBatchPendingGreaterThanZeroResetStatsWhenIsGpuHangIsCalledThenHangIsReported) {
|
TEST(DrmTest, GivenBatchPendingGreaterThanZeroResetStatsWhenIsGpuHangIsCalledThenHangIsReported) {
|
||||||
@ -1165,27 +1110,17 @@ TEST(DrmTest, GivenBatchPendingGreaterThanZeroResetStatsWhenIsGpuHangIsCalledThe
|
|||||||
uint32_t contextId{0};
|
uint32_t contextId{0};
|
||||||
EngineDescriptor engineDescriptor{EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular})};
|
EngineDescriptor engineDescriptor{EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular})};
|
||||||
|
|
||||||
CommandStreamReceiver *csr{nullptr};
|
|
||||||
MockOsContextLinux mockOsContextLinux{drm, contextId, engineDescriptor};
|
MockOsContextLinux mockOsContextLinux{drm, contextId, engineDescriptor};
|
||||||
EngineControlContainer engines{EngineControl{csr, &mockOsContextLinux}};
|
mockOsContextLinux.drmContextIds.push_back(8);
|
||||||
|
|
||||||
auto memoryManager = std::make_unique<MockMemoryManager>();
|
|
||||||
auto memoryManagerRaw = memoryManager.get();
|
|
||||||
|
|
||||||
memoryManagerRaw->registeredEngines = std::move(engines);
|
|
||||||
executionEnvironment.memoryManager = std::move(memoryManager);
|
|
||||||
|
|
||||||
drm_i915_reset_stats resetStats{};
|
drm_i915_reset_stats resetStats{};
|
||||||
resetStats.ctx_id = 8;
|
resetStats.ctx_id = 8;
|
||||||
resetStats.batch_pending = 7;
|
resetStats.batch_pending = 7;
|
||||||
mockOsContextLinux.drmContextIds.push_back(8);
|
|
||||||
drm.resetStatsToReturn.push_back(resetStats);
|
drm.resetStatsToReturn.push_back(resetStats);
|
||||||
|
|
||||||
bool isGpuHangDetected{};
|
bool isGpuHangDetected{};
|
||||||
EXPECT_NO_THROW(isGpuHangDetected = drm.isGpuHangDetected(0));
|
EXPECT_NO_THROW(isGpuHangDetected = drm.isGpuHangDetected(mockOsContextLinux));
|
||||||
EXPECT_TRUE(isGpuHangDetected);
|
EXPECT_TRUE(isGpuHangDetected);
|
||||||
|
|
||||||
memoryManagerRaw->registeredEngines.clear();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(DrmTest, givenSetupIoctlHelperThenIoctlHelperNotNull) {
|
TEST(DrmTest, givenSetupIoctlHelperThenIoctlHelperNotNull) {
|
||||||
|
@ -38,7 +38,9 @@
|
|||||||
#include "gtest/gtest.h"
|
#include "gtest/gtest.h"
|
||||||
#include "mock_gmm_memory.h"
|
#include "mock_gmm_memory.h"
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
#include <limits>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
namespace NEO {
|
namespace NEO {
|
||||||
@ -344,6 +346,24 @@ TEST_F(Wddm20Tests, givenGraphicsAllocationWhenItIsMappedInHeap0ThenItHasGpuAddr
|
|||||||
EXPECT_LE(gpuAddress, cannonizedHeapEnd);
|
EXPECT_LE(gpuAddress, cannonizedHeapEnd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(Wddm20WithMockGdiDllTests, GivenInvalidCpuAddressWhenCheckingForGpuHangThenFalseIsReturned) {
|
||||||
|
osContext->getResidencyController().getMonitoredFence().cpuAddress = nullptr;
|
||||||
|
EXPECT_FALSE(wddm->isGpuHangDetected(*osContext));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(Wddm20WithMockGdiDllTests, GivenCpuValueDifferentThanGpuHangIndicationWhenCheckingForGpuHangThenFalseIsReturned) {
|
||||||
|
constexpr auto cpuValue{777u};
|
||||||
|
ASSERT_NE(NEO::Wddm::gpuHangIndication, cpuValue);
|
||||||
|
|
||||||
|
*osContext->getResidencyController().getMonitoredFence().cpuAddress = cpuValue;
|
||||||
|
EXPECT_FALSE(wddm->isGpuHangDetected(*osContext));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(Wddm20WithMockGdiDllTests, GivenGpuHangIndicationWhenCheckingForGpuHangThenTrueIsReturned) {
|
||||||
|
*osContext->getResidencyController().getMonitoredFence().cpuAddress = NEO::Wddm::gpuHangIndication;
|
||||||
|
EXPECT_TRUE(wddm->isGpuHangDetected(*osContext));
|
||||||
|
}
|
||||||
|
|
||||||
TEST_F(Wddm20WithMockGdiDllTests, GivenThreeOsHandlesWhenAskedForDestroyAllocationsThenAllMarkedAllocationsAreDestroyed) {
|
TEST_F(Wddm20WithMockGdiDllTests, GivenThreeOsHandlesWhenAskedForDestroyAllocationsThenAllMarkedAllocationsAreDestroyed) {
|
||||||
OsHandleStorage storage;
|
OsHandleStorage storage;
|
||||||
OsHandleWin osHandle1;
|
OsHandleWin osHandle1;
|
||||||
|
@ -243,7 +243,7 @@ bool CommandStreamReceiver::skipResourceCleanup() const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool CommandStreamReceiver::isGpuHangDetected() const {
|
bool CommandStreamReceiver::isGpuHangDetected() const {
|
||||||
return this->getOSInterface() && this->getOSInterface()->getDriverModel() && this->getOSInterface()->getDriverModel()->isGpuHangDetected(osContext->getContextId());
|
return this->osContext && this->getOSInterface() && this->getOSInterface()->getDriverModel() && this->getOSInterface()->getDriverModel()->isGpuHangDetected(*osContext);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CommandStreamReceiver::cleanupResources() {
|
void CommandStreamReceiver::cleanupResources() {
|
||||||
|
@ -318,11 +318,8 @@ int Drm::queryGttSize(uint64_t &gttSizeOutput) {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Drm::isGpuHangDetected(uint32_t contextId) {
|
bool Drm::isGpuHangDetected(OsContext &osContext) {
|
||||||
const auto &engines = this->rootDeviceEnvironment.executionEnvironment.memoryManager->getRegisteredEngines();
|
const auto osContextLinux = static_cast<OsContextLinux *>(&osContext);
|
||||||
UNRECOVERABLE_IF(engines.size() <= contextId);
|
|
||||||
|
|
||||||
const auto osContextLinux = static_cast<OsContextLinux *>(engines[contextId].osContext);
|
|
||||||
const auto &drmContextIds = osContextLinux->getDrmContextIds();
|
const auto &drmContextIds = osContextLinux->getDrmContextIds();
|
||||||
|
|
||||||
for (const auto drmContextId : drmContextIds) {
|
for (const auto drmContextId : drmContextIds) {
|
||||||
|
@ -148,7 +148,7 @@ class Drm : public DriverModel {
|
|||||||
MOCKABLE_VIRTUAL void getPrelimVersion(std::string &prelimVersion);
|
MOCKABLE_VIRTUAL void getPrelimVersion(std::string &prelimVersion);
|
||||||
|
|
||||||
PhysicalDevicePciBusInfo getPciBusInfo() const override;
|
PhysicalDevicePciBusInfo getPciBusInfo() const override;
|
||||||
bool isGpuHangDetected(uint32_t contextId) override;
|
bool isGpuHangDetected(OsContext &osContext) override;
|
||||||
|
|
||||||
bool areNonPersistentContextsSupported() const { return nonPersistentContextsSupported; }
|
bool areNonPersistentContextsSupported() const { return nonPersistentContextsSupported; }
|
||||||
void checkNonPersistentContextsSupport();
|
void checkNonPersistentContextsSupport();
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
namespace NEO {
|
namespace NEO {
|
||||||
class ExecutionEnvironment;
|
class ExecutionEnvironment;
|
||||||
class MemoryManager;
|
class MemoryManager;
|
||||||
|
class OsContext;
|
||||||
|
|
||||||
class HwDeviceId : public NonCopyableClass {
|
class HwDeviceId : public NonCopyableClass {
|
||||||
public:
|
public:
|
||||||
@ -85,9 +86,7 @@ class DriverModel : public NonCopyableClass {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
virtual bool isGpuHangDetected(uint32_t contextId) {
|
virtual bool isGpuHangDetected(OsContext &osContext) = 0;
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
DriverModelType driverModelType;
|
DriverModelType driverModelType;
|
||||||
|
@ -918,6 +918,13 @@ bool Wddm::waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredF
|
|||||||
return status == STATUS_SUCCESS;
|
return status == STATUS_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Wddm::isGpuHangDetected(OsContext &osContext) {
|
||||||
|
const auto osContextWin = static_cast<OsContextWin *>(&osContext);
|
||||||
|
const auto &monitoredFence = osContextWin->getResidencyController().getMonitoredFence();
|
||||||
|
|
||||||
|
return monitoredFence.cpuAddress && *monitoredFence.cpuAddress == gpuHangIndication;
|
||||||
|
}
|
||||||
|
|
||||||
void Wddm::initGfxPartition(GfxPartition &outGfxPartition, uint32_t rootDeviceIndex, size_t numRootDevices, bool useExternalFrontWindowPool) const {
|
void Wddm::initGfxPartition(GfxPartition &outGfxPartition, uint32_t rootDeviceIndex, size_t numRootDevices, bool useExternalFrontWindowPool) const {
|
||||||
if (gfxPartition.SVM.Limit != 0) {
|
if (gfxPartition.SVM.Limit != 0) {
|
||||||
outGfxPartition.heapInit(HeapIndex::HEAP_SVM, gfxPartition.SVM.Base, gfxPartition.SVM.Limit - gfxPartition.SVM.Base + 1);
|
outGfxPartition.heapInit(HeapIndex::HEAP_SVM, gfxPartition.SVM.Base, gfxPartition.SVM.Limit - gfxPartition.SVM.Base + 1);
|
||||||
|
@ -23,6 +23,8 @@
|
|||||||
|
|
||||||
#include "sku_info.h"
|
#include "sku_info.h"
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <limits>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
|
|
||||||
@ -57,6 +59,7 @@ CREATECONTEXT_PVTDATA initPrivateData(OsContextWin &osContext);
|
|||||||
class Wddm : public DriverModel {
|
class Wddm : public DriverModel {
|
||||||
public:
|
public:
|
||||||
static constexpr DriverModelType driverModelType = DriverModelType::WDDM;
|
static constexpr DriverModelType driverModelType = DriverModelType::WDDM;
|
||||||
|
static constexpr std::uint64_t gpuHangIndication{std::numeric_limits<std::uint64_t>::max()};
|
||||||
|
|
||||||
typedef HRESULT(WINAPI *CreateDXGIFactoryFcn)(REFIID riid, void **ppFactory);
|
typedef HRESULT(WINAPI *CreateDXGIFactoryFcn)(REFIID riid, void **ppFactory);
|
||||||
typedef HRESULT(WINAPI *DXCoreCreateAdapterFactoryFcn)(REFIID riid, void **ppFactory);
|
typedef HRESULT(WINAPI *DXCoreCreateAdapterFactoryFcn)(REFIID riid, void **ppFactory);
|
||||||
@ -109,6 +112,8 @@ class Wddm : public DriverModel {
|
|||||||
|
|
||||||
MOCKABLE_VIRTUAL bool isShutdownInProgress();
|
MOCKABLE_VIRTUAL bool isShutdownInProgress();
|
||||||
|
|
||||||
|
bool isGpuHangDetected(OsContext &osContext) override;
|
||||||
|
|
||||||
bool configureDeviceAddressSpace();
|
bool configureDeviceAddressSpace();
|
||||||
const FeatureTable &getFeatureTable() const {
|
const FeatureTable &getFeatureTable() const {
|
||||||
return *featureTable;
|
return *featureTable;
|
||||||
|
@ -27,7 +27,7 @@ class MockDriverModel : public NEO::DriverModel {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isGpuHangDetected(uint32_t contextId) override {
|
bool isGpuHangDetected(NEO::OsContext &osContext) override {
|
||||||
if (isGpuHangDetectedSideEffect) {
|
if (isGpuHangDetectedSideEffect) {
|
||||||
std::invoke(isGpuHangDetectedSideEffect);
|
std::invoke(isGpuHangDetectedSideEffect);
|
||||||
}
|
}
|
||||||
|
@ -192,6 +192,10 @@ TEST_F(DeviceGetCapsTest, whenDriverModelHasLimitationForMaxMemoryAllocationSize
|
|||||||
void setGmmInputArgs(void *args) override {}
|
void setGmmInputArgs(void *args) override {}
|
||||||
uint32_t getDeviceHandle() const override { return {}; }
|
uint32_t getDeviceHandle() const override { return {}; }
|
||||||
PhysicalDevicePciBusInfo getPciBusInfo() const override { return {}; }
|
PhysicalDevicePciBusInfo getPciBusInfo() const override { return {}; }
|
||||||
|
bool isGpuHangDetected(NEO::OsContext &osContext) override {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
size_t getMaxMemAllocSize() const override {
|
size_t getMaxMemAllocSize() const override {
|
||||||
return maxAllocSize;
|
return maxAllocSize;
|
||||||
}
|
}
|
||||||
|
Reference in New Issue
Block a user