mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-25 13:33:02 +08:00
Implement GPU hang detection
This change uses DRM_IOCTL_I915_GET_RESET_STATS to detect GPU hangs. When a hang is detected, zeCommandQueueSynchronize returns ZE_RESULT_ERROR_DEVICE_LOST. Related-To: NEO-5313 Signed-off-by: Patryk Wrobel <patryk.wrobel@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
543c854a3b
commit
498cf5e871
@@ -63,7 +63,7 @@ class AUBCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
|
||||
MOCKABLE_VIRTUAL void submitBatchBufferAub(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits);
|
||||
void pollForCompletion() override;
|
||||
void pollForCompletionImpl() override;
|
||||
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
|
||||
WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
|
||||
|
||||
uint32_t getDumpHandle();
|
||||
MOCKABLE_VIRTUAL void addContextToken(uint32_t dumpHandle);
|
||||
|
||||
@@ -606,9 +606,11 @@ void AUBCommandStreamReceiverHw<GfxFamily>::pollForCompletionImpl() {
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline void AUBCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
|
||||
CommandStreamReceiverSimulatedHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
|
||||
inline WaitStatus AUBCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
|
||||
const auto result = CommandStreamReceiverSimulatedHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
|
||||
pollForCompletion();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
@@ -227,6 +227,10 @@ bool CommandStreamReceiver::skipResourceCleanup() const {
|
||||
return this->getOSInterface() && this->getOSInterface()->getDriverModel() && this->getOSInterface()->getDriverModel()->skipResourceCleanup();
|
||||
}
|
||||
|
||||
bool CommandStreamReceiver::isGpuHangDetected() const {
|
||||
return this->getOSInterface() && this->getOSInterface()->getDriverModel() && this->getOSInterface()->getDriverModel()->isGpuHangDetected(osContext->getContextId());
|
||||
}
|
||||
|
||||
void CommandStreamReceiver::cleanupResources() {
|
||||
if (this->skipResourceCleanup()) {
|
||||
return;
|
||||
@@ -286,19 +290,21 @@ void CommandStreamReceiver::cleanupResources() {
|
||||
}
|
||||
}
|
||||
|
||||
bool CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
|
||||
WaitStatus CommandStreamReceiver::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
|
||||
uint32_t latestSentTaskCount = this->latestFlushedTaskCount;
|
||||
if (latestSentTaskCount < taskCountToWait) {
|
||||
if (!this->flushBatchedSubmissions()) {
|
||||
return false;
|
||||
const auto isGpuHang{isGpuHangDetected()};
|
||||
return isGpuHang ? WaitStatus::GpuHang : WaitStatus::NotReady;
|
||||
}
|
||||
}
|
||||
|
||||
return baseWaitFunction(getTagAddress(), enableTimeout, timeoutMicroseconds, taskCountToWait);
|
||||
}
|
||||
|
||||
bool CommandStreamReceiver::baseWaitFunction(volatile uint32_t *pollAddress, bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
|
||||
std::chrono::high_resolution_clock::time_point time1, time2;
|
||||
WaitStatus CommandStreamReceiver::baseWaitFunction(volatile uint32_t *pollAddress, bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
|
||||
std::chrono::microseconds elapsedTimeSinceGpuHangCheck{0};
|
||||
std::chrono::high_resolution_clock::time_point waitStartTime, lastHangCheckTime, currentTime;
|
||||
int64_t timeDiff = 0;
|
||||
|
||||
uint32_t latestSentTaskCount = this->latestFlushedTaskCount;
|
||||
@@ -308,23 +314,33 @@ bool CommandStreamReceiver::baseWaitFunction(volatile uint32_t *pollAddress, boo
|
||||
|
||||
volatile uint32_t *partitionAddress = pollAddress;
|
||||
|
||||
time1 = std::chrono::high_resolution_clock::now();
|
||||
waitStartTime = std::chrono::high_resolution_clock::now();
|
||||
lastHangCheckTime = waitStartTime;
|
||||
for (uint32_t i = 0; i < activePartitions; i++) {
|
||||
while (*partitionAddress < taskCountToWait && timeDiff <= timeoutMicroseconds) {
|
||||
if (WaitUtils::waitFunction(partitionAddress, taskCountToWait)) {
|
||||
break;
|
||||
}
|
||||
|
||||
currentTime = std::chrono::high_resolution_clock::now();
|
||||
elapsedTimeSinceGpuHangCheck = std::chrono::duration_cast<std::chrono::microseconds>(currentTime - lastHangCheckTime);
|
||||
|
||||
if (elapsedTimeSinceGpuHangCheck.count() >= gpuHangCheckPeriod.count()) {
|
||||
lastHangCheckTime = currentTime;
|
||||
if (isGpuHangDetected()) {
|
||||
return WaitStatus::GpuHang;
|
||||
}
|
||||
}
|
||||
|
||||
if (enableTimeout) {
|
||||
time2 = std::chrono::high_resolution_clock::now();
|
||||
timeDiff = std::chrono::duration_cast<std::chrono::microseconds>(time2 - time1).count();
|
||||
timeDiff = std::chrono::duration_cast<std::chrono::microseconds>(currentTime - waitStartTime).count();
|
||||
}
|
||||
}
|
||||
|
||||
partitionAddress = ptrOffset(partitionAddress, this->postSyncWriteOffset);
|
||||
}
|
||||
|
||||
return testTaskCountReady(pollAddress, taskCountToWait);
|
||||
return testTaskCountReady(pollAddress, taskCountToWait) ? WaitStatus::Ready : WaitStatus::NotReady;
|
||||
}
|
||||
|
||||
void CommandStreamReceiver::setTagAllocation(GraphicsAllocation *allocation) {
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "shared/source/os_interface/os_thread.h"
|
||||
#include "shared/source/utilities/spinlock.h"
|
||||
|
||||
#include <chrono>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
@@ -63,6 +64,12 @@ enum class DispatchMode {
|
||||
BatchedDispatch // dispatching is batched, explicit clFlush is required
|
||||
};
|
||||
|
||||
// Result of waiting for a task count to be reached.
// Enumerator values are implicit and must stay 0, 1, 2 in this order.
enum class WaitStatus {
    NotReady, // 0 - the awaited task count has not been reached
    Ready,    // 1 - the awaited task count has been reached
    GpuHang,  // 2 - a GPU hang was detected while waiting
};
|
||||
|
||||
class CommandStreamReceiver {
|
||||
public:
|
||||
enum class SamplerCacheFlushState {
|
||||
@@ -158,9 +165,9 @@ class CommandStreamReceiver {
|
||||
void requestStallingCommandsOnNextFlush() { stallingCommandsOnNextFlushRequired = true; }
|
||||
bool isStallingCommandsOnNextFlushRequired() const { return stallingCommandsOnNextFlushRequired; }
|
||||
|
||||
virtual void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) = 0;
|
||||
virtual bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait);
|
||||
bool baseWaitFunction(volatile uint32_t *pollAddress, bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait);
|
||||
virtual WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) = 0;
|
||||
virtual WaitStatus waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait);
|
||||
WaitStatus baseWaitFunction(volatile uint32_t *pollAddress, bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait);
|
||||
bool testTaskCountReady(volatile uint32_t *pollAddress, uint32_t taskCountToWait);
|
||||
virtual void downloadAllocations(){};
|
||||
|
||||
@@ -316,6 +323,7 @@ class CommandStreamReceiver {
|
||||
void printDeviceIndex();
|
||||
void checkForNewResources(uint32_t submittedTaskCount, uint32_t allocationTaskCount, GraphicsAllocation &gfxAllocation);
|
||||
bool checkImplicitFlushForGpuIdle();
|
||||
bool isGpuHangDetected() const;
|
||||
MOCKABLE_VIRTUAL std::unique_lock<MutexType> obtainHostPtrSurfaceCreationLock();
|
||||
|
||||
std::unique_ptr<FlushStampTracker> flushStamp;
|
||||
@@ -373,6 +381,7 @@ class CommandStreamReceiver {
|
||||
SamplerCacheFlushState samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushNotRequired;
|
||||
PreemptionMode lastPreemptionMode = PreemptionMode::Initial;
|
||||
|
||||
std::chrono::microseconds gpuHangCheckPeriod{500'000};
|
||||
uint32_t lastSentL3Config = 0;
|
||||
uint32_t latestSentStatelessMocsConfig = 0;
|
||||
uint32_t lastSentNumGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||
|
||||
@@ -77,7 +77,7 @@ class CommandStreamReceiverHw : public CommandStreamReceiver {
|
||||
bool isPipelineSelectAlreadyProgrammed() const;
|
||||
void programComputeMode(LinearStream &csr, DispatchFlags &dispatchFlags, const HardwareInfo &hwInfo);
|
||||
|
||||
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
|
||||
WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
|
||||
|
||||
void collectStateBaseAddresPatchInfo(
|
||||
uint64_t commandBufferAddress,
|
||||
|
||||
@@ -859,7 +859,7 @@ inline size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForPipelineSelect()
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
inline void CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
|
||||
inline WaitStatus CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
|
||||
int64_t waitTimeout = 0;
|
||||
bool enableTimeout = false;
|
||||
|
||||
@@ -870,12 +870,18 @@ inline void CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFal
|
||||
"\nWaiting for task count %u at location %p. Current value: %u\n",
|
||||
taskCountToWait, getTagAddress(), *getTagAddress());
|
||||
|
||||
bool status = waitForCompletionWithTimeout(enableTimeout, waitTimeout, taskCountToWait);
|
||||
if (!status) {
|
||||
auto status = waitForCompletionWithTimeout(enableTimeout, waitTimeout, taskCountToWait);
|
||||
if (status == WaitStatus::NotReady) {
|
||||
waitForFlushStamp(flushStampToWait);
|
||||
//now call blocking wait, this is to ensure that task count is reached
|
||||
status = waitForCompletionWithTimeout(false, 0, taskCountToWait);
|
||||
}
|
||||
|
||||
// If a GPU hang occurred, propagate it to the caller.
|
||||
if (status == WaitStatus::GpuHang) {
|
||||
return status;
|
||||
}
|
||||
|
||||
UNRECOVERABLE_IF(*getTagAddress() < taskCountToWait);
|
||||
|
||||
if (kmdNotifyHelper->quickKmdSleepForSporadicWaitsEnabled()) {
|
||||
@@ -884,6 +890,8 @@ inline void CommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFal
|
||||
|
||||
PRINT_DEBUG_STRING(DebugManager.flags.LogWaitingForCompletion.get(), stdout,
|
||||
"\nWaiting completed. Current value: %u\n", *getTagAddress());
|
||||
|
||||
return WaitStatus::Ready;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
|
||||
@@ -39,8 +39,8 @@ class CommandStreamReceiverWithAUBDump : public BaseCSR {
|
||||
return CommandStreamReceiverType::CSR_HW_WITH_AUB;
|
||||
}
|
||||
|
||||
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
|
||||
bool useQuickKmdSleep, bool forcePowerSavingMode) override;
|
||||
WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
|
||||
bool useQuickKmdSleep, bool forcePowerSavingMode) override;
|
||||
|
||||
size_t getPreferredTagPoolSize() const override { return 1; }
|
||||
|
||||
|
||||
@@ -78,13 +78,13 @@ void CommandStreamReceiverWithAUBDump<BaseCSR>::setupContext(OsContext &osContex
|
||||
}
|
||||
|
||||
template <typename BaseCSR>
|
||||
void CommandStreamReceiverWithAUBDump<BaseCSR>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
|
||||
bool useQuickKmdSleep, bool forcePowerSavingMode) {
|
||||
WaitStatus CommandStreamReceiverWithAUBDump<BaseCSR>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait,
|
||||
bool useQuickKmdSleep, bool forcePowerSavingMode) {
|
||||
if (aubCSR) {
|
||||
aubCSR->waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
|
||||
}
|
||||
|
||||
BaseCSR::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
|
||||
return BaseCSR::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
|
||||
}
|
||||
|
||||
template <typename BaseCSR>
|
||||
|
||||
@@ -42,8 +42,8 @@ class TbxCommandStreamReceiverHw : public CommandStreamReceiverSimulatedHw<GfxFa
|
||||
|
||||
SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override;
|
||||
|
||||
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
|
||||
bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) override;
|
||||
WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) override;
|
||||
WaitStatus waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) override;
|
||||
void downloadAllocations() override;
|
||||
void downloadAllocation(GraphicsAllocation &gfxAllocation) override;
|
||||
|
||||
|
||||
@@ -492,13 +492,13 @@ void TbxCommandStreamReceiverHw<GfxFamily>::flushSubmissionsAndDownloadAllocatio
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
void TbxCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
|
||||
WaitStatus TbxCommandStreamReceiverHw<GfxFamily>::waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool useQuickKmdSleep, bool forcePowerSavingMode) {
|
||||
flushSubmissionsAndDownloadAllocations(taskCountToWait);
|
||||
BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
|
||||
return BaseClass::waitForTaskCountWithKmdNotifyFallback(taskCountToWait, flushStampToWait, useQuickKmdSleep, forcePowerSavingMode);
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool TbxCommandStreamReceiverHw<GfxFamily>::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
|
||||
WaitStatus TbxCommandStreamReceiverHw<GfxFamily>::waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) {
|
||||
flushSubmissionsAndDownloadAllocations(taskCountToWait);
|
||||
return BaseClass::waitForCompletionWithTimeout(enableTimeout, timeoutMicroseconds, taskCountToWait);
|
||||
}
|
||||
|
||||
@@ -317,6 +317,28 @@ int Drm::queryGttSize(uint64_t &gttSizeOutput) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool Drm::isGpuHangDetected(uint32_t contextId) {
|
||||
const auto &engines = this->rootDeviceEnvironment.executionEnvironment.memoryManager->getRegisteredEngines();
|
||||
UNRECOVERABLE_IF(engines.size() <= contextId);
|
||||
|
||||
const auto osContextLinux = static_cast<OsContextLinux *>(engines[contextId].osContext);
|
||||
const auto &drmContextIds = osContextLinux->getDrmContextIds();
|
||||
|
||||
for (const auto drmContextId : drmContextIds) {
|
||||
drm_i915_reset_stats reset_stats{};
|
||||
reset_stats.ctx_id = drmContextId;
|
||||
|
||||
const auto retVal{ioctl(DRM_IOCTL_I915_GET_RESET_STATS, &reset_stats)};
|
||||
UNRECOVERABLE_IF(retVal != 0);
|
||||
|
||||
if (reset_stats.batch_active > 0 || reset_stats.batch_pending > 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void Drm::checkPreemptionSupport() {
|
||||
int value = 0;
|
||||
auto ret = getParamIoctl(I915_PARAM_HAS_SCHEDULER, &value);
|
||||
|
||||
@@ -148,6 +148,7 @@ class Drm : public DriverModel {
|
||||
MOCKABLE_VIRTUAL void getPrelimVersion(std::string &prelimVersion);
|
||||
|
||||
PhysicalDevicePciBusInfo getPciBusInfo() const override;
|
||||
bool isGpuHangDetected(uint32_t contextId) override;
|
||||
|
||||
bool areNonPersistentContextsSupported() const { return nonPersistentContextsSupported; }
|
||||
void checkNonPersistentContextsSupport();
|
||||
|
||||
@@ -85,6 +85,10 @@ class DriverModel : public NonCopyableClass {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Default hang query: always reports "no hang" and does not inspect contextId.
// Driver models that can detect hangs override this.
virtual bool isGpuHangDetected(uint32_t contextId) { return false; }
|
||||
|
||||
protected:
|
||||
DriverModelType driverModelType;
|
||||
};
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2021 Intel Corporation
|
||||
* Copyright (C) 2018-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -17,4 +17,5 @@ bool OSInterface::requiresSupportForWddmTrimNotification = true;
|
||||
bool OSInterface::isDebugAttachAvailable() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace NEO
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2021 Intel Corporation
|
||||
* Copyright (C) 2019-2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -204,6 +204,18 @@ int DrmMock::ioctl(unsigned long request, void *arg) {
|
||||
if (request == DRM_IOCTL_GEM_CLOSE) {
|
||||
return 0;
|
||||
}
|
||||
if (request == DRM_IOCTL_I915_GET_RESET_STATS && arg != nullptr) {
|
||||
auto outResetStats = static_cast<drm_i915_reset_stats *>(arg);
|
||||
for (const auto &resetStats : resetStatsToReturn) {
|
||||
if (resetStats.ctx_id == outResetStats->ctx_id) {
|
||||
*outResetStats = resetStats;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (request == DRM_IOCTL_I915_QUERY && arg != nullptr) {
|
||||
auto queryArg = static_cast<drm_i915_query *>(arg);
|
||||
auto queryItemArg = reinterpret_cast<drm_i915_query_item *>(queryArg->items_ptr);
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include <fstream>
|
||||
#include <limits.h>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
using namespace NEO;
|
||||
|
||||
@@ -145,11 +146,11 @@ class DrmMock : public Drm {
|
||||
int storedExecSoftPin = 0;
|
||||
int storedRetValForVmId = 1;
|
||||
int storedCsTimestampFrequency = 1000;
|
||||
|
||||
bool disableSomeTopology = false;
|
||||
bool allowDebugAttach = false;
|
||||
bool allowDebugAttachCallBase = false;
|
||||
uint32_t passedContextDebugId = std::numeric_limits<uint32_t>::max();
|
||||
std::vector<drm_i915_reset_stats> resetStatsToReturn{};
|
||||
|
||||
drm_i915_gem_context_create_ext_setparam receivedContextCreateSetParam = {};
|
||||
uint32_t receivedContextCreateFlags = 0;
|
||||
|
||||
@@ -18,6 +18,7 @@
|
||||
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@@ -77,6 +78,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
using BaseClass::CommandStreamReceiver::experimentalCmdBuffer;
|
||||
using BaseClass::CommandStreamReceiver::flushStamp;
|
||||
using BaseClass::CommandStreamReceiver::globalFenceAllocation;
|
||||
using BaseClass::CommandStreamReceiver::gpuHangCheckPeriod;
|
||||
using BaseClass::CommandStreamReceiver::GSBAFor32BitProgrammed;
|
||||
using BaseClass::CommandStreamReceiver::initDirectSubmission;
|
||||
using BaseClass::CommandStreamReceiver::internalAllocationStorage;
|
||||
@@ -122,7 +124,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
uint32_t rootDeviceIndex,
|
||||
const DeviceBitfield deviceBitfield)
|
||||
: BaseClass(executionEnvironment, rootDeviceIndex, deviceBitfield), recursiveLockCounter(0),
|
||||
recordedDispatchFlags(DispatchFlagsHelper::createDefaultDispatchFlags()) {}
|
||||
recordedDispatchFlags(DispatchFlagsHelper::createDefaultDispatchFlags()) {
|
||||
}
|
||||
static CommandStreamReceiver *create(bool withAubDump,
|
||||
ExecutionEnvironment &executionEnvironment,
|
||||
uint32_t rootDeviceIndex,
|
||||
@@ -169,7 +172,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
downloadAllocationCalled = true;
|
||||
}
|
||||
|
||||
bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) override {
|
||||
WaitStatus waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) override {
|
||||
latestWaitForCompletionWithTimeoutTaskCount.store(taskCountToWait);
|
||||
waitForCompletionWithTimeoutTaskCountCalled++;
|
||||
if (callBaseWaitForCompletionWithTimeout) {
|
||||
@@ -222,6 +225,11 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
}
|
||||
bool flushBatchedSubmissions() override {
|
||||
flushBatchedSubmissionsCalled = true;
|
||||
|
||||
if (shouldFailFlushBatchedSubmissions) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return CommandStreamReceiverHw<GfxFamily>::flushBatchedSubmissions();
|
||||
}
|
||||
void initProgrammingFlags() override {
|
||||
@@ -328,6 +336,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily>, publ
|
||||
bool blitterDirectSubmissionAvailable = false;
|
||||
bool callBaseIsMultiOsContextCapable = false;
|
||||
bool callBaseWaitForCompletionWithTimeout = true;
|
||||
bool returnWaitForCompletionWithTimeout = true;
|
||||
bool shouldFailFlushBatchedSubmissions = false;
|
||||
WaitStatus returnWaitForCompletionWithTimeout = WaitStatus::Ready;
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
@@ -45,6 +45,7 @@ set(NEO_CORE_tests_mocks
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_direct_submission_diagnostic_collector.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_direct_submission_hw.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_dispatch_kernel_encoder_interface.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_driver_model.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_elf.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_execution_environment.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/mock_experimental_command_buffer.h
|
||||
@@ -105,6 +106,7 @@ else()
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/linux/mock_drm_allocation.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/linux/mock_drm_command_stream_receiver.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/linux/mock_drm_memory_manager.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/linux/mock_os_context_linux.h
|
||||
)
|
||||
endif()
|
||||
|
||||
|
||||
16
shared/test/common/mocks/linux/mock_os_context_linux.h
Normal file
16
shared/test/common/mocks/linux/mock_os_context_linux.h
Normal file
@@ -0,0 +1,16 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/os_interface/linux/os_context_linux.h"
|
||||
|
||||
// Test helper over NEO::OsContextLinux: inherits the base constructors and
// makes the drmContextIds member publicly accessible.
class MockOsContextLinux : public NEO::OsContextLinux {
  public:
    // Construct exactly like the production context.
    using NEO::OsContextLinux::OsContextLinux;

    // Exposed so tests can access the DRM context ids directly.
    using NEO::OsContextLinux::drmContextIds;
};
|
||||
@@ -121,8 +121,8 @@ struct MockAubCsr : public AUBCommandStreamReceiverHw<GfxFamily> {
|
||||
expectMemoryCompressedCalled = true;
|
||||
return AUBCommandStreamReceiverHw<GfxFamily>::expectMemoryCompressed(gfxAddress, srcAddress, length);
|
||||
}
|
||||
bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) override {
|
||||
return true;
|
||||
WaitStatus waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) override {
|
||||
return NEO::WaitStatus::Ready;
|
||||
}
|
||||
void addAubComment(const char *message) override {
|
||||
AUBCommandStreamReceiverHw<GfxFamily>::addAubComment(message);
|
||||
|
||||
@@ -50,9 +50,9 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
|
||||
memset(const_cast<uint32_t *>(CommandStreamReceiver::tagAddress), 0xFFFFFFFF, tagSize * sizeof(uint32_t));
|
||||
}
|
||||
|
||||
bool waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) override {
|
||||
WaitStatus waitForCompletionWithTimeout(bool enableTimeout, int64_t timeoutMicroseconds, uint32_t taskCountToWait) override {
|
||||
waitForCompletionWithTimeoutCalled++;
|
||||
return true;
|
||||
return NEO::WaitStatus::Ready;
|
||||
}
|
||||
SubmissionStatus flush(BatchBuffer &batchBuffer, ResidencyContainer &allocationsForResidency) override;
|
||||
|
||||
@@ -86,7 +86,8 @@ class MockCommandStreamReceiver : public CommandStreamReceiver {
|
||||
return true;
|
||||
}
|
||||
|
||||
void waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override {
|
||||
WaitStatus waitForTaskCountWithKmdNotifyFallback(uint32_t taskCountToWait, FlushStamp flushStampToWait, bool quickKmdSleep, bool forcePowerSavingMode) override {
|
||||
return WaitStatus::Ready;
|
||||
}
|
||||
|
||||
uint32_t blitBuffer(const BlitPropertiesContainer &blitPropertiesContainer, bool blocking, bool profilingEnabled, Device &device) override { return taskCount; };
|
||||
@@ -197,7 +198,8 @@ class MockCsrHw2 : public CommandStreamReceiverHw<GfxFamily> {
|
||||
using CommandStreamReceiver::useNewResourceImplicitFlush;
|
||||
|
||||
MockCsrHw2(ExecutionEnvironment &executionEnvironment, uint32_t rootDeviceIndex, const DeviceBitfield deviceBitfield)
|
||||
: CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiverHw(executionEnvironment, rootDeviceIndex, deviceBitfield) {}
|
||||
: CommandStreamReceiverHw<GfxFamily>::CommandStreamReceiverHw(executionEnvironment, rootDeviceIndex, deviceBitfield) {
|
||||
}
|
||||
|
||||
SubmissionAggregator *peekSubmissionAggregator() {
|
||||
return this->submissionAggregator.get();
|
||||
|
||||
41
shared/test/common/mocks/mock_driver_model.h
Normal file
41
shared/test/common/mocks/mock_driver_model.h
Normal file
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "shared/source/os_interface/driver_info.h"
|
||||
#include "shared/source/os_interface/os_interface.h"
|
||||
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
|
||||
class MockDriverModel : public NEO::DriverModel {
|
||||
public:
|
||||
MockDriverModel() : NEO::DriverModel(NEO::DriverModelType::UNKNOWN) {}
|
||||
|
||||
void setGmmInputArgs(void *args) override {}
|
||||
|
||||
uint32_t getDeviceHandle() const override { return {}; }
|
||||
|
||||
NEO::PhysicalDevicePciBusInfo getPciBusInfo() const override { return pciBusInfo; }
|
||||
|
||||
size_t getMaxMemAllocSize() const override {
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool isGpuHangDetected(uint32_t contextId) override {
|
||||
if (isGpuHangDetectedSideEffect) {
|
||||
std::invoke(isGpuHangDetectedSideEffect);
|
||||
}
|
||||
|
||||
return isGpuHangDetectedToReturn;
|
||||
}
|
||||
|
||||
NEO::PhysicalDevicePciBusInfo pciBusInfo{};
|
||||
bool isGpuHangDetectedToReturn{};
|
||||
std::function<void()> isGpuHangDetectedSideEffect{};
|
||||
};
|
||||
@@ -13,6 +13,7 @@
|
||||
#include "shared/source/memory_manager/surface.h"
|
||||
#include "shared/source/os_interface/device_factory.h"
|
||||
#include "shared/source/os_interface/hw_info_config.h"
|
||||
#include "shared/source/os_interface/os_interface.h"
|
||||
#include "shared/source/utilities/tag_allocator.h"
|
||||
#include "shared/test/common/fixtures/device_fixture.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
@@ -20,6 +21,7 @@
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/mocks/mock_allocation_properties.h"
|
||||
#include "shared/test/common/mocks/mock_csr.h"
|
||||
#include "shared/test/common/mocks/mock_driver_model.h"
|
||||
#include "shared/test/common/mocks/mock_execution_environment.h"
|
||||
#include "shared/test/common/mocks/mock_memory_manager.h"
|
||||
#include "shared/test/common/mocks/ult_device_factory.h"
|
||||
@@ -30,10 +32,15 @@
|
||||
|
||||
#include "gmock/gmock.h"
|
||||
|
||||
#include <chrono>
|
||||
#include <functional>
|
||||
#include <limits>
|
||||
|
||||
namespace NEO {
|
||||
extern ApiSpecificConfig::ApiType apiTypeForUlts;
|
||||
} // namespace NEO
|
||||
using namespace NEO;
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
struct CommandStreamReceiverTest : public DeviceFixture,
|
||||
public ::testing::Test {
|
||||
@@ -165,6 +172,99 @@ HWTEST_F(CommandStreamReceiverTest, whenStoreAllocationThenStoredAllocationHasTa
|
||||
EXPECT_EQ(csr.peekTaskCount(), allocation->getTaskCount(csr.getOsContext().getContextId()));
|
||||
}
|
||||
|
||||
// A driver model that reports a hang must make waitForCompletionWithTimeout return GpuHang
// while the awaited task count has not been reached.
HWTEST_F(CommandStreamReceiverTest, givenGpuHangWhenWaititingForCompletionWithTimeoutThenGpuHangIsReturned) {
    auto hangingDriverModel = std::make_unique<MockDriverModel>();
    hangingDriverModel->isGpuHangDetectedToReturn = true;

    auto mockedOsInterface = std::make_unique<OSInterface>();
    mockedOsInterface->setDriverModel(std::move(hangingDriverModel));

    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    ultCsr.executionEnvironment.rootDeviceEnvironments[ultCsr.rootDeviceIndex]->osInterface = std::move(mockedOsInterface);
    ultCsr.callBaseWaitForCompletionWithTimeout = true;
    ultCsr.activePartitions = 1;
    // A zero period makes the elapsed-time condition for the hang check always hold.
    ultCsr.gpuHangCheckPeriod = 0us;

    // The tag stays at zero, below the awaited task count.
    // NOTE(review): tagAddress keeps pointing at this local array after the test body returns - confirm nothing reads it later.
    volatile std::uint32_t tagMemory[16] = {};
    ultCsr.tagAddress = tagMemory;

    constexpr bool useTimeout = false;
    constexpr std::int64_t timeoutInMicroseconds = std::numeric_limits<std::int64_t>::max();
    constexpr std::uint32_t awaitedTaskCount = 1;

    const auto waitStatus = ultCsr.waitForCompletionWithTimeout(useTimeout, timeoutInMicroseconds, awaitedTaskCount);
    EXPECT_EQ(WaitStatus::GpuHang, waitStatus);
}
|
||||
|
||||
// With no hang reported, the wait must end with Ready once the tag reaches the awaited task count.
// The mock's hang-check hook bumps the tag, so every hang check advances it.
HWTEST_F(CommandStreamReceiverTest, givenNoGpuHangWhenWaititingForCompletionWithTimeoutThenReadyIsReturned) {
    auto healthyDriverModel = std::make_unique<MockDriverModel>();
    healthyDriverModel->isGpuHangDetectedToReturn = false;

    // NOTE(review): the hook captures this local array by reference and is stored in the driver model
    // handed to the execution environment - confirm it is never invoked after the test body returns.
    volatile std::uint32_t tagMemory[16] = {};
    healthyDriverModel->isGpuHangDetectedSideEffect = [&tagMemory] {
        tagMemory[0]++;
    };

    auto mockedOsInterface = std::make_unique<OSInterface>();
    mockedOsInterface->setDriverModel(std::move(healthyDriverModel));

    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    ultCsr.executionEnvironment.rootDeviceEnvironments[ultCsr.rootDeviceIndex]->osInterface = std::move(mockedOsInterface);
    ultCsr.callBaseWaitForCompletionWithTimeout = true;
    ultCsr.tagAddress = tagMemory;
    ultCsr.activePartitions = 1;
    // A zero period makes the elapsed-time condition for the hang check always hold.
    ultCsr.gpuHangCheckPeriod = 0us;

    constexpr bool useTimeout = false;
    constexpr std::int64_t timeoutInMicroseconds = std::numeric_limits<std::int64_t>::max();
    constexpr std::uint32_t awaitedTaskCount = 1;

    const auto waitStatus = ultCsr.waitForCompletionWithTimeout(useTimeout, timeoutInMicroseconds, awaitedTaskCount);
    EXPECT_EQ(WaitStatus::Ready, waitStatus);
}
|
||||
|
||||
// When flushing batched submissions fails and the driver model reports a hang,
// waitForCompletionWithTimeout must return GpuHang.
HWTEST_F(CommandStreamReceiverTest, givenFailingFlushSubmissionsAndGpuHangWhenWaititingForCompletionWithTimeoutThenGpuHangIsReturned) {
    auto hangingDriverModel = std::make_unique<MockDriverModel>();
    hangingDriverModel->isGpuHangDetectedToReturn = true;

    auto mockedOsInterface = std::make_unique<OSInterface>();
    mockedOsInterface->setDriverModel(std::move(hangingDriverModel));

    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    // Nothing flushed yet, so waiting for task count 1 has to attempt a flush, which is forced to fail.
    ultCsr.latestFlushedTaskCount = 0;
    ultCsr.shouldFailFlushBatchedSubmissions = true;
    ultCsr.executionEnvironment.rootDeviceEnvironments[ultCsr.rootDeviceIndex]->osInterface = std::move(mockedOsInterface);
    ultCsr.callBaseWaitForCompletionWithTimeout = true;

    constexpr bool useTimeout = false;
    constexpr std::int64_t timeoutInMicroseconds = std::numeric_limits<std::int64_t>::max();
    constexpr std::uint32_t awaitedTaskCount = 1;

    const auto waitStatus = ultCsr.waitForCompletionWithTimeout(useTimeout, timeoutInMicroseconds, awaitedTaskCount);
    EXPECT_EQ(WaitStatus::GpuHang, waitStatus);
}
|
||||
|
||||
// When flushing batched submissions fails but no hang is reported,
// waitForCompletionWithTimeout must return NotReady.
HWTEST_F(CommandStreamReceiverTest, givenFailingFlushSubmissionsAndNoGpuHangWhenWaititingForCompletionWithTimeoutThenNotReadyIsReturned) {
    auto healthyDriverModel = std::make_unique<MockDriverModel>();
    healthyDriverModel->isGpuHangDetectedToReturn = false;

    auto mockedOsInterface = std::make_unique<OSInterface>();
    mockedOsInterface->setDriverModel(std::move(healthyDriverModel));

    auto &ultCsr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    // Nothing flushed yet, so waiting for task count 1 has to attempt a flush, which is forced to fail.
    ultCsr.latestFlushedTaskCount = 0;
    ultCsr.shouldFailFlushBatchedSubmissions = true;
    ultCsr.executionEnvironment.rootDeviceEnvironments[ultCsr.rootDeviceIndex]->osInterface = std::move(mockedOsInterface);
    ultCsr.callBaseWaitForCompletionWithTimeout = true;

    constexpr bool useTimeout = false;
    constexpr std::int64_t timeoutInMicroseconds = std::numeric_limits<std::int64_t>::max();
    constexpr std::uint32_t awaitedTaskCount = 1;

    const auto waitStatus = ultCsr.waitForCompletionWithTimeout(useTimeout, timeoutInMicroseconds, awaitedTaskCount);
    EXPECT_EQ(WaitStatus::NotReady, waitStatus);
}
|
||||
|
||||
HWTEST_F(CommandStreamReceiverTest, givenCommandStreamReceiverWhenCheckedForInitialStatusOfStatelessMocsIndexThenUnknownMocsIsReturend) {
|
||||
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
EXPECT_EQ(CacheSettings::unknownMocs, csr.latestSentStatelessMocsConfig);
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/ult_hw_config.h"
|
||||
#include "shared/test/common/mocks/mock_device.h"
|
||||
#include "shared/test/common/mocks/mock_driver_model.h"
|
||||
#include "shared/test/common/mocks/mock_execution_environment.h"
|
||||
#include "shared/test/common/mocks/mock_memory_manager.h"
|
||||
#include "shared/test/common/mocks/ult_device_factory.h"
|
||||
@@ -28,17 +29,6 @@ class MockMemoryManagerOsAgnosticContext : public MockMemoryManager {
|
||||
}
|
||||
};
|
||||
|
||||
struct MockDriverModel : NEO::DriverModel {
|
||||
PhysicalDevicePciBusInfo pciBusInfo{};
|
||||
MockDriverModel() : NEO::DriverModel(NEO::DriverModelType::UNKNOWN) {}
|
||||
void setGmmInputArgs(void *args) override {}
|
||||
uint32_t getDeviceHandle() const override { return {}; }
|
||||
PhysicalDevicePciBusInfo getPciBusInfo() const override { return pciBusInfo; }
|
||||
size_t getMaxMemAllocSize() const override {
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
template <PRODUCT_FAMILY gfxProduct>
|
||||
class MockHwInfoConfigHw : public HwInfoConfigHw<gfxProduct> {
|
||||
public:
|
||||
|
||||
Reference in New Issue
Block a user