refactor: Move monitored fence from WddmResidencyController to OsContextWin

Related-To: NEO-13315

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2025-10-15 09:21:43 +00:00
committed by Compute-Runtime-Automation
parent dd252e7852
commit f1f13f05e2
19 changed files with 199 additions and 206 deletions

View File

@@ -51,7 +51,7 @@ bool OsContextWin::initializeContext(bool allocateInterrupt) {
}
return true;
};
}
void OsContextWin::reInitializeContext() {
NEO::EnvironmentVariableReader envReader;
@@ -68,7 +68,15 @@ void OsContextWin::reInitializeContext() {
UNRECOVERABLE_IF(!wddmInterface->createMonitoredFence(*this));
}
}
};
}
// Rebinds this context's monitored fence to the supplied handle and CPU/GPU addresses,
// and restarts its bookkeeping: nothing submitted yet (0), next fence value to signal is 1.
void OsContextWin::resetMonitoredFenceParams(D3DKMT_HANDLE &handle, uint64_t *cpuAddress, D3DGPU_VIRTUAL_ADDRESS &gpuAddress) {
    auto &fence = this->monitoredFence;

    // Counters are reset before the identity fields, in the same order as before.
    fence.lastSubmittedFence = 0;
    fence.currentFenceValue = 1;

    // Identity of the underlying sync object.
    fence.fenceHandle = handle;
    fence.cpuAddress = cpuAddress;
    fence.gpuAddress = gpuAddress;
}
void OsContextWin::getDeviceLuidArray(std::vector<uint8_t> &luidData, size_t arraySize) {
auto *wddm = this->getWddm();
@@ -85,7 +93,7 @@ void OsContextWin::getDeviceLuidArray(std::vector<uint8_t> &luidData, size_t arr
luidData.emplace(luidData.end(), luidArray[i - 4]);
}
}
};
}
uint32_t OsContextWin::getDeviceNodeMask() {
auto *wddm = this->getWddm();
@@ -108,8 +116,8 @@ bool OsContextWin::isDirectSubmissionSupported() const {
OsContextWin::~OsContextWin() {
if (contextInitialized && (false == this->wddm.skipResourceCleanup())) {
wddm.getWddmInterface()->destroyHwQueue(hardwareQueue.handle);
if (residencyController.getMonitoredFence().fenceHandle != hardwareQueue.progressFenceHandle) {
wddm.getWddmInterface()->destroyMonitorFence(residencyController.getMonitoredFence().fenceHandle);
if (getMonitoredFence().fenceHandle != hardwareQueue.progressFenceHandle) {
wddm.getWddmInterface()->destroyMonitorFence(getMonitoredFence().fenceHandle);
}
if (!isPartOfContextGroup() ||

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2024 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -35,6 +35,11 @@ class OsContextWin : public OsContext {
Wddm *getWddm() const { return &wddm; }
MOCKABLE_VIRTUAL WddmResidencyController &getResidencyController() { return residencyController; }
static OsContext *create(OSInterface *osInterface, uint32_t rootDeviceIndex, uint32_t contextId, const EngineDescriptor &engineDescriptor);
MonitoredFence &getMonitoredFence() { return monitoredFence; }
void resetMonitoredFenceParams(D3DKMT_HANDLE &handle, uint64_t *cpuAddress, D3DGPU_VIRTUAL_ADDRESS &gpuAddress);
bool wasAllocationUsedSinceLastTrim(uint64_t fenceValue) { return fenceValue > lastTrimFenceValue; }
// Records the value currently stored at the monitored fence's CPU address as the last-trim fence value.
// NOTE(review): cpuAddress is dereferenced unconditionally and monitoredFence is declared with `= {}`;
// presumably the fence is always created before a trim can run - confirm against callers.
void updateLastTrimFenceValue() { lastTrimFenceValue = *this->getMonitoredFence().cpuAddress; }
// Returns the fence value stored by the most recent updateLastTrimFenceValue() call
// (the member starts at 0).
uint64_t getLastTrimFenceValue() const { return this->lastTrimFenceValue; }
void reInitializeContext() override;
void getDeviceLuidArray(std::vector<uint8_t> &luidData, size_t arraySize);
uint32_t getDeviceNodeMask();
@@ -44,7 +49,12 @@ class OsContextWin : public OsContext {
bool initializeContext(bool allocateInterrupt) override;
WddmResidencyController residencyController;
HardwareQueue hardwareQueue;
MonitoredFence monitoredFence = {};
uint64_t lastTrimFenceValue = 0u;
Wddm &wddm;
D3DKMT_HANDLE wddmContextHandle = 0;
};

View File

@@ -7,6 +7,7 @@
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/os_interface/windows/gdi_interface.h"
#include "shared/source/os_interface/windows/os_context_win.h"
#include "shared/source/os_interface/windows/wddm/wddm.h"
#include "shared/source/os_interface/windows/wddm/wddm_residency_logger.h"
#include "shared/source/os_interface/windows/wddm_allocation.h"
@@ -56,7 +57,8 @@ void APIENTRY WddmResidencyController::trimCallback(_Inout_ D3DKMT_TRIMNOTIFICAT
void WddmResidencyController::trimResidency(const D3DDDI_TRIMRESIDENCYSET_FLAGS &flags, uint64_t bytes) {
std::chrono::high_resolution_clock::time_point callbackStart;
perfLogResidencyTrimCallbackBegin(wddm.getResidencyLogger(), callbackStart);
uint32_t osContextId = this->csr->getOsContext().getContextId();
auto &osContext = static_cast<NEO::OsContextWin &>(this->csr->getOsContext());
uint32_t osContextId = osContext.getContextId();
if (flags.PeriodicTrim) {
uint64_t sizeToTrim = 0;
@@ -67,9 +69,9 @@ void WddmResidencyController::trimResidency(const D3DDDI_TRIMRESIDENCYSET_FLAGS
for (auto allocationIter = evictionAllocations.begin(); allocationIter != evictionAllocations.end();) {
wddmAllocation = reinterpret_cast<WddmAllocation *>(*allocationIter);
DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "lastPeriodicTrimFenceValue = ", lastTrimFenceValue);
DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "lastPeriodicTrimFenceValue = ", osContext.getLastTrimFenceValue());
if (wasAllocationUsedSinceLastTrim(wddmAllocation->getResidencyData().getFenceValueForContextId(osContextId))) {
if (osContext.wasAllocationUsedSinceLastTrim(wddmAllocation->getResidencyData().getFenceValueForContextId(osContextId))) {
allocationIter++;
continue;
}
@@ -87,7 +89,7 @@ void WddmResidencyController::trimResidency(const D3DDDI_TRIMRESIDENCYSET_FLAGS
for (uint32_t allocationId = 0; allocationId < wddmAllocation->fragmentsStorage.fragmentCount; allocationId++) {
AllocationStorageData &fragmentStorageData = wddmAllocation->fragmentsStorage.fragmentStorageData[allocationId];
if (!wasAllocationUsedSinceLastTrim(fragmentStorageData.residency->getFenceValueForContextId(osContextId))) {
if (!osContext.wasAllocationUsedSinceLastTrim(fragmentStorageData.residency->getFenceValueForContextId(osContextId))) {
auto osHandle = static_cast<OsHandleWin *>(wddmAllocation->fragmentsStorage.fragmentStorageData[allocationId].osHandleStorage);
DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "Evict fragment: handle =", osHandle->handle, "lastFence =",
wddmAllocation->fragmentsStorage.fragmentStorageData[allocationId].residency->getFenceValueForContextId(osContextId));
@@ -112,8 +114,8 @@ void WddmResidencyController::trimResidency(const D3DDDI_TRIMRESIDENCYSET_FLAGS
}
if (flags.PeriodicTrim || flags.RestartPeriodicTrim) {
this->updateLastTrimFenceValue();
DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "updated lastPeriodicTrimFenceValue =", lastTrimFenceValue);
osContext.updateLastTrimFenceValue();
DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "updated lastPeriodicTrimFenceValue =", osContext.getLastTrimFenceValue());
}
perfLogResidencyTrimCallbackEnd(wddm.getResidencyLogger(), flags.Value, this, callbackStart);
@@ -121,7 +123,8 @@ void WddmResidencyController::trimResidency(const D3DDDI_TRIMRESIDENCYSET_FLAGS
bool WddmResidencyController::trimResidencyToBudget(uint64_t bytes) {
this->csr->drainPagingFenceQueue();
uint32_t osContextId = this->csr->getOsContext().getContextId();
auto &osContext = static_cast<NEO::OsContextWin &>(this->csr->getOsContext());
uint32_t osContextId = osContext.getContextId();
uint64_t sizeToTrim = 0;
uint64_t numberOfBytesToTrim = bytes;
WddmAllocation *wddmAllocation = nullptr;
@@ -133,7 +136,7 @@ bool WddmResidencyController::trimResidencyToBudget(uint64_t bytes) {
while (numberOfBytesToTrim > 0 && allocationIter != evictionAllocations.end()) {
wddmAllocation = reinterpret_cast<WddmAllocation *>(*allocationIter);
uint64_t lastFence = wddmAllocation->getResidencyData().getFenceValueForContextId(osContextId);
auto &monitoredFence = this->getMonitoredFence();
auto &monitoredFence = osContext.getMonitoredFence();
if (lastFence > monitoredFence.lastSubmittedFence) {
allocationIter++;
@@ -148,7 +151,7 @@ bool WddmResidencyController::trimResidencyToBudget(uint64_t bytes) {
uint64_t sizeEvicted = 0;
if (lastFence > *monitoredFence.cpuAddress) {
this->wddm.waitFromCpu(lastFence, this->getMonitoredFence(), false);
this->wddm.waitFromCpu(lastFence, osContext.getMonitoredFence(), false);
}
if (wddmAllocation->fragmentsStorage.fragmentCount == 0) {

View File

@@ -1197,7 +1197,7 @@ bool Wddm::waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredF
if (!skipResourceCleanup() && lastFenceValue > *monitoredFence.cpuAddress) {
CommandStreamReceiver *csr = nullptr;
this->forEachContextWithinWddm([&monitoredFence, &csr](const EngineControl &engine) {
auto &contextMonitoredFence = static_cast<OsContextWin *>(engine.osContext)->getResidencyController().getMonitoredFence();
auto &contextMonitoredFence = static_cast<OsContextWin *>(engine.osContext)->getMonitoredFence();
if (contextMonitoredFence.cpuAddress == monitoredFence.cpuAddress) {
csr = engine.commandStreamReceiver;
}
@@ -1239,7 +1239,7 @@ bool Wddm::waitFromCpu(uint64_t lastFenceValue, const MonitoredFence &monitoredF
bool Wddm::isGpuHangDetected(OsContext &osContext) {
const auto osContextWin = static_cast<OsContextWin *>(&osContext);
const auto &monitoredFence = osContextWin->getResidencyController().getMonitoredFence();
const auto &monitoredFence = osContextWin->getMonitoredFence();
bool hangDetected = monitoredFence.cpuAddress && *monitoredFence.cpuAddress == gpuHangIndication;
PRINT_DEBUG_STRING(hangDetected && debugManager.flags.PrintDebugMessages.get(), stderr, "%s", "ERROR: GPU HANG detected!\n");

View File

@@ -47,8 +47,7 @@ bool WddmInterface20::createHwQueue(OsContextWin &osContext) {
void WddmInterface20::destroyHwQueue(D3DKMT_HANDLE hwQueue) {}
bool WddmInterface20::createMonitoredFence(OsContextWin &osContext) {
auto &residencyController = osContext.getResidencyController();
MonitoredFence &monitorFence = residencyController.getMonitoredFence();
MonitoredFence &monitorFence = osContext.getMonitoredFence();
bool ret = WddmInterface::createMonitoredFence(monitorFence);
monitorFence.currentFenceValue = 1;
@@ -131,11 +130,10 @@ bool WddmInterface23::createHwQueue(OsContextWin &osContext) {
}
bool WddmInterface23::createMonitoredFence(OsContextWin &osContext) {
auto &residencyController = osContext.getResidencyController();
auto hwQueue = osContext.getHwQueue();
residencyController.resetMonitoredFenceParams(hwQueue.progressFenceHandle,
reinterpret_cast<uint64_t *>(hwQueue.progressFenceCpuVA),
hwQueue.progressFenceGpuVA);
osContext.resetMonitoredFenceParams(hwQueue.progressFenceHandle,
reinterpret_cast<uint64_t *>(hwQueue.progressFenceCpuVA),
hwQueue.progressFenceGpuVA);
return true;
}
@@ -187,14 +185,13 @@ bool WddmInterface23::submit(uint64_t commandBuffer, size_t size, void *commandH
bool NEO::WddmInterface23::createFenceForDirectSubmission(MonitoredFence &monitorFence, OsContextWin &osContext) {
MonitoredFence monitorFenceForResidency{};
auto ret = createSyncObject(monitorFenceForResidency);
auto &residencyController = osContext.getResidencyController();
auto lastSubmittedFence = residencyController.getMonitoredFence().lastSubmittedFence;
auto currentFenceValue = residencyController.getMonitoredFence().currentFenceValue;
residencyController.resetMonitoredFenceParams(monitorFenceForResidency.fenceHandle,
const_cast<uint64_t *>(monitorFenceForResidency.cpuAddress),
monitorFenceForResidency.gpuAddress);
residencyController.getMonitoredFence().currentFenceValue = currentFenceValue;
residencyController.getMonitoredFence().lastSubmittedFence = lastSubmittedFence;
auto lastSubmittedFence = osContext.getMonitoredFence().lastSubmittedFence;
auto currentFenceValue = osContext.getMonitoredFence().currentFenceValue;
osContext.resetMonitoredFenceParams(monitorFenceForResidency.fenceHandle,
const_cast<uint64_t *>(monitorFenceForResidency.cpuAddress),
monitorFenceForResidency.gpuAddress);
osContext.getMonitoredFence().currentFenceValue = currentFenceValue;
osContext.getMonitoredFence().lastSubmittedFence = lastSubmittedFence;
auto hwQueue = osContext.getHwQueue();
monitorFence.cpuAddress = reinterpret_cast<uint64_t *>(hwQueue.progressFenceCpuVA);

View File

@@ -134,7 +134,7 @@ SubmissionStatus WddmCommandStreamReceiver<GfxFamily>::flush(BatchBuffer &batchB
WddmSubmitArguments submitArgs = {};
submitArgs.contextHandle = osContextWin->getWddmContextHandle();
submitArgs.hwQueueHandle = osContextWin->getHwQueue().handle;
submitArgs.monitorFence = &osContextWin->getResidencyController().getMonitoredFence();
submitArgs.monitorFence = &osContextWin->getMonitoredFence();
auto status = wddm->submit(commandStreamAddress, batchBuffer.usedSize - batchBuffer.startOffset, commandBufferHeader, submitArgs);
this->flushStamp->setStamp(submitArgs.monitorFence->lastSubmittedFence);
@@ -147,7 +147,7 @@ SubmissionStatus WddmCommandStreamReceiver<GfxFamily>::flush(BatchBuffer &batchB
template <typename GfxFamily>
SubmissionStatus WddmCommandStreamReceiver<GfxFamily>::processResidency(ResidencyContainer &allocationsForResidency, uint32_t handleId) {
return static_cast<OsContextWin *>(this->osContext)->getResidencyController().makeResidentResidencyAllocations(allocationsForResidency, this->requiresBlockingResidencyHandling, this->osContext->getContextId()) ? SubmissionStatus::success : SubmissionStatus::outOfMemory;
return static_cast<OsContextWin *>(this->osContext)->getResidencyController().makeResidentResidencyAllocations(allocationsForResidency, this->requiresBlockingResidencyHandling, *static_cast<OsContextWin *>(this->osContext)) ? SubmissionStatus::success : SubmissionStatus::outOfMemory;
}
template <typename GfxFamily>
@@ -161,7 +161,7 @@ WddmMemoryManager *WddmCommandStreamReceiver<GfxFamily>::getMemoryManager() cons
template <typename GfxFamily>
bool WddmCommandStreamReceiver<GfxFamily>::waitForFlushStamp(FlushStamp &flushStampToWait) {
return wddm->waitFromCpu(flushStampToWait, static_cast<OsContextWin *>(this->osContext)->getResidencyController().getMonitoredFence(), false);
return wddm->waitFromCpu(flushStampToWait, static_cast<OsContextWin *>(this->osContext)->getMonitoredFence(), false);
}
template <typename GfxFamily>

View File

@@ -815,7 +815,7 @@ void WddmMemoryManager::handleFenceCompletion(GraphicsAllocation *allocation) {
for (auto &engine : getRegisteredEngines(allocation->getRootDeviceIndex())) {
const auto lastFenceValue = wddmAllocation->getResidencyData().getFenceValueForContextId(engine.osContext->getContextId());
if (lastFenceValue != 0u) {
const auto &monitoredFence = static_cast<OsContextWin *>(engine.osContext)->getResidencyController().getMonitoredFence();
const auto &monitoredFence = static_cast<OsContextWin *>(engine.osContext)->getMonitoredFence();
const auto wddm = static_cast<OsContextWin *>(engine.osContext)->getWddm();
wddm->waitFromCpu(lastFenceValue, monitoredFence, engine.commandStreamReceiver->isAnyDirectSubmissionEnabled());
}

View File

@@ -9,6 +9,7 @@
#include "shared/source/command_stream/command_stream_receiver.h"
#include "shared/source/debug_settings/debug_settings_manager.h"
#include "shared/source/os_interface/windows/os_context_win.h"
#include "shared/source/os_interface/windows/wddm/wddm.h"
#include "shared/source/os_interface/windows/wddm_allocation.h"
#include "shared/source/os_interface/windows/wddm_residency_allocations_container.h"
@@ -41,14 +42,6 @@ std::unique_lock<SpinLock> WddmResidencyController::acquireTrimCallbackLock() {
return std::unique_lock<SpinLock>{this->trimCallbackLock};
}
// Rebinds the controller's monitored fence to the supplied handle and CPU/GPU addresses,
// resetting lastSubmittedFence to 0 and currentFenceValue to 1.
void WddmResidencyController::resetMonitoredFenceParams(D3DKMT_HANDLE &handle, uint64_t *cpuAddress, D3DGPU_VIRTUAL_ADDRESS &gpuAddress) {
monitoredFence.lastSubmittedFence = 0;
monitoredFence.currentFenceValue = 1;
monitoredFence.fenceHandle = handle;
monitoredFence.cpuAddress = cpuAddress;
monitoredFence.gpuAddress = gpuAddress;
}
/**
* @brief Makes resident passed allocations on a device
*
@@ -61,7 +54,8 @@ void WddmResidencyController::resetMonitoredFenceParams(D3DKMT_HANDLE &handle, u
*
* @return returns true if all allocations either succeeded or are pending to be resident
*/
bool WddmResidencyController::makeResidentResidencyAllocations(ResidencyContainer &allocationsForResidency, bool &requiresBlockingResidencyHandling, uint32_t osContextId) {
bool WddmResidencyController::makeResidentResidencyAllocations(ResidencyContainer &allocationsForResidency, bool &requiresBlockingResidencyHandling, OsContextWin &osContext) {
auto osContextId = osContext.getContextId();
const size_t residencyCount = allocationsForResidency.size();
requiresBlockingResidencyHandling = false;
if (debugManager.flags.WaitForPagingFenceInController.get() != -1) {
@@ -70,7 +64,7 @@ bool WddmResidencyController::makeResidentResidencyAllocations(ResidencyContaine
auto lock = this->acquireLock();
backupResidencyContainer = allocationsForResidency;
auto totalSize = fillHandlesContainer(allocationsForResidency, requiresBlockingResidencyHandling, osContextId);
auto totalSize = fillHandlesContainer(allocationsForResidency, requiresBlockingResidencyHandling, osContext);
bool result = true;
if (!handlesForResidency.empty()) {
@@ -81,7 +75,7 @@ bool WddmResidencyController::makeResidentResidencyAllocations(ResidencyContaine
allocationsForResidency = backupResidencyContainer;
if (!trimmingDone) {
auto evictionStatus = wddm.getTemporaryResourcesContainer()->evictAllResources();
totalSize = fillHandlesContainer(allocationsForResidency, requiresBlockingResidencyHandling, osContextId);
totalSize = fillHandlesContainer(allocationsForResidency, requiresBlockingResidencyHandling, osContext);
if (evictionStatus == MemoryOperationsStatus::success) {
continue;
}
@@ -91,10 +85,10 @@ bool WddmResidencyController::makeResidentResidencyAllocations(ResidencyContaine
} while (debugManager.flags.WaitForMemoryRelease.get() && result == false);
break;
}
totalSize = fillHandlesContainer(allocationsForResidency, requiresBlockingResidencyHandling, osContextId);
totalSize = fillHandlesContainer(allocationsForResidency, requiresBlockingResidencyHandling, osContext);
}
}
const auto currentFence = this->getMonitoredFence().currentFenceValue;
const auto currentFence = osContext.getMonitoredFence().currentFenceValue;
if (result == true) {
for (uint32_t i = 0; i < residencyCount; i++) {
@@ -121,13 +115,14 @@ bool WddmResidencyController::makeResidentResidencyAllocations(ResidencyContaine
*
* @return returns total size in bytes of allocations which are not yet resident.
*/
size_t WddmResidencyController::fillHandlesContainer(ResidencyContainer &allocationsForResidency, bool &requiresBlockingResidencyHandling, uint32_t osContextId) {
size_t WddmResidencyController::fillHandlesContainer(ResidencyContainer &allocationsForResidency, bool &requiresBlockingResidencyHandling, OsContextWin &osContext) {
auto osContextId = osContext.getContextId();
size_t totalSize = 0;
const size_t residencyCount = allocationsForResidency.size();
handlesForResidency.clear();
handlesForResidency.reserve(residencyCount);
DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "currentFenceValue =", this->getMonitoredFence().currentFenceValue);
DBG_LOG(ResidencyDebugEnable, "Residency:", __FUNCTION__, "currentFenceValue =", osContext.getMonitoredFence().currentFenceValue);
auto checkIfAlreadyResident = [&](GraphicsAllocation *alloc) {
WddmAllocation *allocation = static_cast<WddmAllocation *>(alloc);

View File

@@ -25,6 +25,7 @@ class GraphicsAllocation;
class WddmAllocation;
class Wddm;
class CommandStreamReceiver;
class OsContextWin;
class WddmResidencyController {
public:
@@ -36,12 +37,6 @@ class WddmResidencyController {
[[nodiscard]] MOCKABLE_VIRTUAL std::unique_lock<SpinLock> acquireLock();
[[nodiscard]] std::unique_lock<SpinLock> acquireTrimCallbackLock();
bool wasAllocationUsedSinceLastTrim(uint64_t fenceValue) { return fenceValue > lastTrimFenceValue; }
void updateLastTrimFenceValue() { lastTrimFenceValue = *this->getMonitoredFence().cpuAddress; }
MonitoredFence &getMonitoredFence() { return monitoredFence; }
void resetMonitoredFenceParams(D3DKMT_HANDLE &handle, uint64_t *cpuAddress, D3DGPU_VIRTUAL_ADDRESS &gpuAddress);
void registerCallback();
void trimResidency(const D3DDDI_TRIMRESIDENCYSET_FLAGS &flags, uint64_t bytes);
@@ -50,7 +45,7 @@ class WddmResidencyController {
bool isMemoryBudgetExhausted() const { return memoryBudgetExhausted; }
void setMemoryBudgetExhausted() { memoryBudgetExhausted = true; }
bool makeResidentResidencyAllocations(ResidencyContainer &allocationsForResidency, bool &requiresBlockingResidencyHandling, uint32_t osContextId);
bool makeResidentResidencyAllocations(ResidencyContainer &allocationsForResidency, bool &requiresBlockingResidencyHandling, OsContextWin &osContext);
bool isInitialized() const;
@@ -63,15 +58,11 @@ class WddmResidencyController {
}
protected:
size_t fillHandlesContainer(ResidencyContainer &allocationsForResidency, bool &requiresBlockingResidencyHandling, uint32_t osContextId);
MonitoredFence monitoredFence = {};
size_t fillHandlesContainer(ResidencyContainer &allocationsForResidency, bool &requiresBlockingResidencyHandling, OsContextWin &osContext);
SpinLock lock;
SpinLock trimCallbackLock;
uint64_t lastTrimFenceValue = 0u;
Wddm &wddm;
VOID *trimCallbackHandle = nullptr;