236 lines
10 KiB
C++
236 lines
10 KiB
C++
/*
|
|
* Copyright (C) 2020-2025 Intel Corporation
|
|
*
|
|
* SPDX-License-Identifier: MIT
|
|
*
|
|
*/
|
|
|
|
#include "shared/source/device/device.h"
|
|
#include "shared/source/direct_submission/windows/wddm_direct_submission.h"
|
|
#include "shared/source/execution_environment/root_device_environment.h"
|
|
#include "shared/source/gmm_helper/gmm_helper.h"
|
|
#include "shared/source/os_interface/windows/os_context_win.h"
|
|
#include "shared/source/os_interface/windows/wddm/wddm.h"
|
|
#include "shared/source/os_interface/windows/wddm/wddm_interface.h"
|
|
#include "shared/source/os_interface/windows/wddm/wddm_residency_logger.h"
|
|
#include "shared/source/os_interface/windows/wddm_allocation.h"
|
|
#include "shared/source/os_interface/windows/wddm_memory_operations_handler.h"
|
|
#include "shared/source/utilities/arrayref.h"
|
|
|
|
namespace NEO {
|
|
|
|
// Initialize COMMAND_BUFFER_HEADER Type PatchList Streamer Perf Tag
|
|
// `CommandBufferHeader` is the value the WddmDirectSubmission constructor copies into each instance's commandBufferHeader.
DECLARE_COMMAND_BUFFER(CommandBufferHeader, UMD_OCL, FALSE, FALSE, PERFTAG_OCL);
|
|
|
|
template <typename GfxFamily, typename Dispatcher>
|
|
WddmDirectSubmission<GfxFamily, Dispatcher>::WddmDirectSubmission(const DirectSubmissionInputParams &inputParams)
|
|
: DirectSubmissionHw<GfxFamily, Dispatcher>(inputParams) {
|
|
osContextWin = reinterpret_cast<OsContextWin *>(&this->osContext);
|
|
wddm = osContextWin->getWddm();
|
|
commandBufferHeader = std::make_unique<COMMAND_BUFFER_HEADER_REC>();
|
|
*(commandBufferHeader.get()) = CommandBufferHeader;
|
|
if (osContextWin->getPreemptionMode() != PreemptionMode::Disabled) {
|
|
commandBufferHeader->NeedsMidBatchPreEmptionSupport = true;
|
|
}
|
|
perfLogResidencyVariadicLog(wddm->getResidencyLogger(), "Starting Wddm ULLS. Placement ring buffer: %d semaphore %d\n",
|
|
debugManager.flags.DirectSubmissionBufferPlacement.get(),
|
|
debugManager.flags.DirectSubmissionSemaphorePlacement.get());
|
|
|
|
this->completionFenceAllocation = inputParams.completionFenceAllocation;
|
|
UNRECOVERABLE_IF(!this->completionFenceAllocation);
|
|
if (this->miMemFenceRequired) {
|
|
this->gpuVaForAdditionalSynchronizationWA = this->completionFenceAllocation->getGpuAddress() + 8u;
|
|
}
|
|
}
|
|
|
|
template <typename GfxFamily, typename Dispatcher>
|
|
WddmDirectSubmission<GfxFamily, Dispatcher>::~WddmDirectSubmission() {
|
|
perfLogResidencyVariadicLog(wddm->getResidencyLogger(), "Stopping Wddm ULLS\n");
|
|
if (this->ringStart) {
|
|
this->stopRingBuffer(true);
|
|
}
|
|
this->deallocateResources();
|
|
wddm->getWddmInterface()->destroyMonitorFence(ringFence);
|
|
}
|
|
|
|
template <typename GfxFamily, typename Dispatcher>
|
|
inline void WddmDirectSubmission<GfxFamily, Dispatcher>::flushMonitorFence(bool notifyKmd) {
|
|
auto needStart = !this->ringStart;
|
|
|
|
size_t requiredMinimalSize = this->getSizeSemaphoreSection(false) +
|
|
Dispatcher::getSizeMonitorFence(this->rootDeviceEnvironment) +
|
|
this->getSizeNewResourceHandler() +
|
|
this->getSizeSwitchRingBufferSection() +
|
|
this->getSizeEnd(false);
|
|
this->switchRingBuffersNeeded(requiredMinimalSize, nullptr);
|
|
|
|
auto startVA = this->ringCommandStream.getCurrentGpuAddressPosition();
|
|
|
|
this->handleNewResourcesSubmission();
|
|
|
|
TagData currentTagData = {};
|
|
this->getTagAddressValue(currentTagData);
|
|
Dispatcher::dispatchMonitorFence(this->ringCommandStream, currentTagData.tagAddress, currentTagData.tagValue, this->rootDeviceEnvironment, this->partitionedMode, this->dcFlushRequired, notifyKmd);
|
|
|
|
this->dispatchSemaphoreSection(this->currentQueueWorkCount + 1);
|
|
this->submitCommandBufferToGpu(needStart, startVA, requiredMinimalSize, true, nullptr);
|
|
this->currentQueueWorkCount++;
|
|
|
|
this->updateTagValueImpl(this->currentRingBuffer);
|
|
}
|
|
|
|
template <typename GfxFamily, typename Dispatcher>
|
|
void WddmDirectSubmission<GfxFamily, Dispatcher>::ensureRingCompletion() {
|
|
WddmDirectSubmission<GfxFamily, Dispatcher>::handleCompletionFence(ringFence.lastSubmittedFence, ringFence);
|
|
}
|
|
|
|
template <typename GfxFamily, typename Dispatcher>
|
|
bool WddmDirectSubmission<GfxFamily, Dispatcher>::allocateOsResources() {
|
|
bool ret = wddm->getWddmInterface()->createMonitoredFenceForDirectSubmission(ringFence, *this->osContextWin);
|
|
perfLogResidencyVariadicLog(wddm->getResidencyLogger(), "ULLS resource allocation finished with: %d\n", ret);
|
|
return ret;
|
|
}
|
|
|
|
template <typename GfxFamily, typename Dispatcher>
|
|
bool WddmDirectSubmission<GfxFamily, Dispatcher>::submit(uint64_t gpuAddress, size_t size, const ResidencyContainer *allocationsForResidency) {
|
|
perfLogResidencyVariadicLog(wddm->getResidencyLogger(), "ULLS Submit to GPU\n");
|
|
COMMAND_BUFFER_HEADER *pHeader = reinterpret_cast<COMMAND_BUFFER_HEADER *>(commandBufferHeader.get());
|
|
pHeader->RequiresCoherency = false;
|
|
|
|
pHeader->UmdRequestedSliceState = 0;
|
|
pHeader->UmdRequestedEUCount = wddm->getRequestedEUCount();
|
|
|
|
pHeader->UmdRequestedSubsliceCount = 0;
|
|
pHeader->NeedsMidBatchPreEmptionSupport = true;
|
|
|
|
WddmSubmitArguments submitArgs = {};
|
|
submitArgs.contextHandle = osContextWin->getWddmContextHandle();
|
|
submitArgs.hwQueueHandle = osContextWin->getHwQueue().handle;
|
|
submitArgs.monitorFence = &ringFence;
|
|
|
|
return wddm->submit(gpuAddress, size, pHeader, submitArgs);
|
|
}
|
|
|
|
template <typename GfxFamily, typename Dispatcher>
|
|
bool WddmDirectSubmission<GfxFamily, Dispatcher>::handleResidency() {
|
|
wddm->waitOnPagingFenceFromCpu(this->lastSubmittedThrottle == QueueThrottle::LOW);
|
|
perfLogResidencyVariadicLog(wddm->getResidencyLogger(), "ULLS residency wait exit\n");
|
|
return true;
|
|
}
|
|
|
|
template <typename GfxFamily, typename Dispatcher>
|
|
void WddmDirectSubmission<GfxFamily, Dispatcher>::handleStopRingBuffer() {
|
|
if (this->disableMonitorFence) {
|
|
updateTagValueImpl(this->currentRingBuffer);
|
|
}
|
|
}
|
|
|
|
template <typename GfxFamily, typename Dispatcher>
|
|
void WddmDirectSubmission<GfxFamily, Dispatcher>::handleSwitchRingBuffers(ResidencyContainer *allocationsForResidency) {
|
|
if (this->disableMonitorFence) {
|
|
if (allocationsForResidency) {
|
|
auto lock = osContextWin->getResidencyController().acquireLock();
|
|
updateTagValueImpl(this->previousRingBuffer);
|
|
updateMonitorFenceValueForResidencyList(allocationsForResidency);
|
|
} else {
|
|
updateTagValueImpl(this->previousRingBuffer);
|
|
}
|
|
}
|
|
}
|
|
|
|
template <typename GfxFamily, typename Dispatcher>
|
|
uint64_t WddmDirectSubmission<GfxFamily, Dispatcher>::updateTagValue(bool requireMonitorFence) {
|
|
if (this->detectGpuHang) {
|
|
bool osHang = wddm->isGpuHangDetected(*osContextWin);
|
|
bool ringHang = *ringFence.cpuAddress == Wddm::gpuHangIndication;
|
|
|
|
if (osHang || ringHang) {
|
|
wddm->getDeviceState();
|
|
return DirectSubmissionHw<GfxFamily, Dispatcher>::updateTagValueFail;
|
|
}
|
|
}
|
|
|
|
if (requireMonitorFence) {
|
|
return this->updateTagValueImpl(this->currentRingBuffer);
|
|
}
|
|
MonitoredFence ¤tFence = osContextWin->getResidencyController().getMonitoredFence();
|
|
return currentFence.currentFenceValue;
|
|
}
|
|
|
|
template <typename GfxFamily, typename Dispatcher>
|
|
bool WddmDirectSubmission<GfxFamily, Dispatcher>::dispatchMonitorFenceRequired(bool requireMonitorFence) {
|
|
return !this->disableMonitorFence || requireMonitorFence;
|
|
}
|
|
|
|
template <typename GfxFamily, typename Dispatcher>
|
|
uint64_t WddmDirectSubmission<GfxFamily, Dispatcher>::updateTagValueImpl(uint32_t completionBufferIndex) {
|
|
MonitoredFence ¤tFence = osContextWin->getResidencyController().getMonitoredFence();
|
|
|
|
currentFence.lastSubmittedFence = currentFence.currentFenceValue;
|
|
currentFence.currentFenceValue++;
|
|
this->ringBuffers[completionBufferIndex].completionFence = currentFence.lastSubmittedFence;
|
|
|
|
return currentFence.lastSubmittedFence;
|
|
}
|
|
|
|
template <typename GfxFamily, typename Dispatcher>
|
|
void WddmDirectSubmission<GfxFamily, Dispatcher>::handleCompletionFence(uint64_t completionValue, MonitoredFence &fence) {
|
|
wddm->waitFromCpu(completionValue, fence, false);
|
|
}
|
|
|
|
template <typename GfxFamily, typename Dispatcher>
|
|
void WddmDirectSubmission<GfxFamily, Dispatcher>::getTagAddressValue(TagData &tagData) {
|
|
MonitoredFence ¤tFence = osContextWin->getResidencyController().getMonitoredFence();
|
|
auto gmmHelper = wddm->getRootDeviceEnvironment().getGmmHelper();
|
|
|
|
tagData.tagAddress = gmmHelper->canonize(currentFence.gpuAddress);
|
|
tagData.tagValue = currentFence.currentFenceValue;
|
|
}
|
|
|
|
template <typename GfxFamily, typename Dispatcher>
|
|
inline bool WddmDirectSubmission<GfxFamily, Dispatcher>::isCompleted(uint32_t ringBufferIndex) {
|
|
MonitoredFence ¤tFence = osContextWin->getResidencyController().getMonitoredFence();
|
|
auto lastSubmittedFence = this->ringBuffers[ringBufferIndex].completionFence;
|
|
if (lastSubmittedFence > *currentFence.cpuAddress) {
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
template <typename GfxFamily, typename Dispatcher>
|
|
void WddmDirectSubmission<GfxFamily, Dispatcher>::updateMonitorFenceValueForResidencyList(ResidencyContainer *allocationsForResidency) {
|
|
if (allocationsForResidency == nullptr) {
|
|
return;
|
|
}
|
|
const auto currentFence = osContextWin->getResidencyController().getMonitoredFence().currentFenceValue;
|
|
auto contextId = osContextWin->getContextId();
|
|
for (uint32_t i = 0; i < allocationsForResidency->size(); i++) {
|
|
WddmAllocation *allocation = static_cast<WddmAllocation *>((*allocationsForResidency)[i]);
|
|
// Update fence value not to early destroy / evict allocation
|
|
allocation->updateCompletionDataForAllocationAndFragments(currentFence, contextId);
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @brief Unblocks pipeline by waiting for paging fence value and signals semaphore with latest value.
|
|
* Signals semaphore with latest fence value instead of passed one to unblock immediately subsequent semaphores.
|
|
*
|
|
* @param[in] pagingFenceValue paging fence submitted from user thread to wait for.
|
|
*/
|
|
template <typename GfxFamily, typename Dispatcher>
|
|
inline void WddmDirectSubmission<GfxFamily, Dispatcher>::unblockPagingFenceSemaphore(uint64_t pagingFenceValue) {
|
|
this->wddm->waitOnPagingFenceFromCpu(pagingFenceValue, true);
|
|
this->semaphoreData->pagingFenceCounter = static_cast<uint32_t>(*this->wddm->getPagingFenceAddress());
|
|
}
|
|
|
|
template <typename GfxFamily, typename Dispatcher>
|
|
inline void WddmDirectSubmission<GfxFamily, Dispatcher>::makeGlobalFenceAlwaysResident() {
|
|
if (this->globalFenceAllocation != nullptr) {
|
|
DirectSubmissionAllocations allocations;
|
|
allocations.push_back(this->globalFenceAllocation);
|
|
UNRECOVERABLE_IF(!this->makeResourcesResident(allocations));
|
|
}
|
|
}
|
|
|
|
} // namespace NEO
|