/*
 * Copyright (C) 2020-2024 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/device/device.h"
#include "shared/source/direct_submission/windows/wddm_direct_submission.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/os_interface/windows/os_context_win.h"
#include "shared/source/os_interface/windows/wddm/wddm.h"
#include "shared/source/os_interface/windows/wddm/wddm_interface.h"
#include "shared/source/os_interface/windows/wddm/wddm_residency_logger.h"
#include "shared/source/os_interface/windows/wddm_allocation.h"
#include "shared/source/os_interface/windows/wddm_memory_operations_handler.h"
#include "shared/source/utilities/arrayref.h"

namespace NEO {

// Initialize COMMAND_BUFFER_HEADER         Type PatchList Streamer Perf Tag
DECLARE_COMMAND_BUFFER(CommandBufferHeader, UMD_OCL, FALSE, FALSE, PERFTAG_OCL);

// Binds the submission object to the Windows OS context and its Wddm instance,
// creates a private copy of the CommandBufferHeader template declared above and
// records the completion fence allocation supplied by the caller.
// A missing completion fence allocation is treated as unrecoverable.
template <typename GfxFamily, typename Dispatcher>
WddmDirectSubmission<GfxFamily, Dispatcher>::WddmDirectSubmission(const DirectSubmissionInputParams &inputParams)
    : DirectSubmissionHw<GfxFamily, Dispatcher>(inputParams) {
    osContextWin = reinterpret_cast<OsContextWin *>(&this->osContext);
    wddm = osContextWin->getWddm();

    commandBufferHeader = std::make_unique<COMMAND_BUFFER_HEADER_REC>();
    *(commandBufferHeader.get()) = CommandBufferHeader;
    if (osContextWin->getPreemptionMode() != PreemptionMode::Disabled) {
        commandBufferHeader->NeedsMidBatchPreEmptionSupport = true;
    }

    perfLogResidencyVariadicLog(wddm->getResidencyLogger(), "Starting Wddm ULLS. Placement ring buffer: %d semaphore %d\n",
                                debugManager.flags.DirectSubmissionBufferPlacement.get(),
                                debugManager.flags.DirectSubmissionSemaphorePlacement.get());

    this->completionFenceAllocation = inputParams.completionFenceAllocation;
    UNRECOVERABLE_IF(!this->completionFenceAllocation);

    if (this->miMemFenceRequired) {
        // The additional-synchronization address is placed 8 bytes past the completion fence GPU address.
        this->gpuVaForAdditionalSynchronizationWA = this->completionFenceAllocation->getGpuAddress() + 8u;
    }
}

// Stops the ring buffer if it was started, releases the submission resources
// and finally destroys the ring monitor fence.
template <typename GfxFamily, typename Dispatcher>
WddmDirectSubmission<GfxFamily, Dispatcher>::~WddmDirectSubmission() {
    perfLogResidencyVariadicLog(wddm->getResidencyLogger(), "Stopping Wddm ULLS\n");
    if (this->ringStart) {
        this->stopRingBuffer(true);
    }
    this->deallocateResources();
    wddm->getWddmInterface()->destroyMonitorFence(ringFence);
}

// Emits a monitor fence into the ring, followed by a semaphore section, and
// submits the ring to the GPU. The ring is switched first when the space
// required by the emitted sections calls for it.
template <typename GfxFamily, typename Dispatcher>
inline void WddmDirectSubmission<GfxFamily, Dispatcher>::flushMonitorFence() {
    // Captured before anything is dispatched: the submit call needs to know whether the ring was already running.
    auto needStart = !this->ringStart;

    size_t requiredMinimalSize = this->getSizeSemaphoreSection(false) +
                                 Dispatcher::getSizeMonitorFence(this->rootDeviceEnvironment) +
                                 this->getSizeNewResourceHandler() +
                                 this->getSizeSwitchRingBufferSection() +
                                 this->getSizeEnd(false);
    this->switchRingBuffersNeeded(requiredMinimalSize, nullptr);

    // Taken after the potential ring switch so it refers to the ring that is actually written below.
    auto startVA = this->ringCommandStream.getCurrentGpuAddressPosition();

    this->handleNewResourcesSubmission();

    TagData currentTagData = {};
    this->getTagAddressValue(currentTagData);
    Dispatcher::dispatchMonitorFence(this->ringCommandStream, currentTagData.tagAddress, currentTagData.tagValue,
                                     this->rootDeviceEnvironment, this->useNotifyForPostSync,
                                     this->partitionedMode, this->dcFlushRequired);

    this->dispatchSemaphoreSection(this->currentQueueWorkCount + 1);
    this->submitCommandBufferToGpu(needStart, startVA, requiredMinimalSize);
    this->currentQueueWorkCount++;

    this->updateTagValueImpl(this->currentRingBuffer);
}

// Waits from the CPU on the last fence value submitted for the ring fence.
template <typename GfxFamily, typename Dispatcher>
void WddmDirectSubmission<GfxFamily, Dispatcher>::ensureRingCompletion() {
    WddmDirectSubmission<GfxFamily, Dispatcher>::handleCompletionFence(ringFence.lastSubmittedFence, ringFence);
}

// Creates the monitored fence used by the ring for this OS context.
// Returns the result reported by the Wddm interface.
template <typename GfxFamily, typename Dispatcher>
bool WddmDirectSubmission<GfxFamily, Dispatcher>::allocateOsResources() {
    bool ret = wddm->getWddmInterface()->createMonitoredFenceForDirectSubmission(ringFence, *this->osContextWin);
    perfLogResidencyVariadicLog(wddm->getResidencyLogger(), "ULLS resource allocation finished with: %d\n", ret);
    return ret;
}

// Fills the command buffer header and hands the given GPU address range to
// Wddm for submission on this context's hardware queue.
// Returns the result of Wddm::submit.
template <typename GfxFamily, typename Dispatcher>
bool WddmDirectSubmission<GfxFamily, Dispatcher>::submit(uint64_t gpuAddress, size_t size) {
    perfLogResidencyVariadicLog(wddm->getResidencyLogger(), "ULLS Submit to GPU\n");

    COMMAND_BUFFER_HEADER *pHeader = reinterpret_cast<COMMAND_BUFFER_HEADER *>(commandBufferHeader.get());
    pHeader->RequiresCoherency = false;
    pHeader->UmdRequestedSliceState = 0;
    pHeader->UmdRequestedEUCount = wddm->getRequestedEUCount();
    pHeader->UmdRequestedSubsliceCount = 0;
    // NOTE(review): set unconditionally here, whereas the constructor sets it only when
    // preemption is not Disabled - confirm the override is intended.
    pHeader->NeedsMidBatchPreEmptionSupport = true;

    WddmSubmitArguments submitArgs = {};
    submitArgs.contextHandle = osContextWin->getWddmContextHandle();
    submitArgs.hwQueueHandle = osContextWin->getHwQueue().handle;
    submitArgs.monitorFence = &ringFence;

    return wddm->submit(gpuAddress, size, pHeader, submitArgs);
}

// Waits on the paging fence from the CPU; the wait is flagged as low priority
// when the last submitted throttle was LOW. Always returns true.
template <typename GfxFamily, typename Dispatcher>
bool WddmDirectSubmission<GfxFamily, Dispatcher>::handleResidency() {
    wddm->waitOnPagingFenceFromCpu(this->lastSubmittedThrottle == QueueThrottle::LOW);
    perfLogResidencyVariadicLog(wddm->getResidencyLogger(), "ULLS residency wait exit\n");
    return true;
}

// When monitor fences are disabled, advances the fence bookkeeping for the
// current ring buffer as part of stopping the ring.
template <typename GfxFamily, typename Dispatcher>
void WddmDirectSubmission<GfxFamily, Dispatcher>::handleStopRingBuffer() {
    if (this->disableMonitorFence) {
        updateTagValueImpl(this->currentRingBuffer);
    }
}

// When monitor fences are disabled, advances the fence bookkeeping for the
// previous ring buffer and stamps the given allocations with the new fence
// value. Both steps run under the residency controller lock.
template <typename GfxFamily, typename Dispatcher>
void WddmDirectSubmission<GfxFamily, Dispatcher>::handleSwitchRingBuffers(ResidencyContainer *allocationsForResidency) {
    if (this->disableMonitorFence) {
        auto lock = osContextWin->getResidencyController().acquireLock();

        updateTagValueImpl(this->previousRingBuffer);
        updateMonitorFenceValueForResidencyList(allocationsForResidency);
    }
}

// Returns the tag value to use for the next submission.
// - With hang detection enabled, returns updateTagValueFail if either Wddm or
//   the ring fence reports a GPU hang.
// - If a monitor fence is required, advances the fence and returns the value
//   that was just marked as submitted.
// - Otherwise returns the current fence value without modifying it.
template <typename GfxFamily, typename Dispatcher>
uint64_t WddmDirectSubmission<GfxFamily, Dispatcher>::updateTagValue(bool requireMonitorFence) {
    if (this->detectGpuHang) {
        bool osHang = wddm->isGpuHangDetected(*osContextWin);
        bool ringHang = *ringFence.cpuAddress == Wddm::gpuHangIndication;

        if (osHang || ringHang) {
            wddm->getDeviceState();
            return DirectSubmissionHw<GfxFamily, Dispatcher>::updateTagValueFail;
        }
    }

    if (requireMonitorFence) {
        return this->updateTagValueImpl(this->currentRingBuffer);
    }

    MonitoredFence &currentFence = osContextWin->getResidencyController().getMonitoredFence();
    return currentFence.currentFenceValue;
}

// A monitor fence must be dispatched unless monitor fences are disabled and
// the caller does not explicitly require one.
template <typename GfxFamily, typename Dispatcher>
bool WddmDirectSubmission<GfxFamily, Dispatcher>::dispatchMonitorFenceRequired(bool requireMonitorFence) {
    return !this->disableMonitorFence || requireMonitorFence;
}

// Marks the current fence value as submitted, increments the current value and
// records the submitted value as the completion fence of the given ring buffer.
// Returns the value that was marked as submitted.
template <typename GfxFamily, typename Dispatcher>
uint64_t WddmDirectSubmission<GfxFamily, Dispatcher>::updateTagValueImpl(uint32_t completionBufferIndex) {
    MonitoredFence &currentFence = osContextWin->getResidencyController().getMonitoredFence();

    currentFence.lastSubmittedFence = currentFence.currentFenceValue;
    currentFence.currentFenceValue++;
    this->ringBuffers[completionBufferIndex].completionFence = currentFence.lastSubmittedFence;

    return currentFence.lastSubmittedFence;
}

// Waits from the CPU until the given fence reaches completionValue.
template <typename GfxFamily, typename Dispatcher>
void WddmDirectSubmission<GfxFamily, Dispatcher>::handleCompletionFence(uint64_t completionValue, MonitoredFence &fence) {
    wddm->waitFromCpu(completionValue, fence, false);
}

// Fills tagData with the canonized GPU address of the residency controller's
// monitored fence and its current fence value.
template <typename GfxFamily, typename Dispatcher>
void WddmDirectSubmission<GfxFamily, Dispatcher>::getTagAddressValue(TagData &tagData) {
    MonitoredFence &currentFence = osContextWin->getResidencyController().getMonitoredFence();
    auto gmmHelper = wddm->getRootDeviceEnvironment().getGmmHelper();

    tagData.tagAddress = gmmHelper->canonize(currentFence.gpuAddress);
    tagData.tagValue = currentFence.currentFenceValue;
}

// Returns true when the value read through the monitored fence's CPU address
// has reached the completion fence recorded for the given ring buffer.
template <typename GfxFamily, typename Dispatcher>
inline bool WddmDirectSubmission<GfxFamily, Dispatcher>::isCompleted(uint32_t ringBufferIndex) {
    MonitoredFence &currentFence = osContextWin->getResidencyController().getMonitoredFence();
    auto lastSubmittedFence = this->ringBuffers[ringBufferIndex].completionFence;

    return lastSubmittedFence <= *currentFence.cpuAddress;
}

// Stamps every allocation in the list (and its fragments) with the current
// fence value for this context. A null list is ignored.
template <typename GfxFamily, typename Dispatcher>
void WddmDirectSubmission<GfxFamily, Dispatcher>::updateMonitorFenceValueForResidencyList(ResidencyContainer *allocationsForResidency) {
    if (allocationsForResidency == nullptr) {
        return;
    }

    const auto currentFence = osContextWin->getResidencyController().getMonitoredFence().currentFenceValue;
    auto contextId = osContextWin->getContextId();

    for (auto *gfxAllocation : *allocationsForResidency) {
        WddmAllocation *allocation = static_cast<WddmAllocation *>(gfxAllocation);
        // Update fence value not to early destroy / evict allocation
        allocation->updateCompletionDataForAllocationAndFragments(currentFence, contextId);
    }
}

} // namespace NEO