mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
feature: Introduce ULLS light
Add the core implementation of ULLS without the VM_BIND interface, a.k.a. ULLS light. Related-To: NEO-13922 Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
488ac4bb6a
commit
bc2b49b958
@@ -112,8 +112,9 @@ class DirectSubmissionHw {
|
||||
MOCKABLE_VIRTUAL void deallocateResources();
|
||||
MOCKABLE_VIRTUAL bool makeResourcesResident(DirectSubmissionAllocations &allocations);
|
||||
virtual bool allocateOsResources() = 0;
|
||||
virtual bool submit(uint64_t gpuAddress, size_t size) = 0;
|
||||
virtual bool submit(uint64_t gpuAddress, size_t size, ResidencyContainer *allocationsForResidency) = 0;
|
||||
virtual bool handleResidency() = 0;
|
||||
// Hook called at the start of dispatchCommandBuffer with the batch buffer's residency container.
// The base implementation is intentionally a no-op; DrmDirectSubmission overrides it.
virtual void handleRingRestartForUllsLightResidency(ResidencyContainer *allocationsForResidency){};
|
||||
void handleNewResourcesSubmission();
|
||||
bool isNewResourceHandleNeeded();
|
||||
size_t getSizeNewResourceHandler();
|
||||
@@ -129,7 +130,7 @@ class DirectSubmissionHw {
|
||||
virtual bool dispatchMonitorFenceRequired(bool requireMonitorFence);
|
||||
virtual void getTagAddressValue(TagData &tagData) = 0;
|
||||
void unblockGpu();
|
||||
bool submitCommandBufferToGpu(bool needStart, uint64_t gpuAddress, size_t size, bool needWait);
|
||||
bool submitCommandBufferToGpu(bool needStart, uint64_t gpuAddress, size_t size, bool needWait, ResidencyContainer *allocationsForResidency);
|
||||
bool copyCommandBufferIntoRing(BatchBuffer &batchBuffer);
|
||||
|
||||
void cpuCachelineFlush(void *ptr, size_t size);
|
||||
|
||||
@@ -502,7 +502,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::initialize(bool submitOnInit) {
|
||||
}
|
||||
dispatchSemaphoreSection(currentQueueWorkCount);
|
||||
|
||||
ringStart = submit(ringCommandStream.getGraphicsAllocation()->getGpuAddress(), startBufferSize);
|
||||
ringStart = submit(ringCommandStream.getGraphicsAllocation()->getGpuAddress(), startBufferSize, nullptr);
|
||||
performDiagnosticMode();
|
||||
return ringStart;
|
||||
}
|
||||
@@ -964,6 +964,8 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchUllsState() {
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffer &batchBuffer, FlushStampTracker &flushStamp) {
|
||||
this->handleRingRestartForUllsLightResidency(batchBuffer.allocationsForResidency);
|
||||
|
||||
lastSubmittedThrottle = batchBuffer.throttle;
|
||||
bool relaxedOrderingSchedulerWillBeNeeded = (this->relaxedOrderingSchedulerRequired || batchBuffer.hasRelaxedOrderingDependencies);
|
||||
bool inputRequiredMonitorFence = false;
|
||||
@@ -1017,7 +1019,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffe
|
||||
cpuCachelineFlush(currentPosition, dispatchSize);
|
||||
|
||||
auto requiresBlockingResidencyHandling = batchBuffer.pagingFenceSemInfo.requiresBlockingResidencyHandling;
|
||||
if (!this->submitCommandBufferToGpu(needStart, startVA, requiredMinimalSize, requiresBlockingResidencyHandling)) {
|
||||
if (!this->submitCommandBufferToGpu(needStart, startVA, requiredMinimalSize, requiresBlockingResidencyHandling, batchBuffer.allocationsForResidency)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -1035,9 +1037,9 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffe
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
bool DirectSubmissionHw<GfxFamily, Dispatcher>::submitCommandBufferToGpu(bool needStart, uint64_t gpuAddress, size_t size, bool needWait) {
|
||||
bool DirectSubmissionHw<GfxFamily, Dispatcher>::submitCommandBufferToGpu(bool needStart, uint64_t gpuAddress, size_t size, bool needWait, ResidencyContainer *allocationsForResidency) {
|
||||
if (needStart) {
|
||||
this->ringStart = this->submit(gpuAddress, size);
|
||||
this->ringStart = this->submit(gpuAddress, size, allocationsForResidency);
|
||||
return this->ringStart;
|
||||
} else {
|
||||
if (needWait) {
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
|
||||
#pragma once
|
||||
#include "shared/source/direct_submission/direct_submission_hw.h"
|
||||
#include "shared/source/os_interface/linux/drm_buffer_object.h"
|
||||
#include "shared/source/os_interface/linux/drm_wrappers.h"
|
||||
|
||||
namespace NEO {
|
||||
|
||||
@@ -24,9 +26,10 @@ class DrmDirectSubmission : public DirectSubmissionHw<GfxFamily, Dispatcher> {
|
||||
|
||||
protected:
|
||||
bool allocateOsResources() override;
|
||||
bool submit(uint64_t gpuAddress, size_t size) override;
|
||||
bool submit(uint64_t gpuAddress, size_t size, ResidencyContainer *allocationsForResidency) override;
|
||||
|
||||
bool handleResidency() override;
|
||||
void handleRingRestartForUllsLightResidency(ResidencyContainer *allocationsForResidency) override;
|
||||
void handleStopRingBuffer() override;
|
||||
|
||||
void ensureRingCompletion() override;
|
||||
@@ -43,5 +46,8 @@ class DrmDirectSubmission : public DirectSubmissionHw<GfxFamily, Dispatcher> {
|
||||
volatile TagAddressType *tagAddress;
|
||||
TaskCountType completionFenceValue{};
|
||||
std::chrono::microseconds gpuHangCheckPeriod{CommonConstants::gpuHangCheckTimeInUS};
|
||||
|
||||
std::vector<BufferObject *> residency{};
|
||||
std::vector<ExecObject> execObjectsStorage{};
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include "shared/source/direct_submission/linux/drm_direct_submission.h"
|
||||
#include "shared/source/os_interface/linux/drm_allocation.h"
|
||||
#include "shared/source/os_interface/linux/drm_buffer_object.h"
|
||||
#include "shared/source/os_interface/linux/drm_memory_operations_handler.h"
|
||||
#include "shared/source/os_interface/linux/drm_neo.h"
|
||||
#include "shared/source/os_interface/linux/drm_wrappers.h"
|
||||
#include "shared/source/os_interface/linux/ioctl_helper.h"
|
||||
@@ -27,6 +28,7 @@ namespace NEO {
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
DrmDirectSubmission<GfxFamily, Dispatcher>::DrmDirectSubmission(const DirectSubmissionInputParams &inputParams)
|
||||
: DirectSubmissionHw<GfxFamily, Dispatcher>(inputParams) {
|
||||
this->execObjectsStorage.resize(1u);
|
||||
|
||||
this->completionFenceValue = inputParams.initialCompletionFenceValue;
|
||||
if (debugManager.flags.OverrideUserFenceStartValue.get() != -1) {
|
||||
@@ -124,7 +126,7 @@ bool DrmDirectSubmission<GfxFamily, Dispatcher>::allocateOsResources() {
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
bool DrmDirectSubmission<GfxFamily, Dispatcher>::submit(uint64_t gpuAddress, size_t size) {
|
||||
bool DrmDirectSubmission<GfxFamily, Dispatcher>::submit(uint64_t gpuAddress, size_t size, ResidencyContainer *allocationsForResidency) {
|
||||
auto bb = static_cast<DrmAllocation *>(this->ringCommandStream.getGraphicsAllocation())->getBO();
|
||||
|
||||
auto osContextLinux = static_cast<OsContextLinux *>(&this->osContext);
|
||||
@@ -132,9 +134,9 @@ bool DrmDirectSubmission<GfxFamily, Dispatcher>::submit(uint64_t gpuAddress, siz
|
||||
auto execFlags = osContextLinux->getEngineFlag() | drm.getIoctlHelper()->getDrmParamValue(DrmParam::execNoReloc);
|
||||
auto &drmContextIds = osContextLinux->getDrmContextIds();
|
||||
|
||||
ExecObject execObject{};
|
||||
|
||||
this->handleResidency();
|
||||
if (!allocationsForResidency) {
|
||||
this->handleResidency();
|
||||
}
|
||||
|
||||
auto currentBase = this->ringCommandStream.getGraphicsAllocation()->getGpuAddress();
|
||||
auto offset = ptrDiff(gpuAddress, currentBase);
|
||||
@@ -151,21 +153,33 @@ bool DrmDirectSubmission<GfxFamily, Dispatcher>::submit(uint64_t gpuAddress, siz
|
||||
|
||||
for (auto drmIterator = 0u; drmIterator < osContextLinux->getDeviceBitfield().size(); drmIterator++) {
|
||||
if (osContextLinux->getDeviceBitfield().test(drmIterator)) {
|
||||
uint32_t errorCode = bb->exec(static_cast<uint32_t>(size),
|
||||
offset,
|
||||
execFlags,
|
||||
false,
|
||||
&this->osContext,
|
||||
drmIterator,
|
||||
drmContextIds[drmContextId],
|
||||
nullptr,
|
||||
0,
|
||||
&execObject,
|
||||
completionFenceGpuAddress,
|
||||
completionValue);
|
||||
auto size = allocationsForResidency ? allocationsForResidency->size() : 0u;
|
||||
for (uint32_t i = 0; i < size; i++) {
|
||||
auto drmAlloc = static_cast<DrmAllocation *>((*allocationsForResidency)[i]);
|
||||
drmAlloc->makeBOsResident(&this->osContext, drmIterator, &this->residency, false, false);
|
||||
}
|
||||
|
||||
auto requiredSize = this->residency.size() + 1;
|
||||
if (requiredSize > this->execObjectsStorage.size()) {
|
||||
this->execObjectsStorage.resize(requiredSize);
|
||||
}
|
||||
|
||||
auto errorCode = bb->exec(static_cast<uint32_t>(size),
|
||||
offset,
|
||||
execFlags,
|
||||
false,
|
||||
&this->osContext,
|
||||
drmIterator,
|
||||
drmContextIds[drmContextId],
|
||||
this->residency.data(),
|
||||
this->residency.size(),
|
||||
this->execObjectsStorage.data(),
|
||||
completionFenceGpuAddress,
|
||||
completionValue);
|
||||
if (errorCode != 0) {
|
||||
this->dispatchErrorCode = errorCode;
|
||||
ret = false;
|
||||
break;
|
||||
}
|
||||
drmContextId++;
|
||||
if (completionFenceGpuAddress) {
|
||||
@@ -174,6 +188,8 @@ bool DrmDirectSubmission<GfxFamily, Dispatcher>::submit(uint64_t gpuAddress, siz
|
||||
}
|
||||
}
|
||||
|
||||
this->residency.clear();
|
||||
|
||||
if (this->isCompletionFenceSupported() && ret) {
|
||||
completionFenceValue++;
|
||||
}
|
||||
@@ -188,6 +204,16 @@ bool DrmDirectSubmission<GfxFamily, Dispatcher>::handleResidency() {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Stops the ring buffer when the memory operations handler reports new resources
// since the last ring submit. Does nothing when allocationsForResidency is null.
// NOTE(review): presumably the stop makes the next submit restart the ring with a
// refreshed residency list - confirm against submit()/stopRingBuffer().
template <typename GfxFamily, typename Dispatcher>
|
||||
void DrmDirectSubmission<GfxFamily, Dispatcher>::handleRingRestartForUllsLightResidency(ResidencyContainer *allocationsForResidency) {
|
||||
if (allocationsForResidency) {
|
||||
// Judging by the callee's name this both reads and clears the "new resources" flag - TODO confirm.
auto restartNeeded = static_cast<DrmMemoryOperationsHandler *>(this->memoryOperationHandler)->obtainAndResetNewResourcesSinceLastRingSubmit();
|
||||
if (restartNeeded) {
|
||||
// NOTE(review): the meaning of the `false` argument is not visible here - check stopRingBuffer's signature.
this->stopRingBuffer(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
void DrmDirectSubmission<GfxFamily, Dispatcher>::handleStopRingBuffer() {
|
||||
if (this->disableMonitorFence) {
|
||||
@@ -209,6 +235,11 @@ void DrmDirectSubmission<GfxFamily, Dispatcher>::handleSwitchRingBuffers(Residen
|
||||
this->ringBuffers[this->previousRingBuffer].completionFence = this->currentTagData.tagValue;
|
||||
}
|
||||
}
|
||||
|
||||
if (allocationsForResidency) {
|
||||
allocationsForResidency->clear();
|
||||
static_cast<DrmMemoryOperationsHandler *>(this->memoryOperationHandler)->mergeWithResidencyContainer(&this->osContext, *allocationsForResidency);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
* Copyright (C) 2020-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -29,7 +29,7 @@ class WddmDirectSubmission : public DirectSubmissionHw<GfxFamily, Dispatcher> {
|
||||
|
||||
protected:
|
||||
bool allocateOsResources() override;
|
||||
bool submit(uint64_t gpuAddress, size_t size) override;
|
||||
bool submit(uint64_t gpuAddress, size_t size, ResidencyContainer *allocationForResidency) override;
|
||||
|
||||
bool handleResidency() override;
|
||||
void handleCompletionFence(uint64_t completionValue, MonitoredFence &fence);
|
||||
|
||||
@@ -73,7 +73,7 @@ inline void WddmDirectSubmission<GfxFamily, Dispatcher>::flushMonitorFence() {
|
||||
Dispatcher::dispatchMonitorFence(this->ringCommandStream, currentTagData.tagAddress, currentTagData.tagValue, this->rootDeviceEnvironment, this->partitionedMode, this->dcFlushRequired);
|
||||
|
||||
this->dispatchSemaphoreSection(this->currentQueueWorkCount + 1);
|
||||
this->submitCommandBufferToGpu(needStart, startVA, requiredMinimalSize, true);
|
||||
this->submitCommandBufferToGpu(needStart, startVA, requiredMinimalSize, true, nullptr);
|
||||
this->currentQueueWorkCount++;
|
||||
|
||||
this->updateTagValueImpl(this->currentRingBuffer);
|
||||
@@ -92,7 +92,7 @@ bool WddmDirectSubmission<GfxFamily, Dispatcher>::allocateOsResources() {
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
bool WddmDirectSubmission<GfxFamily, Dispatcher>::submit(uint64_t gpuAddress, size_t size) {
|
||||
bool WddmDirectSubmission<GfxFamily, Dispatcher>::submit(uint64_t gpuAddress, size_t size, ResidencyContainer *allocationForResidency) {
|
||||
perfLogResidencyVariadicLog(wddm->getResidencyLogger(), "ULLS Submit to GPU\n");
|
||||
COMMAND_BUFFER_HEADER *pHeader = reinterpret_cast<COMMAND_BUFFER_HEADER *>(commandBufferHeader.get());
|
||||
pHeader->RequiresCoherency = false;
|
||||
|
||||
Reference in New Issue
Block a user