feature: Dispatch monitor fence from wait

Resolves: NEO-8240
Related-To: NEO-8067

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2023-08-07 13:33:24 +00:00
committed by Compute-Runtime-Automation
parent f3f623900a
commit fc1f45b630
12 changed files with 225 additions and 28 deletions

View File

@@ -96,6 +96,8 @@ class DirectSubmissionHw {
return relaxedOrderingEnabled;
}
virtual void flushMonitorFence(){};
protected:
static constexpr size_t prefetchSize = 8 * MemoryConstants::cacheLineSize;
static constexpr size_t prefetchNoops = prefetchSize / sizeof(uint32_t);
@@ -110,6 +112,7 @@ class DirectSubmissionHw {
size_t getSizeNewResourceHandler();
virtual void handleStopRingBuffer(){};
virtual void ensureRingCompletion(){};
void switchRingBuffersNeeded(size_t size);
virtual uint64_t switchRingBuffers();
virtual void handleSwitchRingBuffers() = 0;
GraphicsAllocation *switchRingBuffersAllocations();

View File

@@ -960,9 +960,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffe
}
}
if (ringCommandStream.getAvailableSpace() < requiredMinimalSize) {
switchRingBuffers();
}
this->switchRingBuffersNeeded(requiredMinimalSize);
if (this->relaxedOrderingEnabled && batchBuffer.hasStallingCmds && this->relaxedOrderingSchedulerRequired) {
dispatchRelaxedOrderingQueueStall();
@@ -1027,6 +1025,13 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::isNewResourceHandleNeeded() {
return newResourcesBound;
}
// Switches to the next ring buffer when the current ring command stream
// reports less available space than `size`; otherwise does nothing.
// The value returned by switchRingBuffers() is intentionally not used here.
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::switchRingBuffersNeeded(size_t size) {
    const bool enoughSpace = this->ringCommandStream.getAvailableSpace() >= size;
    if (enoughSpace) {
        return;
    }
    this->switchRingBuffers();
}
template <typename GfxFamily, typename Dispatcher>
inline uint64_t DirectSubmissionHw<GfxFamily, Dispatcher>::switchRingBuffers() {
GraphicsAllocation *nextRingBuffer = switchRingBuffersAllocations();

View File

@@ -23,6 +23,8 @@ class WddmDirectSubmission : public DirectSubmissionHw<GfxFamily, Dispatcher> {
~WddmDirectSubmission() override;
void flushMonitorFence() override;
protected:
bool allocateOsResources() override;
bool submit(uint64_t gpuAddress, size_t size) override;
@@ -31,6 +33,7 @@ class WddmDirectSubmission : public DirectSubmissionHw<GfxFamily, Dispatcher> {
void handleCompletionFence(uint64_t completionValue, MonitoredFence &fence);
void ensureRingCompletion() override;
void handleSwitchRingBuffers() override;
void handleStopRingBuffer() override;
uint64_t updateTagValue() override;
void getTagAddressValue(TagData &tagData) override;
bool isCompleted(uint32_t ringBufferIndex) override;

View File

@@ -41,6 +41,10 @@ WddmDirectSubmission<GfxFamily, Dispatcher>::WddmDirectSubmission(const DirectSu
if (this->miMemFenceRequired) {
this->gpuVaForAdditionalSynchronizationWA = this->completionFenceAllocation->getGpuAddress() + 8u;
}
if (DebugManager.flags.DirectSubmissionDisableMonitorFence.get() != -1) {
this->disableMonitorFence = DebugManager.flags.DirectSubmissionDisableMonitorFence.get();
}
}
template <typename GfxFamily, typename Dispatcher>
@@ -53,6 +57,31 @@ WddmDirectSubmission<GfxFamily, Dispatcher>::~WddmDirectSubmission() {
wddm->getWddmInterface()->destroyMonitorFence(ringFence);
}
// Emits a monitor fence into the ring buffer on its own, i.e. without an
// accompanying batch buffer. Sequence:
//   1. start the ring buffer,
//   2. switch ring buffers if the required space is not available,
//   3. handle new-resource submission,
//   4. dispatch the monitor fence for the current tag address/value,
//   5. dispatch the semaphore section for the next queue work count,
//   6. handle residency, unblock the GPU,
//   7. bump the queue work count and update the tag value.
// The order of the calls is preserved exactly; only the local bookkeeping
// differs in form.
template <typename GfxFamily, typename Dispatcher>
inline void WddmDirectSubmission<GfxFamily, Dispatcher>::flushMonitorFence() {
    this->startRingBuffer();

    // Combined size of the semaphore, monitor-fence, new-resource-handler,
    // switch-ring-buffer and end sections.
    size_t spaceNeeded = this->getSizeSemaphoreSection(false);
    spaceNeeded += Dispatcher::getSizeMonitorFence(this->rootDeviceEnvironment);
    spaceNeeded += this->getSizeNewResourceHandler();
    spaceNeeded += this->getSizeSwitchRingBufferSection();
    spaceNeeded += this->getSizeEnd(false);
    this->switchRingBuffersNeeded(spaceNeeded);

    this->handleNewResourcesSubmission();

    // Fetch the tag address/value pair the fence will write.
    TagData tagData = {};
    this->getTagAddressValue(tagData);
    Dispatcher::dispatchMonitorFence(this->ringCommandStream,
                                     tagData.tagAddress,
                                     tagData.tagValue,
                                     this->rootDeviceEnvironment,
                                     this->useNotifyForPostSync,
                                     this->partitionedMode,
                                     this->dcFlushRequired);

    // The semaphore is programmed for the work count that becomes current below.
    this->dispatchSemaphoreSection(this->currentQueueWorkCount + 1);
    this->handleResidency();
    this->unblockGpu();
    ++this->currentQueueWorkCount;

    this->updateTagValue();
}
template <typename GfxFamily, typename Dispatcher>
void WddmDirectSubmission<GfxFamily, Dispatcher>::ensureRingCompletion() {
WddmDirectSubmission<GfxFamily, Dispatcher>::handleCompletionFence(ringFence.lastSubmittedFence, ringFence);
@@ -96,19 +125,38 @@ bool WddmDirectSubmission<GfxFamily, Dispatcher>::handleResidency() {
return true;
}
// Hook invoked when the ring buffer is stopped. Only acts when
// disableMonitorFence is set: the residency controller's monitored fence
// records its current value as the last submitted one and then moves the
// current value forward by one.
template <typename GfxFamily, typename Dispatcher>
void WddmDirectSubmission<GfxFamily, Dispatcher>::handleStopRingBuffer() {
    if (!this->disableMonitorFence) {
        return;
    }

    MonitoredFence &fence = osContextWin->getResidencyController().getMonitoredFence();
    fence.lastSubmittedFence = fence.currentFenceValue;
    ++fence.currentFenceValue;
}
// Hook invoked when ring buffers are switched. Only acts when
// disableMonitorFence is set: the residency controller's monitored fence
// records its current value as the last submitted one, advances the current
// value by one, and the last submitted value is stored as the completion
// fence of the ring buffer indexed by currentRingBuffer.
template <typename GfxFamily, typename Dispatcher>
void WddmDirectSubmission<GfxFamily, Dispatcher>::handleSwitchRingBuffers() {
    if (!this->disableMonitorFence) {
        return;
    }

    MonitoredFence &fence = osContextWin->getResidencyController().getMonitoredFence();
    fence.lastSubmittedFence = fence.currentFenceValue;
    ++fence.currentFenceValue;

    auto &activeRing = this->ringBuffers[this->currentRingBuffer];
    activeRing.completionFence = fence.lastSubmittedFence;
}
// Advances the residency controller's monitored fence by exactly one step and
// returns the value that was just recorded as submitted.
//
// When disableMonitorFence is set the fence is left untouched and 0 is
// returned; in that mode the fence is advanced by handleSwitchRingBuffers()
// and handleStopRingBuffer() instead.
//
// Otherwise:
//   - lastSubmittedFence takes the current fence value,
//   - currentFenceValue is incremented,
//   - the ring buffer indexed by currentRingBuffer stores lastSubmittedFence
//     as its completion fence,
//   - lastSubmittedFence is returned.
//
// The previous body repeated the advance sequence twice (and redeclared the
// fence reference in the inner scope, followed by an unreachable second
// return), which moved the fence forward by two per call. It is collapsed to
// a single advance here.
template <typename GfxFamily, typename Dispatcher>
uint64_t WddmDirectSubmission<GfxFamily, Dispatcher>::updateTagValue() {
    if (!this->disableMonitorFence) {
        MonitoredFence &currentFence = osContextWin->getResidencyController().getMonitoredFence();

        currentFence.lastSubmittedFence = currentFence.currentFenceValue;
        currentFence.currentFenceValue++;
        this->ringBuffers[this->currentRingBuffer].completionFence = currentFence.lastSubmittedFence;

        return currentFence.lastSubmittedFence;
    }
    return 0ull;
}
template <typename GfxFamily, typename Dispatcher>