refactor: change direct submission interfaces to accept monitor fence

- unify Linux and Windows default settings
- unify override default code
- correct size estimation when fence is required
- call virtual function once for both estimation and dispatch

Related-To: NEO-8395

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2023-09-20 11:32:46 +00:00
committed by Compute-Runtime-Automation
parent 69f5ca6345
commit a16d8f7b88
9 changed files with 62 additions and 72 deletions

View File

@@ -48,7 +48,7 @@ enum class DirectSubmissionSfenceMode : int32_t {
namespace UllsDefaults {
inline constexpr bool defaultDisableCacheFlush = true;
-inline constexpr bool defaultDisableMonitorFence = false;
+inline constexpr bool defaultDisableMonitorFence = true;
} // namespace UllsDefaults
struct BatchBuffer;
@@ -145,8 +145,8 @@ class DirectSubmissionHw {
void setReturnAddress(void *returnCmd, uint64_t returnAddress);
-void *dispatchWorkloadSection(BatchBuffer &batchBuffer);
-size_t getSizeDispatch(bool relaxedOrderingSchedulerRequired, bool returnPtrsRequired);
+void *dispatchWorkloadSection(BatchBuffer &batchBuffer, bool dispatchMonitorFence);
+size_t getSizeDispatch(bool relaxedOrderingSchedulerRequired, bool returnPtrsRequired, bool dispatchMonitorFence);
void dispatchPrefetchMitigation();
size_t getSizePrefetchMitigation();

View File

@@ -51,6 +51,9 @@ DirectSubmissionHw<GfxFamily, Dispatcher>::DirectSubmissionHw(const DirectSubmis
disableCacheFlush = UllsDefaults::defaultDisableCacheFlush;
disableMonitorFence = UllsDefaults::defaultDisableMonitorFence;
+if (DebugManager.flags.DirectSubmissionDisableMonitorFence.get() != -1) {
+this->disableMonitorFence = DebugManager.flags.DirectSubmissionDisableMonitorFence.get();
+}
if (DebugManager.flags.DirectSubmissionMaxRingBuffers.get() != -1) {
this->maxRingBufferCount = DebugManager.flags.DirectSubmissionMaxRingBuffers.get();
@@ -505,7 +508,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::startRingBuffer() {
startSize += RelaxedOrderingHelper::getSizeRegistersInit<GfxFamily>();
}
-size_t requiredSize = startSize + getSizeDispatch(false, false) + getSizeEnd(false);
+size_t requiredSize = startSize + getSizeDispatch(false, false, dispatchMonitorFenceRequired(true)) + getSizeEnd(false);
if (ringCommandStream.getAvailableSpace() < requiredSize) {
switchRingBuffers();
}
@@ -662,7 +665,7 @@ inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeEnd(bool relaxed
}
template <typename GfxFamily, typename Dispatcher>
-inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatch(bool relaxedOrderingSchedulerRequired, bool returnPtrsRequired) {
+inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatch(bool relaxedOrderingSchedulerRequired, bool returnPtrsRequired, bool dispatchMonitorFence) {
size_t size = getSizeSemaphoreSection(relaxedOrderingSchedulerRequired);
if (workloadMode == 0) {
size += getSizeStartSection();
@@ -677,7 +680,7 @@ inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatch(bool re
if (!disableCacheFlush) {
size += Dispatcher::getSizeCacheFlush(rootDeviceEnvironment);
}
-if (!disableMonitorFence) {
+if (dispatchMonitorFence) {
size += Dispatcher::getSizeMonitorFence(rootDeviceEnvironment);
}
@@ -695,7 +698,7 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::updateRelaxedOrderingQueueSize(u
}
template <typename GfxFamily, typename Dispatcher>
-void *DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchWorkloadSection(BatchBuffer &batchBuffer) {
+void *DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchWorkloadSection(BatchBuffer &batchBuffer, bool dispatchMonitorFence) {
void *currentPosition = ringCommandStream.getSpace(0);
if (DebugManager.flags.DirectSubmissionPrintBuffers.get()) {
@@ -765,7 +768,7 @@ void *DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchWorkloadSection(BatchBu
Dispatcher::dispatchCacheFlush(ringCommandStream, this->rootDeviceEnvironment, gpuVaForMiFlush);
}
-if (!disableMonitorFence || this->dispatchMonitorFenceRequired(batchBuffer.hasStallingCmds)) {
+if (dispatchMonitorFence) {
TagData currentTagData = {};
getTagAddressValue(currentTagData);
Dispatcher::dispatchMonitorFence(ringCommandStream, currentTagData.tagAddress, currentTagData.tagValue, this->rootDeviceEnvironment,
@@ -935,8 +938,9 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffe
this->startRingBuffer();
bool relaxedOrderingSchedulerWillBeNeeded = (this->relaxedOrderingSchedulerRequired || batchBuffer.hasRelaxedOrderingDependencies);
+bool dispatchMonitorFence = this->dispatchMonitorFenceRequired(batchBuffer.hasStallingCmds);
-size_t dispatchSize = getSizeDispatch(relaxedOrderingSchedulerWillBeNeeded, batchBuffer.hasRelaxedOrderingDependencies);
+size_t dispatchSize = getSizeDispatch(relaxedOrderingSchedulerWillBeNeeded, batchBuffer.hasRelaxedOrderingDependencies, dispatchMonitorFence);
if (this->copyCommandBufferIntoRing(batchBuffer)) {
dispatchSize += (batchBuffer.stream->getUsed() - batchBuffer.startOffset) - 2 * getSizeStartSection();
@@ -965,7 +969,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffe
handleNewResourcesSubmission();
-void *currentPosition = dispatchWorkloadSection(batchBuffer);
+void *currentPosition = dispatchWorkloadSection(batchBuffer, dispatchMonitorFence);
cpuCachelineFlush(currentPosition, dispatchSize);
handleResidency();
@@ -1084,7 +1088,7 @@ inline GraphicsAllocation *DirectSubmissionHw<GfxFamily, Dispatcher>::switchRing
template <typename GfxFamily, typename Dispatcher>
bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchMonitorFenceRequired(bool hasStallingCmds) {
-return false;
+return !this->disableMonitorFence;
}
template <typename GfxFamily, typename Dispatcher>

View File

@@ -28,12 +28,6 @@ template <typename GfxFamily, typename Dispatcher>
DrmDirectSubmission<GfxFamily, Dispatcher>::DrmDirectSubmission(const DirectSubmissionInputParams &inputParams)
: DirectSubmissionHw<GfxFamily, Dispatcher>(inputParams) {
-this->disableMonitorFence = true;
-if (DebugManager.flags.DirectSubmissionDisableMonitorFence.get() != -1) {
-this->disableMonitorFence = DebugManager.flags.DirectSubmissionDisableMonitorFence.get();
-}
if (DebugManager.flags.OverrideUserFenceStartValue.get() != -1) {
this->completionFenceValue = static_cast<decltype(completionFenceValue)>(DebugManager.flags.OverrideUserFenceStartValue.get());
}

View File

@@ -41,12 +41,6 @@ WddmDirectSubmission<GfxFamily, Dispatcher>::WddmDirectSubmission(const DirectSu
if (this->miMemFenceRequired) {
this->gpuVaForAdditionalSynchronizationWA = this->completionFenceAllocation->getGpuAddress() + 8u;
}
-this->disableMonitorFence = true;
-if (DebugManager.flags.DirectSubmissionDisableMonitorFence.get() != -1) {
-this->disableMonitorFence = DebugManager.flags.DirectSubmissionDisableMonitorFence.get();
-}
}
template <typename GfxFamily, typename Dispatcher>
@@ -157,7 +151,7 @@ uint64_t WddmDirectSubmission<GfxFamily, Dispatcher>::updateTagValue(bool hasSta
template <typename GfxFamily, typename Dispatcher>
bool WddmDirectSubmission<GfxFamily, Dispatcher>::dispatchMonitorFenceRequired(bool hasStallingCmds) {
-return hasStallingCmds;
+return !this->disableMonitorFence || hasStallingCmds;
}
template <typename GfxFamily, typename Dispatcher>