fix: ULLS semaphore counter overflow

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>

Source: c18305d79e
This commit is contained in:
Bartosz Dunajski
2025-12-12 10:43:43 +00:00
committed by Compute-Runtime-Automation
parent 2cfa921691
commit 14b06bbbb0
4 changed files with 25 additions and 0 deletions

View File

@@ -485,6 +485,7 @@ DECLARE_DEBUG_VARIABLE(bool, DirectSubmissionPrintBuffers, false, "Print address
DECLARE_DEBUG_VARIABLE(int32_t, WaitForPagingFenceInController, -1, "Instead of waiting for paging fence on user thread, program additional semaphore which will be signaled by direct submission controller when paging fence reaches required value -1: default, 0 - disable, 1 - enable.")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionControllerIdleDetection, -1, "Terminate direct submission only if CSR is idle. -1: default, 0 - disable, 1 - enable.")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionControllerContextGroupIdleDetection, -1, "Terminate direct submission only if all CSRs in group are idle. -1: default, 0 - disable, 1 - enable.")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionInitialSemaphoreValue, -1, "-1: default, >0: initial semaphore counter value")
/*FEATURE FLAGS*/
DECLARE_DEBUG_VARIABLE(bool, RegisterPageFaultHandlerOnMigration, false, "Register handler on migration to GPU when current is not from pagefault manager")
DECLARE_DEBUG_VARIABLE(bool, EnableNV12, true, "Enables NV12 extension")

View File

@@ -195,6 +195,8 @@ class DirectSubmissionHw {
virtual bool isCompleted(uint32_t ringBufferIndex) = 0;
void updateRelaxedOrderingQueueSize(uint32_t newSize);
uint32_t getInitialSemaphoreValue() const;
void handleSemaphoreDataOverflow();
virtual void makeGlobalFenceAlwaysResident(){};
struct RingBufferUse {

View File

@@ -87,6 +87,8 @@ DirectSubmissionHw<GfxFamily, Dispatcher>::DirectSubmissionHw(const DirectSubmis
if (Dispatcher::isCopy() && relaxedOrderingEnabled) {
relaxedOrderingEnabled = (debugManager.flags.DirectSubmissionRelaxedOrderingForBcs.get() != 0);
}
currentQueueWorkCount = getInitialSemaphoreValue();
}
template <typename GfxFamily, typename Dispatcher>
@@ -520,8 +522,27 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchUllsState() {
}
}
// Returns the value used to seed the semaphore work counter.
// The DirectSubmissionInitialSemaphoreValue debug flag takes precedence when it
// is set to something other than its default; otherwise the counter starts at 1.
template <typename GfxFamily, typename Dispatcher>
uint32_t DirectSubmissionHw<GfxFamily, Dispatcher>::getInitialSemaphoreValue() const {
    // Fallback used when the debug flag is left at its default.
    constexpr uint32_t fallbackInitialValue = 1u;

    const auto &initialValueFlag = debugManager.flags.DirectSubmissionInitialSemaphoreValue;
    return initialValueFlag.template getIfNotDefault<uint32_t>(fallbackInitialValue);
}
// Restarts the semaphore work counter from its initial value.
// The sequence is order-dependent: the ring buffer is stopped first, the counter
// is zeroed so that unblockGpu() runs with a value of 0, and only afterwards is
// the counter set back to getInitialSemaphoreValue().
template <typename GfxFamily, typename Dispatcher>
void DirectSubmissionHw<GfxFamily, Dispatcher>::handleSemaphoreDataOverflow() {
    this->stopRingBuffer(true);

    // Zero the counter before unblocking; per the original note this sets the
    // gpu allocation to 0.
    this->currentQueueWorkCount = 0;
    this->unblockGpu();

    // Resume counting from the configured starting point.
    this->currentQueueWorkCount = this->getInitialSemaphoreValue();
}
template <typename GfxFamily, typename Dispatcher>
bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffer &batchBuffer, FlushStampTracker &flushStamp) {
// Handle overflow earlier (uint32_max - 3), in case of additional ring starts/stops
if ((currentQueueWorkCount + 1) >= (std::numeric_limits<uint32_t>::max() - 3)) {
handleSemaphoreDataOverflow();
}
this->handleRingRestartForUllsLightResidency(batchBuffer.allocationsForResidency);
lastSubmittedThrottle = batchBuffer.throttle;

View File

@@ -676,4 +676,5 @@ ForceTotalWMTPDataSize = -1
CopyLockedMemoryBeforeWrite = 0
SplitBcsPerEngineMaxSize = -1
EnableUsmPoolResidencyTracking = -1
DirectSubmissionInitialSemaphoreValue = -1
# Please don't edit below this line