Revert "refactor: Remove unused ulls functionalities"

This reverts commit 8ab4e1bcb8.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Compute-Runtime-Validation
2025-09-12 02:25:44 +02:00
committed by Compute-Runtime-Automation
parent 74c7c625b8
commit 7610d7c90a
10 changed files with 528 additions and 7 deletions

View File

@@ -448,10 +448,12 @@ DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionBufferPlacement, -1, "-1: do not
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionSemaphorePlacement, -1, "-1: do not override, 0: non-system, 1: system")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionBufferAddressing, -1, "-1: do not override, 0: not use 48bit, 1: use 48bit")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionSemaphoreAddressing, -1, "-1: do not override, 0: not use 48bit, 1: use 48bit")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisableCpuCacheFlush, -1, "-1: do not override, 0: disable, 1: enable")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDrmContext, -1, "Create special drm context: -1: default, when new residency model available, 0: disable, 1: enable")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionOverrideBlitterSupport, -1, "Overrides default blitter support: -1: do not override, 0: disable engine support, 1: enable engine support with init start, 2: enable engine support without init start")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionOverrideRenderSupport, -1, "Overrides default render support: -1: do not override, 0: disable engine support, 1: enable engine support with init start, 2: enable engine support without init start")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionOverrideComputeSupport, -1, "Overrides default compute support: -1: do not override, 0: disable engine support, 1: enable engine support with init start, 2: enable engine support without init start")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisableCacheFlush, -1, "-1: driver default, 0: additional cache flush is present 1: disable dispatching cache flush commands")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionNewResourceTlbFlush, -1, "-1: driver default - flush when new resource is bound, 0: disabled, 1: enabled")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDetectGpuHang, -1, "-1: default, 0: disable gpu hang detection after raising ulls semaphore, 1: enable gpu hang detection after raising ulls semaphore")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionFlatRingBuffer, -1, "-1: default, 0: disable, 1: enable, Copies task command buffer directly into ring, implemented for immediate command lists only")
@@ -463,6 +465,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionForceLocalMemoryStorageMode, -1,
DECLARE_DEBUG_VARIABLE(int32_t, EnableRingSwitchTagUpdateWa, -1, "-1: default, 0 - disable, 1 - enable. If enabled, completionFences wont be updated if ring is not running.")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionPCIBarrier, -1, "Use PCI barrier for data synchronization before semaphore unblock -1: default, 0 - disable, 1 - enable.")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionInsertExtraMiMemFenceCommands, -1, "-1: default, 0 - disable, 1 - enable. If enabled, add extra MI_MEM_FENCE instructions with acquire bit set")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionInsertSfenceInstructionPriorToSubmission, -1, "-1: default, 0 - disable, 1 - Insert _mm_sfence before unlocking semaphore only, 2 - insert before and after semaphore")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionMaxRingBuffers, -1, "-1: default, >0: max ring buffer count, During switch ring buffer, if there is no available ring, wait for completion instead of allocating new one if DirectSubmissionMaxRingBuffers is reached")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisablePrefetcher, -1, "-1: default, 0 - disable, 1 - enable. If enabled, disable prefetcher is being dispatched")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrdering, -1, "-1: default, 0 - disable, 1 - enable. If enabled, tasks sent to direct submission ring may be dispatched out of order")
@@ -471,6 +474,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingQueueSizeLimit, -
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingMinNumberOfClients, -1, "-1: default, >0: Enables RelaxedOrdering mode only if specified number of clients is assigned to given CSR.")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingCounterHeuristic, -1, "-1: default, 0: disabled, 1: enabled. If set use counter based heuristic to allow for relaxed ordering dispatch")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingCounterHeuristicTreshold, -1, "-1: default, >0: limit number of append calls to disable relaxed ordering dispatch")
// Chooses which batch buffer input drives monitor fence dispatch: 0 selects the stalling-commands flag, any other non-default value the explicit monitor fence flag.
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionMonitorFenceInputPolicy, -1, "-1: default, 0: stalling command flag, 1: explicit monitor fence flag. Selects policy to dispatch monitor fence upon input flag, either for every stalling command or explicit monitor fence dispatch")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionPrintSemaphoreUsage, -1, "-1: default, 0: disabled, 1: enabled. If set, print DirectSubmission semaphore programming and unlocking")
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionSwitchSemaphoreMode, -1, "-1: default, 1: enable switch on unsuccessful, 0: disable switch on unsuccessful")
DECLARE_DEBUG_VARIABLE(bool, DirectSubmissionPrintBuffers, false, "Print address of submitted command buffers")

View File

@@ -41,6 +41,16 @@ struct TagData {
uint64_t tagValue = 0ull;
};
// Placement of CPU fence instructions used by direct submission.
// The numeric values are the ones accepted by the
// DirectSubmissionInsertSfenceInstructionPriorToSubmission debug key.
enum class DirectSubmissionSfenceMode : int32_t {
    disabled = 0,                // no fence is inserted
    beforeSemaphoreOnly = 1,     // fence before the semaphore is unlocked
    beforeAndAfterSemaphore = 2, // fence both before and after the semaphore
};
namespace UllsDefaults {
// Value assigned to DirectSubmissionHw::disableCacheFlush before the
// DirectSubmissionDisableCacheFlush debug key is consulted.
inline constexpr bool defaultDisableCacheFlush{true};
} // namespace UllsDefaults
struct BatchBuffer;
class FlushStampTracker;
class GraphicsAllocation;
@@ -94,15 +104,19 @@ class DirectSubmissionHw {
protected:
// Issues CPU fence instructions when constructed and when destroyed, depending on
// the sfenceMode of the direct submission it was created for.
struct SemaphoreFenceHelper : public NonCopyableAndNonMovableClass {
    // Construction: nothing is issued while the mode is below beforeSemaphoreOnly.
    // Otherwise mfence is used when miMemFenceRequired is false, no PCI barrier pointer
    // is set and the device is not integrated; in every other case sfence is used.
    SemaphoreFenceHelper(const auto &directSubmission) : directSubmission(directSubmission) {
        if (directSubmission.sfenceMode < DirectSubmissionSfenceMode::beforeSemaphoreOnly) {
            return;
        }

        const bool useFullFence = !directSubmission.miMemFenceRequired &&
                                  !directSubmission.pciBarrierPtr &&
                                  !directSubmission.hwInfo->capabilityTable.isIntegratedDevice;
        if (useFullFence) {
            CpuIntrinsics::mfence();
            return;
        }
        CpuIntrinsics::sfence();
    }

    // Destruction: a trailing sfence is issued only in beforeAndAfterSemaphore mode.
    ~SemaphoreFenceHelper() {
        if (directSubmission.sfenceMode != DirectSubmissionSfenceMode::beforeAndAfterSemaphore) {
            return;
        }
        CpuIntrinsics::sfence();
    }

    const DirectSubmissionHw<GfxFamily, Dispatcher> &directSubmission;
};
@@ -138,6 +152,8 @@ class DirectSubmissionHw {
bool submitCommandBufferToGpu(bool needStart, uint64_t gpuAddress, size_t size, bool needWait, const ResidencyContainer *allocationsForResidency);
bool copyCommandBufferIntoRing(BatchBuffer &batchBuffer);
void cpuCachelineFlush(void *ptr, size_t size);
void dispatchSemaphoreSection(uint32_t value);
size_t getSizeSemaphoreSection(bool relaxedOrderingSchedulerRequired);
@@ -243,11 +259,14 @@ class DirectSubmissionHw {
uint32_t activeTiles = 1u;
uint32_t immWritePostSyncOffset = 0u;
uint32_t currentRelaxedOrderingQueueSize = 0;
DirectSubmissionSfenceMode sfenceMode = DirectSubmissionSfenceMode::beforeAndAfterSemaphore;
volatile uint32_t reserved = 0u;
uint32_t dispatchErrorCode = 0;
QueueThrottle lastSubmittedThrottle = QueueThrottle::MEDIUM;
bool ringStart = false;
bool disableCpuCacheFlush = true;
bool disableCacheFlush = false;
bool partitionedMode = false;
bool partitionConfigSet = true;
bool miMemFenceRequired = false;
@@ -259,6 +278,7 @@ class DirectSubmissionHw {
bool relaxedOrderingEnabled = false;
bool relaxedOrderingInitialized = false;
bool relaxedOrderingSchedulerRequired = false;
bool inputMonitorFenceDispatchRequirement = true;
bool notifyKmdDuringMonitorFence = false;
};
} // namespace NEO

View File

@@ -49,10 +49,16 @@ DirectSubmissionHw<GfxFamily, Dispatcher>::DirectSubmissionHw(const DirectSubmis
auto &productHelper = inputParams.rootDeviceEnvironment.getHelper<ProductHelper>();
auto &compilerProductHelper = inputParams.rootDeviceEnvironment.getHelper<CompilerProductHelper>();
disableCacheFlush = UllsDefaults::defaultDisableCacheFlush;
if (debugManager.flags.DirectSubmissionMaxRingBuffers.get() != -1) {
this->maxRingBufferCount = debugManager.flags.DirectSubmissionMaxRingBuffers.get();
}
if (debugManager.flags.DirectSubmissionDisableCacheFlush.get() != -1) {
disableCacheFlush = !!debugManager.flags.DirectSubmissionDisableCacheFlush.get();
}
if (debugManager.flags.DirectSubmissionDetectGpuHang.get() != -1) {
detectGpuHang = !!debugManager.flags.DirectSubmissionDetectGpuHang.get();
}
@@ -67,11 +73,26 @@ DirectSubmissionHw<GfxFamily, Dispatcher>::DirectSubmissionHw(const DirectSubmis
this->systemMemoryFenceAddressSet = true;
}
if (debugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.get() != -1) {
sfenceMode = static_cast<DirectSubmissionSfenceMode>(debugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.get());
}
if (debugManager.flags.DirectSubmissionMonitorFenceInputPolicy.get() != -1) {
this->inputMonitorFenceDispatchRequirement = !!(debugManager.flags.DirectSubmissionMonitorFenceInputPolicy.get());
}
int32_t disableCacheFlushKey = debugManager.flags.DirectSubmissionDisableCpuCacheFlush.get();
if (disableCacheFlushKey != -1) {
disableCpuCacheFlush = (disableCacheFlushKey == 1);
}
isDisablePrefetcherRequired = productHelper.isPrefetcherDisablingInDirectSubmissionRequired();
if (debugManager.flags.DirectSubmissionDisablePrefetcher.get() != -1) {
isDisablePrefetcherRequired = !!debugManager.flags.DirectSubmissionDisablePrefetcher.get();
}
UNRECOVERABLE_IF(!CpuInfo::getInstance().isFeatureSupported(CpuInfo::featureClflush) && !disableCpuCacheFlush);
setImmWritePostSyncOffset();
dcFlushRequired = MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, inputParams.rootDeviceEnvironment);
@@ -174,6 +195,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::allocateResources() {
semaphoreData = static_cast<volatile RingSemaphoreData *>(semaphorePtr);
memset(semaphorePtr, 0, sizeof(RingSemaphoreData));
semaphoreData->queueWorkCount = 0;
cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize);
this->gpuVaForMiFlush = this->semaphoreGpuVa + offsetof(RingSemaphoreData, miFlushSpace);
this->gpuVaForPagingFenceSemaphore = this->semaphoreGpuVa + offsetof(RingSemaphoreData, pagingFenceCounter);
@@ -212,6 +234,25 @@ inline void DirectSubmissionHw<GfxFamily, Dispatcher>::unblockGpu() {
semaphoreData->queueWorkCount = currentQueueWorkCount;
}
// Calls CpuIntrinsics::clFlush once per cache line, starting at ptr rounded down to a
// cache-line boundary and ending at ptr + size rounded up to a cache-line boundary.
// Returns immediately when disableCpuCacheFlush is set.
template <typename GfxFamily, typename Dispatcher>
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::cpuCachelineFlush(void *ptr, size_t size) {
    if (disableCpuCacheFlush) {
        return;
    }

    // log2 of the cache line size; lets the line count be computed with a shift
    constexpr size_t cachlineBit = 6;
    static_assert(MemoryConstants::cacheLineSize == 1 << cachlineBit, "cachlineBit has invalid value");

    char *const rangeBegin = reinterpret_cast<char *>(ptr);
    char *currentLine = alignDown(rangeBegin, MemoryConstants::cacheLineSize);
    char *const lastLineEnd = alignUp(rangeBegin + size, MemoryConstants::cacheLineSize);

    for (size_t linesLeft = (lastLineEnd - currentLine) >> cachlineBit; linesLeft > 0; --linesLeft) {
        CpuIntrinsics::clFlush(currentLine);
        currentLine += MemoryConstants::cacheLineSize;
    }
}
template <typename GfxFamily, typename Dispatcher>
bool DirectSubmissionHw<GfxFamily, Dispatcher>::initialize(bool submitOnInit) {
bool ret = allocateResources();
@@ -260,10 +301,12 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::stopRingBuffer(bool blocking) {
return true;
}
bool relaxedOrderingSchedulerWasRequired = this->relaxedOrderingSchedulerRequired;
if (this->relaxedOrderingEnabled && this->relaxedOrderingSchedulerRequired) {
dispatchRelaxedOrderingQueueStall();
}
void *flushPtr = ringCommandStream.getSpace(0);
Dispatcher::dispatchCacheFlush(ringCommandStream, this->rootDeviceEnvironment, gpuVaForMiFlush);
dispatchStopRingBufferSection();
Dispatcher::dispatchStopCommandBuffer(ringCommandStream);
@@ -272,7 +315,9 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::stopRingBuffer(bool blocking) {
EncodeNoop<GfxFamily>::emitNoop(ringCommandStream, bytesToPad);
EncodeNoop<GfxFamily>::alignToCacheLine(ringCommandStream);
cpuCachelineFlush(flushPtr, getSizeEnd(relaxedOrderingSchedulerWasRequired));
this->unblockGpu();
cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize);
this->handleStopRingBuffer();
this->ringStart = false;
@@ -376,6 +421,9 @@ inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatch(bool re
size += RelaxedOrderingHelper::getSizeReturnPtrRegs<GfxFamily>();
}
if (!disableCacheFlush) {
size += Dispatcher::getSizeCacheFlush(rootDeviceEnvironment);
}
if (dispatchMonitorFence) {
size += Dispatcher::getSizeMonitorFence(rootDeviceEnvironment);
}
@@ -458,6 +506,10 @@ void *DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchWorkloadSection(BatchBu
}
}
if (!disableCacheFlush) {
Dispatcher::dispatchCacheFlush(ringCommandStream, this->rootDeviceEnvironment, gpuVaForMiFlush);
}
if (dispatchMonitorFence) {
TagData currentTagData = {};
getTagAddressValue(currentTagData);
@@ -528,7 +580,13 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffe
lastSubmittedThrottle = batchBuffer.throttle;
bool relaxedOrderingSchedulerWillBeNeeded = (this->relaxedOrderingSchedulerRequired || batchBuffer.hasRelaxedOrderingDependencies);
bool dispatchMonitorFence = this->dispatchMonitorFenceRequired(batchBuffer.dispatchMonitorFence);
bool inputRequiredMonitorFence = false;
if (this->inputMonitorFenceDispatchRequirement) {
inputRequiredMonitorFence = batchBuffer.dispatchMonitorFence;
} else {
inputRequiredMonitorFence = batchBuffer.hasStallingCmds;
}
bool dispatchMonitorFence = this->dispatchMonitorFenceRequired(inputRequiredMonitorFence);
size_t dispatchSize = this->getUllsStateSize() + getSizeDispatch(relaxedOrderingSchedulerWillBeNeeded, batchBuffer.hasRelaxedOrderingDependencies, dispatchMonitorFence);
@@ -568,13 +626,16 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffe
handleNewResourcesSubmission();
dispatchWorkloadSection(batchBuffer, dispatchMonitorFence);
void *currentPosition = dispatchWorkloadSection(batchBuffer, dispatchMonitorFence);
cpuCachelineFlush(currentPosition, dispatchSize);
auto requiresBlockingResidencyHandling = batchBuffer.pagingFenceSemInfo.requiresBlockingResidencyHandling;
if (!this->submitCommandBufferToGpu(needStart, startVA, requiredMinimalSize, requiresBlockingResidencyHandling, batchBuffer.allocationsForResidency)) {
return false;
}
cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize);
currentQueueWorkCount++;
uint64_t flushValue = updateTagValue(dispatchMonitorFence);
@@ -651,10 +712,12 @@ inline uint64_t DirectSubmissionHw<GfxFamily, Dispatcher>::switchRingBuffers(Res
this->handleRingRestartForUllsLightResidency(allocationsForResidency);
void *flushPtr = ringCommandStream.getSpace(0);
uint64_t currentBufferGpuVa = ringCommandStream.getCurrentGpuAddressPosition();
if (ringStart) {
dispatchSwitchRingBufferSection(nextRingBuffer->getGpuAddress());
cpuCachelineFlush(flushPtr, getSizeSwitchRingBufferSection());
}
ringCommandStream.replaceBuffer(nextRingBuffer->getUnderlyingBuffer(), ringCommandStream.getMaxAvailableSpace());

View File

@@ -18,6 +18,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
using BaseClass::allocateResources;
using BaseClass::completionFenceAllocation;
using BaseClass::copyCommandBufferIntoRing;
using BaseClass::cpuCachelineFlush;
using BaseClass::currentQueueWorkCount;
using BaseClass::currentRelaxedOrderingQueueSize;
using BaseClass::currentRingBuffer;
@@ -26,6 +27,8 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
using BaseClass::deferredTasksListAllocation;
using BaseClass::detectGpuHang;
using BaseClass::DirectSubmissionHw;
using BaseClass::disableCacheFlush;
using BaseClass::disableCpuCacheFlush;
using BaseClass::dispatchDisablePrefetcher;
using BaseClass::dispatchMonitorFenceRequired;
using BaseClass::dispatchPartitionRegisterConfiguration;
@@ -51,6 +54,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
using BaseClass::globalFenceAllocation;
using BaseClass::hwInfo;
using BaseClass::immWritePostSyncOffset;
using BaseClass::inputMonitorFenceDispatchRequirement;
using BaseClass::isDisablePrefetcherRequired;
using BaseClass::lastSubmittedThrottle;
using BaseClass::miMemFenceRequired;

View File

@@ -111,13 +111,16 @@ DirectSubmissionBufferPlacement = -1
DirectSubmissionSemaphorePlacement = -1
DirectSubmissionBufferAddressing = -1
DirectSubmissionSemaphoreAddressing = -1
DirectSubmissionDisableCpuCacheFlush = -1
DirectSubmissionNewResourceTlbFlush = -1
DirectSubmissionDisableCacheFlush = -1
DirectSubmissionPCIBarrier = -1
DirectSubmissionDetectGpuHang = -1
DirectSubmissionPrintBuffers = 0
DirectSubmissionMaxRingBuffers = -1
EnableDirectSubmissionController = -1
DirectSubmissionControllerTimeout = -1
DirectSubmissionMonitorFenceInputPolicy = -1
UseVmBind = -1
EnableNullHardware = 0
ForceLinearImages = 0
@@ -467,6 +470,7 @@ LogGdiCalls = 0
LogGdiCallsToFile = 0
UseContextEndOffsetForEventCompletion = -1
DirectSubmissionInsertExtraMiMemFenceCommands = -1
DirectSubmissionInsertSfenceInstructionPriorToSubmission = -1
EnableTimestampWaitForEvents = -1
ForceEvictOnlyIfNecessaryFlag = -1
ForceWddmLowPriorityContextValue = -1

View File

@@ -29,6 +29,40 @@
using DirectSubmissionTest = Test<DirectSubmissionFixture>;
// With DirectSubmissionDisableCpuCacheFlush set to 1, cpuCachelineFlush must leave the
// recorded last-flushed pointer at the value it was reset to.
HWTEST_F(DirectSubmissionTest, whenDebugCacheFlushDisabledSetThenExpectNoCpuCacheFlush) {
    DebugManagerStateRestore restore;
    debugManager.flags.DirectSubmissionDisableCpuCacheFlush.set(1);

    MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
    EXPECT_TRUE(directSubmission.disableCpuCacheFlush);

    const uintptr_t untouchedPtrValue = 0;
    const size_t flushSize = 64;
    void *flushAddress = reinterpret_cast<void *>(0xABCD00u);

    CpuIntrinsicsTests::lastClFlushedPtr = 0;
    directSubmission.cpuCachelineFlush(flushAddress, flushSize);

    EXPECT_EQ(untouchedPtrValue, CpuIntrinsicsTests::lastClFlushedPtr);
}
// With DirectSubmissionDisableCpuCacheFlush set to 0, cpuCachelineFlush must record the
// flushed address. Skipped when the CPU does not report clflush support.
HWTEST_F(DirectSubmissionTest, whenDebugCacheFlushDisabledNotSetThenExpectCpuCacheFlush) {
    const bool clflushSupported = CpuInfo::getInstance().isFeatureSupported(CpuInfo::featureClflush);
    if (!clflushSupported) {
        GTEST_SKIP();
    }

    DebugManagerStateRestore restore;
    debugManager.flags.DirectSubmissionDisableCpuCacheFlush.set(0);

    MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
    EXPECT_FALSE(directSubmission.disableCpuCacheFlush);

    // cache-line aligned address, so exactly this pointer is expected to be flushed
    const uintptr_t flushedPtrValue = 0xABCD00u;
    const size_t flushSize = 64;
    void *flushAddress = reinterpret_cast<void *>(flushedPtrValue);

    CpuIntrinsicsTests::lastClFlushedPtr = 0;
    directSubmission.cpuCachelineFlush(flushAddress, flushSize);

    EXPECT_EQ(flushedPtrValue, CpuIntrinsicsTests::lastClFlushedPtr);
}
HWTEST_F(DirectSubmissionTest, givenDirectSubmissionDisabledWhenStopThenRingIsNotStopped) {
VariableBackup<UltHwConfig> backup(&ultHwConfig);
ultHwConfig.csrBaseCallDirectSubmissionAvailable = true;
@@ -268,6 +302,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWithCompletionFenceAllocatio
HWTEST_F(DirectSubmissionTest, givenDirectSubmissionInitializedWhenRingIsStartedThenExpectAllocationsCreatedAndCommandsDispatched) {
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
EXPECT_TRUE(directSubmission.disableCpuCacheFlush);
bool ret = directSubmission.initialize(true);
EXPECT_TRUE(ret);
@@ -383,6 +418,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionCurrentRingBuffersInUseWhenS
HWTEST_F(DirectSubmissionTest, givenDirectSubmissionAllocateFailWhenRingIsStartedThenExpectRingNotStarted) {
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
EXPECT_TRUE(directSubmission.disableCpuCacheFlush);
directSubmission.allocateOsResourcesReturn = false;
bool ret = directSubmission.initialize(true);
@@ -486,9 +522,12 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenDispatchEndingSectionThe
HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenGetDispatchSizeThenExpectCorrectSizeReturned) {
using Dispatcher = RenderDispatcher<FamilyType>;
DebugManagerStateRestore restorer;
debugManager.flags.DirectSubmissionDisableCacheFlush.set(0);
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
size_t expectedSize = directSubmission.getSizeStartSection() +
Dispatcher::getSizeCacheFlush(directSubmission.rootDeviceEnvironment) +
directSubmission.getSizeSemaphoreSection(false) + directSubmission.getSizeNewResourceHandler();
size_t actualSize = directSubmission.getSizeDispatch(false, false, directSubmission.dispatchMonitorFenceRequired(true));
@@ -500,6 +539,7 @@ HWTEST_F(DirectSubmissionTest,
using Dispatcher = RenderDispatcher<FamilyType>;
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
directSubmission.disableCacheFlush = true;
size_t expectedSize = directSubmission.getSizeStartSection() +
directSubmission.getSizeSemaphoreSection(false) + directSubmission.getSizeNewResourceHandler();

View File

@@ -419,6 +419,56 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, givenDefaultDirectSubmissionFlatRin
EXPECT_FALSE(directSubmission.copyCommandBufferIntoRing(batchBuffer));
}
// Compares a submission created with cache flush enabled against one with disableCacheFlush
// forced on: the dispatch size must shrink by the cache flush size and the ring must not
// contain the PIPE_CONTROL that dispatchCacheFlush would emit.
HWTEST_F(DirectSubmissionDispatchBufferTest,
         givenDirectSubmissionDisableCacheFlushWhenDispatchWorkloadCalledThenExpectStartWithoutCacheFlush) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using Dispatcher = RenderDispatcher<FamilyType>;

    DebugManagerStateRestore restorer;
    debugManager.flags.DirectSubmissionDisableCacheFlush.set(0);

    // reference object: cache flush enabled through the debug key
    MockDirectSubmissionHw<FamilyType, Dispatcher> regularDirectSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
    const size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch(false, false, regularDirectSubmission.dispatchMonitorFenceRequired(false));

    // object under test: cache flush disabled directly on the member
    MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
    directSubmission.disableCacheFlush = true;
    EXPECT_TRUE(directSubmission.allocateResources());

    const size_t flushSize = Dispatcher::getSizeCacheFlush(directSubmission.rootDeviceEnvironment);
    const size_t disabledSizeDispatch = directSubmission.getSizeDispatch(false, false, directSubmission.dispatchMonitorFenceRequired(false));
    EXPECT_EQ(disabledSizeDispatch, (regularSizeDispatch - flushSize));

    directSubmission.dispatchWorkloadSection(batchBuffer, directSubmission.dispatchMonitorFenceRequired(batchBuffer.dispatchMonitorFence));
    const size_t expectedDispatchSize = disabledSizeDispatch - directSubmission.getSizeNewResourceHandler();
    EXPECT_EQ(expectedDispatchSize, directSubmission.ringCommandStream.getUsed());

    HardwareParse hwParse;
    hwParse.parsePipeControl = true;
    hwParse.parseCommands<FamilyType>(directSubmission.ringCommandStream, 0);
    hwParse.findHardwareCommands<FamilyType>();

    MI_BATCH_BUFFER_START *bbStart = hwParse.getCommand<MI_BATCH_BUFFER_START>();
    ASSERT_NE(nullptr, bbStart);

    // emit the cache flush into a scratch stream to obtain the command to search for
    uint8_t scratchStorage[256];
    LinearStream referenceStream;
    referenceStream.replaceBuffer(scratchStorage, 256);
    RenderDispatcher<FamilyType>::dispatchCacheFlush(referenceStream, pDevice->getRootDeviceEnvironment(), 0ull);
    auto referencePipeControl = static_cast<PIPE_CONTROL *>(referenceStream.getCpuBase());

    bool foundFlush = false;
    for (auto &parsedCommand : hwParse.pipeControlList) {
        auto candidate = genCmdCast<PIPE_CONTROL *>(parsedCommand);
        if (memcmp(referencePipeControl, candidate, sizeof(PIPE_CONTROL)) == 0) {
            foundFlush = true;
            break;
        }
    }
    EXPECT_FALSE(foundFlush);
}
HWTEST_F(DirectSubmissionDispatchBufferTest,
givenDirectSubmissionRingStartAndSwitchBuffersWhenDispatchingCommandBufferThenExpectDispatchInCommandBufferAndQueueCountIncrease) {
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
@@ -771,6 +821,66 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, givenRingBufferRestartRequestWhenDi
EXPECT_EQ(directSubmission.submitCount, 1u);
}
// Checks how many sfence/mfence calls one dispatchCommandBuffer performs for every value of
// DirectSubmissionInsertSfenceInstructionPriorToSubmission; -1 is expected to behave like 2.
HWTEST_F(DirectSubmissionDispatchBufferTest, givenDebugFlagSetWhenDispatchingWorkloadThenProgramSfenceInstruction) {
    DebugManagerStateRestore restorer{};

    using Dispatcher = BlitterDispatcher<FamilyType>;

    FlushStampTracker flushStamp(true);

    for (int32_t debugFlag : {-1, 0, 1, 2}) {
        debugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.set(debugFlag);

        MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
        EXPECT_TRUE(directSubmission.initialize(true));

        auto initialSfenceCounterValue = CpuIntrinsicsTests::sfenceCounter.load();
        auto initialMfenceCounterValue = CpuIntrinsicsTests::mfenceCounter.load();

        EXPECT_TRUE(directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp));

        uint32_t expectedSfenceCount = (debugFlag == -1) ? 2 : static_cast<uint32_t>(debugFlag);
        uint32_t expectedMfenceCount = 0u;

        // The first fence is an mfence under the same conditions SemaphoreFenceHelper checks:
        // non-integrated device, no PCI barrier pointer and no global fence acquire requirement.
        // The pciBarrierPtr term keeps this expectation aligned with the stop-ring-buffer test.
        if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice && !directSubmission.pciBarrierPtr && !pDevice->getProductHelper().isAcquireGlobalFenceInDirectSubmissionRequired(pDevice->getHardwareInfo()) && expectedSfenceCount > 0u) {
            --expectedSfenceCount;
            ++expectedMfenceCount;
        }

        EXPECT_EQ(initialSfenceCounterValue + expectedSfenceCount, CpuIntrinsicsTests::sfenceCounter);
        EXPECT_EQ(initialMfenceCounterValue + expectedMfenceCount, CpuIntrinsicsTests::mfenceCounter);
    }
}
// Checks how many sfence/mfence calls one stopRingBuffer performs for every value of
// DirectSubmissionInsertSfenceInstructionPriorToSubmission; -1 is expected to behave like 2.
HWTEST_F(DirectSubmissionDispatchBufferTest, givenDebugFlagSetWhenStoppingRingbufferThenProgramSfenceInstruction) {
    using Dispatcher = BlitterDispatcher<FamilyType>;

    DebugManagerStateRestore restorer{};
    FlushStampTracker flushStamp(true);

    for (int32_t debugFlag : {-1, 0, 1, 2}) {
        debugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.set(debugFlag);

        MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
        EXPECT_TRUE(directSubmission.initialize(true));

        // snapshot both counters right before the call under test
        auto sfenceCountBefore = CpuIntrinsicsTests::sfenceCounter.load();
        auto mfenceCountBefore = CpuIntrinsicsTests::mfenceCounter.load();

        EXPECT_TRUE(directSubmission.stopRingBuffer(false));

        const uint32_t totalFenceCount = (debugFlag == -1) ? 2 : static_cast<uint32_t>(debugFlag);

        // one of the fences is an mfence when the device is not integrated, no PCI barrier
        // pointer is set and no global fence acquire is required
        const bool firstFenceIsMfence = !pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice && !directSubmission.pciBarrierPtr && !pDevice->getProductHelper().isAcquireGlobalFenceInDirectSubmissionRequired(pDevice->getHardwareInfo()) && totalFenceCount > 0u;

        const uint32_t expectedMfenceCount = firstFenceIsMfence ? 1u : 0u;
        const uint32_t expectedSfenceCount = totalFenceCount - expectedMfenceCount;

        EXPECT_EQ(sfenceCountBefore + expectedSfenceCount, CpuIntrinsicsTests::sfenceCounter);
        EXPECT_EQ(mfenceCountBefore + expectedMfenceCount, CpuIntrinsicsTests::mfenceCounter);
    }
}
struct DirectSubmissionRelaxedOrderingTests : public DirectSubmissionDispatchBufferTest {
void SetUp() override {
debugManager.flags.DirectSubmissionRelaxedOrdering.set(1);

View File

@@ -95,6 +95,7 @@ struct MockDrmDirectSubmission : public DrmDirectSubmission<GfxFamily, Dispatche
using BaseClass::handleResidency;
using BaseClass::handleSwitchRingBuffers;
using BaseClass::immWritePostSyncOffset;
using BaseClass::inputMonitorFenceDispatchRequirement;
using BaseClass::isCompleted;
using BaseClass::isDisablePrefetcherRequired;
using BaseClass::isNewResourceHandleNeeded;
@@ -108,6 +109,7 @@ struct MockDrmDirectSubmission : public DrmDirectSubmission<GfxFamily, Dispatche
using BaseClass::ringBuffers;
using BaseClass::ringStart;
using BaseClass::rootDeviceEnvironment;
using BaseClass::sfenceMode;
using BaseClass::submit;
using BaseClass::switchRingBuffers;
using BaseClass::tagAddress;
@@ -322,6 +324,7 @@ HWTEST_F(DrmDirectSubmissionTest, givenPciBarrierWhenCreateDirectSubmissionThenP
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
EXPECT_NE(nullptr, directSubmission.pciBarrierPtr);
EXPECT_NE(DirectSubmissionSfenceMode::disabled, directSubmission.sfenceMode);
EXPECT_FALSE(directSubmission.miMemFenceRequired);
SysCalls::munmap(ptr, MemoryConstants::pageSize);
@@ -342,6 +345,7 @@ HWTEST_F(DrmDirectSubmissionTest, givenPciBarrierWhenCreateDirectSubmissionAndMm
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
EXPECT_EQ(nullptr, directSubmission.pciBarrierPtr);
EXPECT_NE(DirectSubmissionSfenceMode::disabled, directSubmission.sfenceMode);
auto expectMiMemFence = device->getHardwareInfo().capabilityTable.isIntegratedDevice ? false : device->getRootDeviceEnvironment().getHelper<ProductHelper>().isAcquireGlobalFenceInDirectSubmissionRequired(device->getHardwareInfo());
EXPECT_EQ(directSubmission.miMemFenceRequired, expectMiMemFence);
@@ -362,6 +366,7 @@ HWTEST_F(DrmDirectSubmissionTest, givenPciBarrierDisabledWhenCreateDirectSubmiss
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
EXPECT_EQ(nullptr, directSubmission.pciBarrierPtr);
EXPECT_NE(DirectSubmissionSfenceMode::disabled, directSubmission.sfenceMode);
auto expectMiMemFence = device->getHardwareInfo().capabilityTable.isIntegratedDevice ? false : device->getRootDeviceEnvironment().getHelper<ProductHelper>().isAcquireGlobalFenceInDirectSubmissionRequired(device->getHardwareInfo());
EXPECT_EQ(directSubmission.miMemFenceRequired, expectMiMemFence);
@@ -1208,6 +1213,124 @@ HWTEST_F(DrmDirectSubmissionTest, givenDrmDirectSubmissionWhenEnableRingSwitchTa
drmDirectSubmission.ringStart = false;
}
// Without any debug key set, inputMonitorFenceDispatchRequirement must be true after construction.
HWTEST_F(DrmDirectSubmissionTest, givenDrmDirectSubmissionWhenGettingDefaultInputMonitorFencePolicyThenDefaultIsTrue) {
    auto &commandStreamReceiver = *device->getDefaultEngine().commandStreamReceiver;
    MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(commandStreamReceiver);

    EXPECT_TRUE(drmDirectSubmission.inputMonitorFenceDispatchRequirement);
}
// With DirectSubmissionMonitorFenceInputPolicy set to 0, a batch buffer flagged with
// hasStallingCmds is dispatched and the ring is expected to contain no PIPE_CONTROL with a
// write-immediate-data post sync operation.
HWTEST_F(DrmDirectSubmissionTest,
         givenDrmDirectSubmissionWithStallingCommandInputMonitorFencePolicyWhenDispatchingWorkloadWithDisabledMonitorFenceThenDrmIgnoresInputFlag) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION;

    DebugManagerStateRestore dbgRestorer;
    debugManager.flags.DirectSubmissionMonitorFenceInputPolicy.set(0);

    MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);
    EXPECT_FALSE(drmDirectSubmission.inputMonitorFenceDispatchRequirement);

    FlushStampTracker flushStamp(true);
    EXPECT_TRUE(drmDirectSubmission.initialize(false));

    // back the batch buffer with a freshly allocated command buffer
    const AllocationProperties commandBufferProperties{device->getRootDeviceIndex(), 0x1000,
                                                       AllocationType::commandBuffer, device->getDeviceBitfield()};
    GraphicsAllocation *commandBuffer = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties(commandBufferProperties);

    LinearStream stream;
    stream.replaceGraphicsAllocation(commandBuffer);
    stream.replaceBuffer(commandBuffer->getUnderlyingBuffer(), commandBuffer->getUnderlyingBufferSize());
    stream.getSpace(0x20);
    memset(stream.getCpuBase(), 0, 0x20);

    BatchBuffer batchBuffer = {};
    batchBuffer.endCmdPtr = ptrOffset(stream.getCpuBase(), 0x20);
    batchBuffer.commandBufferAllocation = commandBuffer;
    batchBuffer.usedSize = 0x40;
    batchBuffer.taskStartAddress = 0x881112340000;
    batchBuffer.stream = &stream;
    batchBuffer.hasStallingCmds = true;

    EXPECT_TRUE(drmDirectSubmission.dispatchCommandBuffer(batchBuffer, flushStamp));

    HardwareParse hwParse;
    hwParse.parsePipeControl = true;
    hwParse.parseCommands<FamilyType>(drmDirectSubmission.ringCommandStream, 0);
    hwParse.findHardwareCommands<FamilyType>();

    // scan the parsed pipe controls for a write-immediate-data post sync
    bool foundFenceUpdate = false;
    for (auto &parsedCommand : hwParse.pipeControlList) {
        auto *candidate = reinterpret_cast<PIPE_CONTROL *>(parsedCommand);
        if (candidate->getPostSyncOperation() != POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            continue;
        }
        foundFenceUpdate = true;
        break;
    }
    EXPECT_FALSE(foundFenceUpdate);

    executionEnvironment.memoryManager->freeGraphicsMemory(commandBuffer);

    *drmDirectSubmission.tagAddress = 1;
}
// With DirectSubmissionMonitorFenceInputPolicy forced to 1 (the "explicit flag" policy,
// per the test name) the DRM direct submission must report
// inputMonitorFenceDispatchRequirement == true, and dispatching a batch buffer that sets
// dispatchMonitorFence must still not place any PIPE_CONTROL with a WRITE_IMMEDIATE_DATA
// post-sync operation into the ring command stream.
HWTEST_F(DrmDirectSubmissionTest,
         givenDrmDirectSubmissionWithExplicitFlagInputMonitorFencePolicyWhenDispatchingWorkloadWithDisabledMonitorFenceThenDrmIgnoresInputFlag) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION;

    // The flag has to be set before the direct submission object is constructed,
    // because the requirement is checked right after construction.
    DebugManagerStateRestore dbgRestorer;
    debugManager.flags.DirectSubmissionMonitorFenceInputPolicy.set(1);

    MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);
    EXPECT_TRUE(drmDirectSubmission.inputMonitorFenceDispatchRequirement);

    FlushStampTracker flushStamp(true);
    EXPECT_TRUE(drmDirectSubmission.initialize(false));

    BatchBuffer batchBuffer = {};
    LinearStream stream;

    const AllocationProperties commandBufferProperties{device->getRootDeviceIndex(), 0x1000,
                                                       AllocationType::commandBuffer, device->getDeviceBitfield()};
    GraphicsAllocation *commandBuffer = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties(commandBufferProperties);
    // Stop the test with a diagnostic instead of dereferencing a null allocation below.
    ASSERT_NE(nullptr, commandBuffer);

    // Back the client stream with the allocation and reserve a zeroed 0x20-byte region.
    stream.replaceGraphicsAllocation(commandBuffer);
    stream.replaceBuffer(commandBuffer->getUnderlyingBuffer(), commandBuffer->getUnderlyingBufferSize());
    stream.getSpace(0x20);
    memset(stream.getCpuBase(), 0, 0x20);

    batchBuffer.endCmdPtr = ptrOffset(stream.getCpuBase(), 0x20);
    batchBuffer.commandBufferAllocation = commandBuffer;
    batchBuffer.usedSize = 0x40;
    batchBuffer.taskStartAddress = 0x881112340000;
    batchBuffer.stream = &stream;
    batchBuffer.dispatchMonitorFence = true;

    EXPECT_TRUE(drmDirectSubmission.dispatchCommandBuffer(batchBuffer, flushStamp));

    // Parse the whole ring (offset 0) and look for a fence-writing PIPE_CONTROL.
    HardwareParse hwParse;
    hwParse.parsePipeControl = true;
    hwParse.parseCommands<FamilyType>(drmDirectSubmission.ringCommandStream, 0);
    hwParse.findHardwareCommands<FamilyType>();

    bool foundFenceUpdate = false;
    for (auto &cmd : hwParse.pipeControlList) {
        PIPE_CONTROL *pipeControl = reinterpret_cast<PIPE_CONTROL *>(cmd);
        if (pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            foundFenceUpdate = true;
            break;
        }
    }
    EXPECT_FALSE(foundFenceUpdate);

    executionEnvironment.memoryManager->freeGraphicsMemory(commandBuffer);
    // NOTE(review): presumably bumps the tag so teardown of the direct submission does not
    // wait on a value that is never written - confirm against the destructor.
    *drmDirectSubmission.tagAddress = 1;
}
HWTEST_F(DrmDirectSubmissionTest, givenGpuHangWhenWaitCalledThenGpuHangDetected) {
using Dispatcher = RenderDispatcher<FamilyType>;

View File

@@ -799,6 +799,7 @@ HWTEST_F(WddmDirectSubmissionTest,
FlushStampTracker flushStamp(true);
MockWddmDirectSubmission<FamilyType, Dispatcher> wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);
EXPECT_TRUE(wddmDirectSubmission.inputMonitorFenceDispatchRequirement);
bool ret = wddmDirectSubmission.initialize(true);
EXPECT_TRUE(ret);
@@ -862,6 +863,7 @@ HWTEST_F(WddmDirectSubmissionTest,
FlushStampTracker flushStamp(true);
MockWddmDirectSubmission<FamilyType, Dispatcher> wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);
EXPECT_TRUE(wddmDirectSubmission.inputMonitorFenceDispatchRequirement);
bool ret = wddmDirectSubmission.initialize(true);
EXPECT_TRUE(ret);
@@ -903,6 +905,128 @@ HWTEST_F(WddmDirectSubmissionTest,
EXPECT_TRUE(wddmDirectSubmission.dispatchMonitorFenceRequired(true));
}
// Policy 0 ("stalling command", per the test name) combined with a batch buffer that only
// sets dispatchMonitorFence: the requirement flag must read false and the commands added
// to the ring by the dispatch must contain no PIPE_CONTROL with a WRITE_IMMEDIATE_DATA
// post-sync operation.
HWTEST_F(WddmDirectSubmissionTest,
         givenWddmDirectSubmissionWithDisabledMonitorFenceWhenInputPolicyIsStallingCommandAndBatchBufferDispatchedWithExplicitMonitorFenceFlagThenDispatchNoPostSyncOperation) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
    using Dispatcher = RenderDispatcher<FamilyType>;

    DebugManagerStateRestore dbgRestorer;
    debugManager.flags.DirectSubmissionMonitorFenceInputPolicy.set(0);

    BatchBuffer batchBuffer = {};
    auto memoryManager = executionEnvironment->memoryManager.get();

    // Client command buffer: 0x40 bytes reserved, first 0x20 bytes zeroed.
    const AllocationProperties commandBufferProperties{device->getRootDeviceIndex(), 0x1000,
                                                       AllocationType::commandBuffer, device->getDeviceBitfield()};
    GraphicsAllocation *clientCommandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(commandBufferProperties);
    ASSERT_NE(nullptr, clientCommandBuffer);

    auto clientStream = std::make_unique<LinearStream>(clientCommandBuffer);
    clientStream->getSpace(0x40);
    memset(clientStream->getCpuBase(), 0, 0x20);

    batchBuffer.endCmdPtr = ptrOffset(clientStream->getCpuBase(), 0x20);
    batchBuffer.commandBufferAllocation = clientCommandBuffer;
    batchBuffer.usedSize = 0x40;
    batchBuffer.taskStartAddress = clientCommandBuffer->getGpuAddress();
    batchBuffer.stream = clientStream.get();
    batchBuffer.dispatchMonitorFence = true;

    FlushStampTracker flushStamp(true);
    MockWddmDirectSubmission<FamilyType, Dispatcher> wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);
    EXPECT_FALSE(wddmDirectSubmission.inputMonitorFenceDispatchRequirement);
    EXPECT_TRUE(wddmDirectSubmission.initialize(true));

    // Remember where the ring ended after initialization so only the dispatch is parsed.
    const size_t ringOffsetBeforeDispatch = wddmDirectSubmission.ringCommandStream.getUsed();
    EXPECT_TRUE(wddmDirectSubmission.dispatchCommandBuffer(batchBuffer, flushStamp));

    HardwareParse hwParse;
    hwParse.parsePipeControl = true;
    hwParse.parseCommands<FamilyType>(wddmDirectSubmission.ringCommandStream, ringOffsetBeforeDispatch);
    hwParse.findHardwareCommands<FamilyType>();

    bool fenceWriteFound = false;
    for (auto &cmd : hwParse.pipeControlList) {
        auto pipeControl = genCmdCast<PIPE_CONTROL *>(cmd);
        if (pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            fenceWriteFound = true;
            break;
        }
    }
    EXPECT_FALSE(fenceWriteFound);

    memoryManager->freeGraphicsMemory(clientCommandBuffer);
}
// Policy 1 ("explicit monitor fence", per the test name) combined with a batch buffer that
// only sets hasStallingCmds: the requirement flag must read true and the commands added to
// the ring by the dispatch must contain no PIPE_CONTROL with a WRITE_IMMEDIATE_DATA
// post-sync operation.
HWTEST_F(WddmDirectSubmissionTest,
         givenWddmDirectSubmissionWithDisabledMonitorFenceWhenInputPolicyIsExplicitMonitorFenceAndBatchBufferDispatchedWithStallingCommandFlagThenDispatchNoPostSyncOperation) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;
    using Dispatcher = RenderDispatcher<FamilyType>;

    DebugManagerStateRestore dbgRestorer;
    debugManager.flags.DirectSubmissionMonitorFenceInputPolicy.set(1);

    BatchBuffer batchBuffer = {};
    auto memoryManager = executionEnvironment->memoryManager.get();

    // Client command buffer: 0x40 bytes reserved, first 0x20 bytes zeroed.
    const AllocationProperties commandBufferProperties{device->getRootDeviceIndex(), 0x1000,
                                                       AllocationType::commandBuffer, device->getDeviceBitfield()};
    GraphicsAllocation *clientCommandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(commandBufferProperties);
    ASSERT_NE(nullptr, clientCommandBuffer);

    auto clientStream = std::make_unique<LinearStream>(clientCommandBuffer);
    clientStream->getSpace(0x40);
    memset(clientStream->getCpuBase(), 0, 0x20);

    batchBuffer.endCmdPtr = ptrOffset(clientStream->getCpuBase(), 0x20);
    batchBuffer.commandBufferAllocation = clientCommandBuffer;
    batchBuffer.usedSize = 0x40;
    batchBuffer.taskStartAddress = clientCommandBuffer->getGpuAddress();
    batchBuffer.stream = clientStream.get();
    batchBuffer.hasStallingCmds = true;

    FlushStampTracker flushStamp(true);
    MockWddmDirectSubmission<FamilyType, Dispatcher> wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);
    EXPECT_TRUE(wddmDirectSubmission.inputMonitorFenceDispatchRequirement);
    EXPECT_TRUE(wddmDirectSubmission.initialize(true));

    // Remember where the ring ended after initialization so only the dispatch is parsed.
    const size_t ringOffsetBeforeDispatch = wddmDirectSubmission.ringCommandStream.getUsed();
    EXPECT_TRUE(wddmDirectSubmission.dispatchCommandBuffer(batchBuffer, flushStamp));

    HardwareParse hwParse;
    hwParse.parsePipeControl = true;
    hwParse.parseCommands<FamilyType>(wddmDirectSubmission.ringCommandStream, ringOffsetBeforeDispatch);
    hwParse.findHardwareCommands<FamilyType>();

    bool fenceWriteFound = false;
    for (auto &cmd : hwParse.pipeControlList) {
        auto pipeControl = genCmdCast<PIPE_CONTROL *>(cmd);
        if (pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            fenceWriteFound = true;
            break;
        }
    }
    EXPECT_FALSE(fenceWriteFound);

    memoryManager->freeGraphicsMemory(clientCommandBuffer);
}
HWTEST_F(WddmDirectSubmissionTest,
givenBatchBufferWithThrottleLowWhenCallDispatchCommandBufferThenStoreLastSubmitedThrottle) {
@@ -1025,6 +1149,34 @@ HWTEST_F(WddmDirectSubmissionTest, givenDirectSubmissionWhenUnblockPagingFenceSe
EXPECT_GT(wddmDirectSubmission.semaphoreData->pagingFenceCounter, mockedPagingFence);
}
// For each value of DirectSubmissionInsertSfenceInstructionPriorToSubmission in
// {-1, 0, 1, 2}, calls unblockPagingFenceSemaphore(0) on a freshly initialized direct
// submission and checks how far the sfence/mfence test counters advanced.
HWTEST_F(WddmDirectSubmissionTest, givenDebugFlagSetWhenUnblockPagingFenceSemaphoreThenProgramSfenceInstruction) {
    using Dispatcher = RenderDispatcher<FamilyType>;

    DebugManagerStateRestore restorer{};
    FlushStampTracker flushStamp(true);

    for (int32_t flagValue : {-1, 0, 1, 2}) {
        debugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.set(flagValue);

        MockWddmDirectSubmission<FamilyType, Dispatcher> directSubmission(*device->getDefaultEngine().commandStreamReceiver);
        EXPECT_TRUE(directSubmission.initialize(true));

        // Snapshot the counters so only the fences issued by the call below are measured.
        auto sfencesBefore = CpuIntrinsicsTests::sfenceCounter.load();
        auto mfencesBefore = CpuIntrinsicsTests::mfenceCounter.load();

        directSubmission.unblockPagingFenceSemaphore(0u);

        // -1 is expected to behave like 2 sfences; any other value is the count itself.
        uint32_t expectedSfences = 2;
        if (flagValue != -1) {
            expectedSfences = static_cast<uint32_t>(flagValue);
        }
        uint32_t expectedMfences = 0u;

        // On a non-integrated device without a PCI barrier pointer and without the
        // acquire-global-fence requirement, one of the expected sfences is counted as an
        // mfence instead (only when at least one sfence is expected at all).
        const bool oneSfenceBecomesMfence = !device->getHardwareInfo().capabilityTable.isIntegratedDevice &&
                                            !directSubmission.pciBarrierPtr &&
                                            !device->getProductHelper().isAcquireGlobalFenceInDirectSubmissionRequired(device->getHardwareInfo()) &&
                                            expectedSfences > 0u;
        if (oneSfenceBecomesMfence) {
            --expectedSfences;
            ++expectedMfences;
        }

        EXPECT_EQ(sfencesBefore + expectedSfences, CpuIntrinsicsTests::sfenceCounter);
        EXPECT_EQ(mfencesBefore + expectedMfences, CpuIntrinsicsTests::mfenceCounter);
    }
}
TEST(DirectSubmissionControllerWindowsTest, givenDirectSubmissionControllerWhenCallingSleepThenRequestHighResolutionTimers) {
VariableBackup<size_t> timeBeginPeriodCalledBackup(&SysCalls::timeBeginPeriodCalled, 0u);
VariableBackup<MMRESULT> timeBeginPeriodLastValueBackup(&SysCalls::timeBeginPeriodLastValue, 0u);

View File

@@ -38,6 +38,7 @@ struct MockWddmDirectSubmission : public WddmDirectSubmission<GfxFamily, Dispatc
using BaseClass::handleResidency;
using BaseClass::handleStopRingBuffer;
using BaseClass::handleSwitchRingBuffers;
using BaseClass::inputMonitorFenceDispatchRequirement;
using BaseClass::isCompleted;
using BaseClass::isDisablePrefetcherRequired;
using BaseClass::isNewResourceHandleNeeded;