mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
Revert "refactor: Remove unused ulls functionalities"
This reverts commit 8ab4e1bcb8.
Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
74c7c625b8
commit
7610d7c90a
@@ -448,10 +448,12 @@ DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionBufferPlacement, -1, "-1: do not
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionSemaphorePlacement, -1, "-1: do not override, 0: non-system, 1: system")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionBufferAddressing, -1, "-1: do not override, 0: not use 48bit, 1: use 48bit")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionSemaphoreAddressing, -1, "-1: do not override, 0: not use 48bit, 1: use 48bit")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisableCpuCacheFlush, -1, "-1: do not override, 0: disable, 1: enable")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDrmContext, -1, "Create special drm context: -1: default, when new residency model available, 0: disable, 1: enable")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionOverrideBlitterSupport, -1, "Overrides default blitter support: -1: do not override, 0: disable engine support, 1: enable engine support with init start, 2: enable engine support without init start")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionOverrideRenderSupport, -1, "Overrides default render support: -1: do not override, 0: disable engine support, 1: enable engine support with init start, 2: enable engine support without init start")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionOverrideComputeSupport, -1, "Overrides default compute support: -1: do not override, 0: disable engine support, 1: enable engine support with init start, 2: enable engine support without init start")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisableCacheFlush, -1, "-1: driver default, 0: additional cache flush is present 1: disable dispatching cache flush commands")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionNewResourceTlbFlush, -1, "-1: driver default - flush when new resource is bound, 0: disabled, 1: enabled")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDetectGpuHang, -1, "-1: default, 0: disable gpu hang detection after raising ulls semaphore, 1: enable gpu hang detection after raising ulls semaphore")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionFlatRingBuffer, -1, "-1: default, 0: disable, 1: enable, Copies task command buffer directly into ring, implemented for immediate command lists only")
|
||||
@@ -463,6 +465,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionForceLocalMemoryStorageMode, -1,
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableRingSwitchTagUpdateWa, -1, "-1: default, 0 - disable, 1 - enable. If enabled, completionFences wont be updated if ring is not running.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionPCIBarrier, -1, "Use PCI barrier for data synchronization before semaphore unblock -1: default, 0 - disable, 1 - enable.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionInsertExtraMiMemFenceCommands, -1, "-1: default, 0 - disable, 1 - enable. If enabled, add extra MI_MEM_FENCE instructions with acquire bit set")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionInsertSfenceInstructionPriorToSubmission, -1, "-1: default, 0 - disable, 1 - Insert _mm_sfence before unlocking semaphore only, 2 - insert before and after semaphore")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionMaxRingBuffers, -1, "-1: default, >0: max ring buffer count, During switch ring buffer, if there is no available ring, wait for completion instead of allocating new one if DirectSubmissionMaxRingBuffers is reached")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisablePrefetcher, -1, "-1: default, 0 - disable, 1 - enable. If enabled, disable prefetcher is being dispatched")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrdering, -1, "-1: default, 0 - disable, 1 - enable. If enabled, tasks sent to direct submission ring may be dispatched out of order")
|
||||
@@ -471,6 +474,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingQueueSizeLimit, -
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingMinNumberOfClients, -1, "-1: default, >0: Enables RelaxedOrdering mode only if specified number of clients is assigned to given CSR.")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingCounterHeuristic, -1, "-1: default, 0: disabled, 1: enabled. If set use counter based heuristic to allow for relaxed ordering dispatch")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingCounterHeuristicTreshold, -1, "-1: default, >0: limit number of append calls to disable relaxed ordering dispatch")
|
||||
// Chooses which batch-buffer flag requests a monitor fence: a non-zero value keeps the explicit
// dispatchMonitorFence flag, 0 switches to the hasStallingCmds flag.
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionMonitorFenceInputPolicy, -1, "-1: default, 0: stalling command flag, 1: explicit monitor fence flag. Selects policy to dispatch monitor fence upon input flag, either for every stalling command or explicit monitor fence dispatch")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionPrintSemaphoreUsage, -1, "-1: default, 0: disabled, 1: enabled. If set, print DirectSubmission semaphore programming and unlocking")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionSwitchSemaphoreMode, -1, "-1: default, 1: enable switch on unsuccessful, 0: disable switch on unsuccessful")
|
||||
DECLARE_DEBUG_VARIABLE(bool, DirectSubmissionPrintBuffers, false, "Print address of submitted command buffers")
|
||||
|
||||
@@ -41,6 +41,16 @@ struct TagData {
|
||||
uint64_t tagValue = 0ull;
|
||||
};
|
||||
|
||||
// Controls the CPU fences issued around the direct submission semaphore update.
// The numeric values match those accepted by the
// DirectSubmissionInsertSfenceInstructionPriorToSubmission debug variable.
enum class DirectSubmissionSfenceMode : int32_t {
    // No fence is issued.
    disabled = 0,
    // A fence is issued only before the semaphore is unlocked.
    beforeSemaphoreOnly = 1,
    // A fence is issued both before and after the semaphore is unlocked.
    beforeAndAfterSemaphore = 2,
};
|
||||
|
||||
// Compile-time defaults for direct submission (ULLS) behaviour.
namespace UllsDefaults {
// Initial value of DirectSubmissionHw::disableCacheFlush; the
// DirectSubmissionDisableCacheFlush debug variable can override it.
inline constexpr bool defaultDisableCacheFlush{true};
} // namespace UllsDefaults
|
||||
|
||||
struct BatchBuffer;
|
||||
class FlushStampTracker;
|
||||
class GraphicsAllocation;
|
||||
@@ -94,14 +104,18 @@ class DirectSubmissionHw {
|
||||
protected:
|
||||
struct SemaphoreFenceHelper : public NonCopyableAndNonMovableClass {
|
||||
SemaphoreFenceHelper(const auto &directSubmission) : directSubmission(directSubmission) {
|
||||
if (!directSubmission.miMemFenceRequired && !directSubmission.pciBarrierPtr && !directSubmission.hwInfo->capabilityTable.isIntegratedDevice) {
|
||||
CpuIntrinsics::mfence();
|
||||
} else {
|
||||
CpuIntrinsics::sfence();
|
||||
if (directSubmission.sfenceMode >= DirectSubmissionSfenceMode::beforeSemaphoreOnly) {
|
||||
if (!directSubmission.miMemFenceRequired && !directSubmission.pciBarrierPtr && !directSubmission.hwInfo->capabilityTable.isIntegratedDevice) {
|
||||
CpuIntrinsics::mfence();
|
||||
} else {
|
||||
CpuIntrinsics::sfence();
|
||||
}
|
||||
}
|
||||
}
|
||||
~SemaphoreFenceHelper() {
|
||||
CpuIntrinsics::sfence();
|
||||
if (directSubmission.sfenceMode == DirectSubmissionSfenceMode::beforeAndAfterSemaphore) {
|
||||
CpuIntrinsics::sfence();
|
||||
}
|
||||
}
|
||||
|
||||
const DirectSubmissionHw<GfxFamily, Dispatcher> &directSubmission;
|
||||
@@ -138,6 +152,8 @@ class DirectSubmissionHw {
|
||||
bool submitCommandBufferToGpu(bool needStart, uint64_t gpuAddress, size_t size, bool needWait, const ResidencyContainer *allocationsForResidency);
|
||||
bool copyCommandBufferIntoRing(BatchBuffer &batchBuffer);
|
||||
|
||||
void cpuCachelineFlush(void *ptr, size_t size);
|
||||
|
||||
void dispatchSemaphoreSection(uint32_t value);
|
||||
size_t getSizeSemaphoreSection(bool relaxedOrderingSchedulerRequired);
|
||||
|
||||
@@ -243,11 +259,14 @@ class DirectSubmissionHw {
|
||||
uint32_t activeTiles = 1u;
|
||||
uint32_t immWritePostSyncOffset = 0u;
|
||||
uint32_t currentRelaxedOrderingQueueSize = 0;
|
||||
DirectSubmissionSfenceMode sfenceMode = DirectSubmissionSfenceMode::beforeAndAfterSemaphore;
|
||||
volatile uint32_t reserved = 0u;
|
||||
uint32_t dispatchErrorCode = 0;
|
||||
QueueThrottle lastSubmittedThrottle = QueueThrottle::MEDIUM;
|
||||
|
||||
bool ringStart = false;
|
||||
bool disableCpuCacheFlush = true;
|
||||
bool disableCacheFlush = false;
|
||||
bool partitionedMode = false;
|
||||
bool partitionConfigSet = true;
|
||||
bool miMemFenceRequired = false;
|
||||
@@ -259,6 +278,7 @@ class DirectSubmissionHw {
|
||||
bool relaxedOrderingEnabled = false;
|
||||
bool relaxedOrderingInitialized = false;
|
||||
bool relaxedOrderingSchedulerRequired = false;
|
||||
bool inputMonitorFenceDispatchRequirement = true;
|
||||
bool notifyKmdDuringMonitorFence = false;
|
||||
};
|
||||
} // namespace NEO
|
||||
|
||||
@@ -49,10 +49,16 @@ DirectSubmissionHw<GfxFamily, Dispatcher>::DirectSubmissionHw(const DirectSubmis
|
||||
auto &productHelper = inputParams.rootDeviceEnvironment.getHelper<ProductHelper>();
|
||||
auto &compilerProductHelper = inputParams.rootDeviceEnvironment.getHelper<CompilerProductHelper>();
|
||||
|
||||
disableCacheFlush = UllsDefaults::defaultDisableCacheFlush;
|
||||
|
||||
if (debugManager.flags.DirectSubmissionMaxRingBuffers.get() != -1) {
|
||||
this->maxRingBufferCount = debugManager.flags.DirectSubmissionMaxRingBuffers.get();
|
||||
}
|
||||
|
||||
if (debugManager.flags.DirectSubmissionDisableCacheFlush.get() != -1) {
|
||||
disableCacheFlush = !!debugManager.flags.DirectSubmissionDisableCacheFlush.get();
|
||||
}
|
||||
|
||||
if (debugManager.flags.DirectSubmissionDetectGpuHang.get() != -1) {
|
||||
detectGpuHang = !!debugManager.flags.DirectSubmissionDetectGpuHang.get();
|
||||
}
|
||||
@@ -67,11 +73,26 @@ DirectSubmissionHw<GfxFamily, Dispatcher>::DirectSubmissionHw(const DirectSubmis
|
||||
this->systemMemoryFenceAddressSet = true;
|
||||
}
|
||||
|
||||
if (debugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.get() != -1) {
|
||||
sfenceMode = static_cast<DirectSubmissionSfenceMode>(debugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.get());
|
||||
}
|
||||
|
||||
if (debugManager.flags.DirectSubmissionMonitorFenceInputPolicy.get() != -1) {
|
||||
this->inputMonitorFenceDispatchRequirement = !!(debugManager.flags.DirectSubmissionMonitorFenceInputPolicy.get());
|
||||
}
|
||||
|
||||
int32_t disableCacheFlushKey = debugManager.flags.DirectSubmissionDisableCpuCacheFlush.get();
|
||||
if (disableCacheFlushKey != -1) {
|
||||
disableCpuCacheFlush = (disableCacheFlushKey == 1);
|
||||
}
|
||||
|
||||
isDisablePrefetcherRequired = productHelper.isPrefetcherDisablingInDirectSubmissionRequired();
|
||||
if (debugManager.flags.DirectSubmissionDisablePrefetcher.get() != -1) {
|
||||
isDisablePrefetcherRequired = !!debugManager.flags.DirectSubmissionDisablePrefetcher.get();
|
||||
}
|
||||
|
||||
UNRECOVERABLE_IF(!CpuInfo::getInstance().isFeatureSupported(CpuInfo::featureClflush) && !disableCpuCacheFlush);
|
||||
|
||||
setImmWritePostSyncOffset();
|
||||
|
||||
dcFlushRequired = MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, inputParams.rootDeviceEnvironment);
|
||||
@@ -174,6 +195,7 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::allocateResources() {
|
||||
semaphoreData = static_cast<volatile RingSemaphoreData *>(semaphorePtr);
|
||||
memset(semaphorePtr, 0, sizeof(RingSemaphoreData));
|
||||
semaphoreData->queueWorkCount = 0;
|
||||
cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize);
|
||||
|
||||
this->gpuVaForMiFlush = this->semaphoreGpuVa + offsetof(RingSemaphoreData, miFlushSpace);
|
||||
this->gpuVaForPagingFenceSemaphore = this->semaphoreGpuVa + offsetof(RingSemaphoreData, pagingFenceCounter);
|
||||
@@ -212,6 +234,25 @@ inline void DirectSubmissionHw<GfxFamily, Dispatcher>::unblockGpu() {
|
||||
semaphoreData->queueWorkCount = currentQueueWorkCount;
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
inline void DirectSubmissionHw<GfxFamily, Dispatcher>::cpuCachelineFlush(void *ptr, size_t size) {
|
||||
if (disableCpuCacheFlush) {
|
||||
return;
|
||||
}
|
||||
constexpr size_t cachlineBit = 6;
|
||||
static_assert(MemoryConstants::cacheLineSize == 1 << cachlineBit, "cachlineBit has invalid value");
|
||||
char *flushPtr = reinterpret_cast<char *>(ptr);
|
||||
char *flushEndPtr = reinterpret_cast<char *>(ptr) + size;
|
||||
|
||||
flushPtr = alignDown(flushPtr, MemoryConstants::cacheLineSize);
|
||||
flushEndPtr = alignUp(flushEndPtr, MemoryConstants::cacheLineSize);
|
||||
size_t cachelines = (flushEndPtr - flushPtr) >> cachlineBit;
|
||||
for (size_t i = 0; i < cachelines; i++) {
|
||||
CpuIntrinsics::clFlush(flushPtr);
|
||||
flushPtr += MemoryConstants::cacheLineSize;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename GfxFamily, typename Dispatcher>
|
||||
bool DirectSubmissionHw<GfxFamily, Dispatcher>::initialize(bool submitOnInit) {
|
||||
bool ret = allocateResources();
|
||||
@@ -260,10 +301,12 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::stopRingBuffer(bool blocking) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool relaxedOrderingSchedulerWasRequired = this->relaxedOrderingSchedulerRequired;
|
||||
if (this->relaxedOrderingEnabled && this->relaxedOrderingSchedulerRequired) {
|
||||
dispatchRelaxedOrderingQueueStall();
|
||||
}
|
||||
|
||||
void *flushPtr = ringCommandStream.getSpace(0);
|
||||
Dispatcher::dispatchCacheFlush(ringCommandStream, this->rootDeviceEnvironment, gpuVaForMiFlush);
|
||||
dispatchStopRingBufferSection();
|
||||
Dispatcher::dispatchStopCommandBuffer(ringCommandStream);
|
||||
@@ -272,7 +315,9 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::stopRingBuffer(bool blocking) {
|
||||
EncodeNoop<GfxFamily>::emitNoop(ringCommandStream, bytesToPad);
|
||||
EncodeNoop<GfxFamily>::alignToCacheLine(ringCommandStream);
|
||||
|
||||
cpuCachelineFlush(flushPtr, getSizeEnd(relaxedOrderingSchedulerWasRequired));
|
||||
this->unblockGpu();
|
||||
cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize);
|
||||
|
||||
this->handleStopRingBuffer();
|
||||
this->ringStart = false;
|
||||
@@ -376,6 +421,9 @@ inline size_t DirectSubmissionHw<GfxFamily, Dispatcher>::getSizeDispatch(bool re
|
||||
size += RelaxedOrderingHelper::getSizeReturnPtrRegs<GfxFamily>();
|
||||
}
|
||||
|
||||
if (!disableCacheFlush) {
|
||||
size += Dispatcher::getSizeCacheFlush(rootDeviceEnvironment);
|
||||
}
|
||||
if (dispatchMonitorFence) {
|
||||
size += Dispatcher::getSizeMonitorFence(rootDeviceEnvironment);
|
||||
}
|
||||
@@ -458,6 +506,10 @@ void *DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchWorkloadSection(BatchBu
|
||||
}
|
||||
}
|
||||
|
||||
if (!disableCacheFlush) {
|
||||
Dispatcher::dispatchCacheFlush(ringCommandStream, this->rootDeviceEnvironment, gpuVaForMiFlush);
|
||||
}
|
||||
|
||||
if (dispatchMonitorFence) {
|
||||
TagData currentTagData = {};
|
||||
getTagAddressValue(currentTagData);
|
||||
@@ -528,7 +580,13 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffe
|
||||
|
||||
lastSubmittedThrottle = batchBuffer.throttle;
|
||||
bool relaxedOrderingSchedulerWillBeNeeded = (this->relaxedOrderingSchedulerRequired || batchBuffer.hasRelaxedOrderingDependencies);
|
||||
bool dispatchMonitorFence = this->dispatchMonitorFenceRequired(batchBuffer.dispatchMonitorFence);
|
||||
bool inputRequiredMonitorFence = false;
|
||||
if (this->inputMonitorFenceDispatchRequirement) {
|
||||
inputRequiredMonitorFence = batchBuffer.dispatchMonitorFence;
|
||||
} else {
|
||||
inputRequiredMonitorFence = batchBuffer.hasStallingCmds;
|
||||
}
|
||||
bool dispatchMonitorFence = this->dispatchMonitorFenceRequired(inputRequiredMonitorFence);
|
||||
|
||||
size_t dispatchSize = this->getUllsStateSize() + getSizeDispatch(relaxedOrderingSchedulerWillBeNeeded, batchBuffer.hasRelaxedOrderingDependencies, dispatchMonitorFence);
|
||||
|
||||
@@ -568,13 +626,16 @@ bool DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchCommandBuffer(BatchBuffe
|
||||
|
||||
handleNewResourcesSubmission();
|
||||
|
||||
dispatchWorkloadSection(batchBuffer, dispatchMonitorFence);
|
||||
void *currentPosition = dispatchWorkloadSection(batchBuffer, dispatchMonitorFence);
|
||||
|
||||
cpuCachelineFlush(currentPosition, dispatchSize);
|
||||
|
||||
auto requiresBlockingResidencyHandling = batchBuffer.pagingFenceSemInfo.requiresBlockingResidencyHandling;
|
||||
if (!this->submitCommandBufferToGpu(needStart, startVA, requiredMinimalSize, requiresBlockingResidencyHandling, batchBuffer.allocationsForResidency)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
cpuCachelineFlush(semaphorePtr, MemoryConstants::cacheLineSize);
|
||||
currentQueueWorkCount++;
|
||||
|
||||
uint64_t flushValue = updateTagValue(dispatchMonitorFence);
|
||||
@@ -651,10 +712,12 @@ inline uint64_t DirectSubmissionHw<GfxFamily, Dispatcher>::switchRingBuffers(Res
|
||||
|
||||
this->handleRingRestartForUllsLightResidency(allocationsForResidency);
|
||||
|
||||
void *flushPtr = ringCommandStream.getSpace(0);
|
||||
uint64_t currentBufferGpuVa = ringCommandStream.getCurrentGpuAddressPosition();
|
||||
|
||||
if (ringStart) {
|
||||
dispatchSwitchRingBufferSection(nextRingBuffer->getGpuAddress());
|
||||
cpuCachelineFlush(flushPtr, getSizeSwitchRingBufferSection());
|
||||
}
|
||||
|
||||
ringCommandStream.replaceBuffer(nextRingBuffer->getUnderlyingBuffer(), ringCommandStream.getMaxAvailableSpace());
|
||||
|
||||
@@ -18,6 +18,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
||||
using BaseClass::allocateResources;
|
||||
using BaseClass::completionFenceAllocation;
|
||||
using BaseClass::copyCommandBufferIntoRing;
|
||||
using BaseClass::cpuCachelineFlush;
|
||||
using BaseClass::currentQueueWorkCount;
|
||||
using BaseClass::currentRelaxedOrderingQueueSize;
|
||||
using BaseClass::currentRingBuffer;
|
||||
@@ -26,6 +27,8 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
||||
using BaseClass::deferredTasksListAllocation;
|
||||
using BaseClass::detectGpuHang;
|
||||
using BaseClass::DirectSubmissionHw;
|
||||
using BaseClass::disableCacheFlush;
|
||||
using BaseClass::disableCpuCacheFlush;
|
||||
using BaseClass::dispatchDisablePrefetcher;
|
||||
using BaseClass::dispatchMonitorFenceRequired;
|
||||
using BaseClass::dispatchPartitionRegisterConfiguration;
|
||||
@@ -51,6 +54,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw<GfxFamily, Dispatcher>
|
||||
using BaseClass::globalFenceAllocation;
|
||||
using BaseClass::hwInfo;
|
||||
using BaseClass::immWritePostSyncOffset;
|
||||
using BaseClass::inputMonitorFenceDispatchRequirement;
|
||||
using BaseClass::isDisablePrefetcherRequired;
|
||||
using BaseClass::lastSubmittedThrottle;
|
||||
using BaseClass::miMemFenceRequired;
|
||||
|
||||
@@ -111,13 +111,16 @@ DirectSubmissionBufferPlacement = -1
|
||||
DirectSubmissionSemaphorePlacement = -1
|
||||
DirectSubmissionBufferAddressing = -1
|
||||
DirectSubmissionSemaphoreAddressing = -1
|
||||
DirectSubmissionDisableCpuCacheFlush = -1
|
||||
DirectSubmissionNewResourceTlbFlush = -1
|
||||
DirectSubmissionDisableCacheFlush = -1
|
||||
DirectSubmissionPCIBarrier = -1
|
||||
DirectSubmissionDetectGpuHang = -1
|
||||
DirectSubmissionPrintBuffers = 0
|
||||
DirectSubmissionMaxRingBuffers = -1
|
||||
EnableDirectSubmissionController = -1
|
||||
DirectSubmissionControllerTimeout = -1
|
||||
DirectSubmissionMonitorFenceInputPolicy = -1
|
||||
UseVmBind = -1
|
||||
EnableNullHardware = 0
|
||||
ForceLinearImages = 0
|
||||
@@ -467,6 +470,7 @@ LogGdiCalls = 0
|
||||
LogGdiCallsToFile = 0
|
||||
UseContextEndOffsetForEventCompletion = -1
|
||||
DirectSubmissionInsertExtraMiMemFenceCommands = -1
|
||||
DirectSubmissionInsertSfenceInstructionPriorToSubmission = -1
|
||||
EnableTimestampWaitForEvents = -1
|
||||
ForceEvictOnlyIfNecessaryFlag = -1
|
||||
ForceWddmLowPriorityContextValue = -1
|
||||
|
||||
@@ -29,6 +29,40 @@
|
||||
|
||||
using DirectSubmissionTest = Test<DirectSubmissionFixture>;
|
||||
|
||||
// With DirectSubmissionDisableCpuCacheFlush forced to 1, cpuCachelineFlush() must not flush:
// CpuIntrinsicsTests::lastClFlushedPtr is expected to keep the zero it was reset to.
HWTEST_F(DirectSubmissionTest, whenDebugCacheFlushDisabledSetThenExpectNoCpuCacheFlush) {
    using Dispatcher = RenderDispatcher<FamilyType>;

    DebugManagerStateRestore restore;
    debugManager.flags.DirectSubmissionDisableCpuCacheFlush.set(1);

    MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
    EXPECT_TRUE(directSubmission.disableCpuCacheFlush);

    const uintptr_t untouchedPtrValue = 0;
    const size_t flushSize = 64;
    void *flushAddress = reinterpret_cast<void *>(0xABCD00u);

    CpuIntrinsicsTests::lastClFlushedPtr = 0;
    directSubmission.cpuCachelineFlush(flushAddress, flushSize);

    EXPECT_EQ(untouchedPtrValue, CpuIntrinsicsTests::lastClFlushedPtr);
}
|
||||
|
||||
// With DirectSubmissionDisableCpuCacheFlush forced to 0, cpuCachelineFlush() must flush the
// cache line it is given: CpuIntrinsicsTests::lastClFlushedPtr is expected to equal that address.
// Skipped when the CPU does not report the CLFLUSH feature.
HWTEST_F(DirectSubmissionTest, whenDebugCacheFlushDisabledNotSetThenExpectCpuCacheFlush) {
    const bool clflushSupported = CpuInfo::getInstance().isFeatureSupported(CpuInfo::featureClflush);
    if (!clflushSupported) {
        GTEST_SKIP();
    }

    using Dispatcher = RenderDispatcher<FamilyType>;

    DebugManagerStateRestore restore;
    debugManager.flags.DirectSubmissionDisableCpuCacheFlush.set(0);

    MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
    EXPECT_FALSE(directSubmission.disableCpuCacheFlush);

    // Cache-line aligned address with a one-line size, so exactly this address is flushed.
    const uintptr_t flushedPtrValue = 0xABCD00u;
    const size_t flushSize = 64;
    void *flushAddress = reinterpret_cast<void *>(flushedPtrValue);

    CpuIntrinsicsTests::lastClFlushedPtr = 0;
    directSubmission.cpuCachelineFlush(flushAddress, flushSize);

    EXPECT_EQ(flushedPtrValue, CpuIntrinsicsTests::lastClFlushedPtr);
}
|
||||
|
||||
HWTEST_F(DirectSubmissionTest, givenDirectSubmissionDisabledWhenStopThenRingIsNotStopped) {
|
||||
VariableBackup<UltHwConfig> backup(&ultHwConfig);
|
||||
ultHwConfig.csrBaseCallDirectSubmissionAvailable = true;
|
||||
@@ -268,6 +302,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWithCompletionFenceAllocatio
|
||||
|
||||
HWTEST_F(DirectSubmissionTest, givenDirectSubmissionInitializedWhenRingIsStartedThenExpectAllocationsCreatedAndCommandsDispatched) {
|
||||
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
EXPECT_TRUE(directSubmission.disableCpuCacheFlush);
|
||||
|
||||
bool ret = directSubmission.initialize(true);
|
||||
EXPECT_TRUE(ret);
|
||||
@@ -383,6 +418,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionCurrentRingBuffersInUseWhenS
|
||||
|
||||
HWTEST_F(DirectSubmissionTest, givenDirectSubmissionAllocateFailWhenRingIsStartedThenExpectRingNotStarted) {
|
||||
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
EXPECT_TRUE(directSubmission.disableCpuCacheFlush);
|
||||
|
||||
directSubmission.allocateOsResourcesReturn = false;
|
||||
bool ret = directSubmission.initialize(true);
|
||||
@@ -486,9 +522,12 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenDispatchEndingSectionThe
|
||||
|
||||
HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenGetDispatchSizeThenExpectCorrectSizeReturned) {
|
||||
using Dispatcher = RenderDispatcher<FamilyType>;
|
||||
DebugManagerStateRestore restorer;
|
||||
debugManager.flags.DirectSubmissionDisableCacheFlush.set(0);
|
||||
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
|
||||
size_t expectedSize = directSubmission.getSizeStartSection() +
|
||||
Dispatcher::getSizeCacheFlush(directSubmission.rootDeviceEnvironment) +
|
||||
directSubmission.getSizeSemaphoreSection(false) + directSubmission.getSizeNewResourceHandler();
|
||||
|
||||
size_t actualSize = directSubmission.getSizeDispatch(false, false, directSubmission.dispatchMonitorFenceRequired(true));
|
||||
@@ -500,6 +539,7 @@ HWTEST_F(DirectSubmissionTest,
|
||||
using Dispatcher = RenderDispatcher<FamilyType>;
|
||||
|
||||
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
directSubmission.disableCacheFlush = true;
|
||||
size_t expectedSize = directSubmission.getSizeStartSection() +
|
||||
directSubmission.getSizeSemaphoreSection(false) + directSubmission.getSizeNewResourceHandler();
|
||||
|
||||
|
||||
@@ -419,6 +419,56 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, givenDefaultDirectSubmissionFlatRin
|
||||
EXPECT_FALSE(directSubmission.copyCommandBufferIntoRing(batchBuffer));
|
||||
}
|
||||
|
||||
// Checks that setting disableCacheFlush removes the cache flush from the workload section:
// the dispatch size shrinks by exactly Dispatcher::getSizeCacheFlush() and no PIPE_CONTROL
// identical to the one emitted by dispatchCacheFlush() appears in the ring.
HWTEST_F(DirectSubmissionDispatchBufferTest,
         givenDirectSubmissionDisableCacheFlushWhenDispatchWorkloadCalledThenExpectStartWithoutCacheFlush) {
    using Dispatcher = RenderDispatcher<FamilyType>;
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    DebugManagerStateRestore restorer;
    // Baseline object is built with the cache flush explicitly enabled.
    debugManager.flags.DirectSubmissionDisableCacheFlush.set(0);

    MockDirectSubmissionHw<FamilyType, Dispatcher> regularDirectSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
    const size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch(false, false, regularDirectSubmission.dispatchMonitorFenceRequired(false));

    MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
    directSubmission.disableCacheFlush = true;

    EXPECT_TRUE(directSubmission.allocateResources());

    const size_t flushSize = Dispatcher::getSizeCacheFlush(directSubmission.rootDeviceEnvironment);
    const size_t disabledSizeDispatch = directSubmission.getSizeDispatch(false, false, directSubmission.dispatchMonitorFenceRequired(false));
    EXPECT_EQ(disabledSizeDispatch, (regularSizeDispatch - flushSize));

    directSubmission.dispatchWorkloadSection(batchBuffer, directSubmission.dispatchMonitorFenceRequired(batchBuffer.dispatchMonitorFence));
    const size_t expectedDispatchSize = disabledSizeDispatch - directSubmission.getSizeNewResourceHandler();
    EXPECT_EQ(expectedDispatchSize, directSubmission.ringCommandStream.getUsed());

    HardwareParse hwParse;
    hwParse.parsePipeControl = true;
    hwParse.parseCommands<FamilyType>(directSubmission.ringCommandStream, 0);
    hwParse.findHardwareCommands<FamilyType>();

    MI_BATCH_BUFFER_START *bbStart = hwParse.getCommand<MI_BATCH_BUFFER_START>();
    ASSERT_NE(nullptr, bbStart);

    // Emit a reference cache flush into scratch storage to compare against the parsed commands.
    uint8_t referenceStorage[256];
    LinearStream referenceStream;
    referenceStream.replaceBuffer(referenceStorage, 256);
    RenderDispatcher<FamilyType>::dispatchCacheFlush(referenceStream, pDevice->getRootDeviceEnvironment(), 0ull);
    auto expectedPipeControl = static_cast<PIPE_CONTROL *>(referenceStream.getCpuBase());

    bool foundFlush = false;
    for (auto &parsedCmd : hwParse.pipeControlList) {
        auto pipeControl = genCmdCast<PIPE_CONTROL *>(parsedCmd);
        if (memcmp(expectedPipeControl, pipeControl, sizeof(PIPE_CONTROL)) == 0) {
            foundFlush = true;
            break;
        }
    }
    EXPECT_FALSE(foundFlush);
}
|
||||
|
||||
HWTEST_F(DirectSubmissionDispatchBufferTest,
|
||||
givenDirectSubmissionRingStartAndSwitchBuffersWhenDispatchingCommandBufferThenExpectDispatchInCommandBufferAndQueueCountIncrease) {
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
@@ -771,6 +821,66 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, givenRingBufferRestartRequestWhenDi
|
||||
EXPECT_EQ(directSubmission.submitCount, 1u);
|
||||
}
|
||||
|
||||
// For each value of DirectSubmissionInsertSfenceInstructionPriorToSubmission, checks how many
// sfence/mfence intrinsics dispatchCommandBuffer() issues:
//   -1 (default) and 2 -> two fences, 1 -> one fence, 0 -> none.
// The first fence is counted as an mfence instead of an sfence when the device is discrete,
// no PCI barrier is mapped and no MI_MEM_FENCE is required. This is the same condition
// SemaphoreFenceHelper evaluates, including the pciBarrierPtr check.
HWTEST_F(DirectSubmissionDispatchBufferTest, givenDebugFlagSetWhenDispatchingWorkloadThenProgramSfenceInstruction) {
    DebugManagerStateRestore restorer{};

    using Dispatcher = BlitterDispatcher<FamilyType>;

    FlushStampTracker flushStamp(true);

    for (int32_t debugFlag : {-1, 0, 1, 2}) {
        debugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.set(debugFlag);

        MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
        EXPECT_TRUE(directSubmission.initialize(true));

        // Sample the counters after initialization so only the dispatch below is measured.
        auto initialSfenceCounterValue = CpuIntrinsicsTests::sfenceCounter.load();
        auto initialMfenceCounterValue = CpuIntrinsicsTests::mfenceCounter.load();

        EXPECT_TRUE(directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp));

        uint32_t expectedSfenceCount = (debugFlag == -1) ? 2 : static_cast<uint32_t>(debugFlag);
        uint32_t expectedMfenceCount = 0u;
        if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice && !directSubmission.pciBarrierPtr && !pDevice->getProductHelper().isAcquireGlobalFenceInDirectSubmissionRequired(pDevice->getHardwareInfo()) && expectedSfenceCount > 0u) {
            --expectedSfenceCount;
            ++expectedMfenceCount;
        }

        EXPECT_EQ(initialSfenceCounterValue + expectedSfenceCount, CpuIntrinsicsTests::sfenceCounter);
        EXPECT_EQ(initialMfenceCounterValue + expectedMfenceCount, CpuIntrinsicsTests::mfenceCounter);
    }
}
|
||||
|
||||
// For each value of DirectSubmissionInsertSfenceInstructionPriorToSubmission, checks how many
// sfence/mfence intrinsics stopRingBuffer() issues:
//   -1 (default) and 2 -> two fences, 1 -> one fence, 0 -> none.
// The first fence is counted as an mfence instead of an sfence when the device is discrete,
// no PCI barrier is mapped and no MI_MEM_FENCE is required.
HWTEST_F(DirectSubmissionDispatchBufferTest, givenDebugFlagSetWhenStoppingRingbufferThenProgramSfenceInstruction) {
    DebugManagerStateRestore restorer{};

    using Dispatcher = BlitterDispatcher<FamilyType>;

    for (int32_t debugFlag : {-1, 0, 1, 2}) {
        debugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.set(debugFlag);

        MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
        EXPECT_TRUE(directSubmission.initialize(true));

        // Sample the counters after initialization so only the ring stop below is measured.
        auto initialSfenceCounterValue = CpuIntrinsicsTests::sfenceCounter.load();
        auto initialMfenceCounterValue = CpuIntrinsicsTests::mfenceCounter.load();

        EXPECT_TRUE(directSubmission.stopRingBuffer(false));

        uint32_t expectedSfenceCount = (debugFlag == -1) ? 2 : static_cast<uint32_t>(debugFlag);
        uint32_t expectedMfenceCount = 0u;
        if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice && !directSubmission.pciBarrierPtr && !pDevice->getProductHelper().isAcquireGlobalFenceInDirectSubmissionRequired(pDevice->getHardwareInfo()) && expectedSfenceCount > 0u) {
            --expectedSfenceCount;
            ++expectedMfenceCount;
        }

        EXPECT_EQ(initialSfenceCounterValue + expectedSfenceCount, CpuIntrinsicsTests::sfenceCounter);
        EXPECT_EQ(initialMfenceCounterValue + expectedMfenceCount, CpuIntrinsicsTests::mfenceCounter);
    }
}
|
||||
|
||||
struct DirectSubmissionRelaxedOrderingTests : public DirectSubmissionDispatchBufferTest {
|
||||
void SetUp() override {
|
||||
debugManager.flags.DirectSubmissionRelaxedOrdering.set(1);
|
||||
|
||||
@@ -95,6 +95,7 @@ struct MockDrmDirectSubmission : public DrmDirectSubmission<GfxFamily, Dispatche
|
||||
using BaseClass::handleResidency;
|
||||
using BaseClass::handleSwitchRingBuffers;
|
||||
using BaseClass::immWritePostSyncOffset;
|
||||
using BaseClass::inputMonitorFenceDispatchRequirement;
|
||||
using BaseClass::isCompleted;
|
||||
using BaseClass::isDisablePrefetcherRequired;
|
||||
using BaseClass::isNewResourceHandleNeeded;
|
||||
@@ -108,6 +109,7 @@ struct MockDrmDirectSubmission : public DrmDirectSubmission<GfxFamily, Dispatche
|
||||
using BaseClass::ringBuffers;
|
||||
using BaseClass::ringStart;
|
||||
using BaseClass::rootDeviceEnvironment;
|
||||
using BaseClass::sfenceMode;
|
||||
using BaseClass::submit;
|
||||
using BaseClass::switchRingBuffers;
|
||||
using BaseClass::tagAddress;
|
||||
@@ -322,6 +324,7 @@ HWTEST_F(DrmDirectSubmissionTest, givenPciBarrierWhenCreateDirectSubmissionThenP
|
||||
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
|
||||
|
||||
EXPECT_NE(nullptr, directSubmission.pciBarrierPtr);
|
||||
EXPECT_NE(DirectSubmissionSfenceMode::disabled, directSubmission.sfenceMode);
|
||||
EXPECT_FALSE(directSubmission.miMemFenceRequired);
|
||||
|
||||
SysCalls::munmap(ptr, MemoryConstants::pageSize);
|
||||
@@ -342,6 +345,7 @@ HWTEST_F(DrmDirectSubmissionTest, givenPciBarrierWhenCreateDirectSubmissionAndMm
|
||||
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
|
||||
|
||||
EXPECT_EQ(nullptr, directSubmission.pciBarrierPtr);
|
||||
EXPECT_NE(DirectSubmissionSfenceMode::disabled, directSubmission.sfenceMode);
|
||||
auto expectMiMemFence = device->getHardwareInfo().capabilityTable.isIntegratedDevice ? false : device->getRootDeviceEnvironment().getHelper<ProductHelper>().isAcquireGlobalFenceInDirectSubmissionRequired(device->getHardwareInfo());
|
||||
EXPECT_EQ(directSubmission.miMemFenceRequired, expectMiMemFence);
|
||||
|
||||
@@ -362,6 +366,7 @@ HWTEST_F(DrmDirectSubmissionTest, givenPciBarrierDisabledWhenCreateDirectSubmiss
|
||||
MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> directSubmission(commandStreamReceiver);
|
||||
|
||||
EXPECT_EQ(nullptr, directSubmission.pciBarrierPtr);
|
||||
EXPECT_NE(DirectSubmissionSfenceMode::disabled, directSubmission.sfenceMode);
|
||||
auto expectMiMemFence = device->getHardwareInfo().capabilityTable.isIntegratedDevice ? false : device->getRootDeviceEnvironment().getHelper<ProductHelper>().isAcquireGlobalFenceInDirectSubmissionRequired(device->getHardwareInfo());
|
||||
EXPECT_EQ(directSubmission.miMemFenceRequired, expectMiMemFence);
|
||||
|
||||
@@ -1208,6 +1213,124 @@ HWTEST_F(DrmDirectSubmissionTest, givenDrmDirectSubmissionWhenEnableRingSwitchTa
|
||||
drmDirectSubmission.ringStart = false;
|
||||
}
|
||||
|
||||
// Constructs a DRM direct submission without touching any debug flag in this test body and
// checks that inputMonitorFenceDispatchRequirement is reported as true.
HWTEST_F(DrmDirectSubmissionTest, givenDrmDirectSubmissionWhenGettingDefaultInputMonitorFencePolicyThenDefaultIsTrue) {
    using DirectSubmissionUnderTest = MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>>;

    auto &defaultCsr = *device->getDefaultEngine().commandStreamReceiver;
    DirectSubmissionUnderTest directSubmission(defaultCsr);

    EXPECT_TRUE(directSubmission.inputMonitorFenceDispatchRequirement);
}
|
||||
|
||||
// Sets DirectSubmissionMonitorFenceInputPolicy to 0, checks that the DRM direct submission then
// reports inputMonitorFenceDispatchRequirement == false, dispatches a batch buffer flagged with
// hasStallingCmds and verifies that no PIPE_CONTROL in the ring carries a
// WRITE_IMMEDIATE_DATA post-sync operation.
HWTEST_F(DrmDirectSubmissionTest,
         givenDrmDirectSubmissionWithStallingCommandInputMonitorFencePolicyWhenDispatchingWorkloadWithDisabledMonitorFenceThenDrmIgnoresInputFlag) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION;

    // Restores debug flags when the test scope ends.
    DebugManagerStateRestore dbgRestorer;
    debugManager.flags.DirectSubmissionMonitorFenceInputPolicy.set(0);

    MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);
    EXPECT_FALSE(drmDirectSubmission.inputMonitorFenceDispatchRequirement);
    FlushStampTracker flushStamp(true);

    EXPECT_TRUE(drmDirectSubmission.initialize(false));

    BatchBuffer batchBuffer = {};
    LinearStream stream;

    const AllocationProperties commandBufferProperties{device->getRootDeviceIndex(), 0x1000,
                                                       AllocationType::commandBuffer, device->getDeviceBitfield()};
    GraphicsAllocation *commandBuffer = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties(commandBufferProperties);
    // Stop here on allocation failure instead of dereferencing a null pointer below
    // (same guard the Wddm variants of this test use).
    ASSERT_NE(nullptr, commandBuffer);

    stream.replaceGraphicsAllocation(commandBuffer);
    stream.replaceBuffer(commandBuffer->getUnderlyingBuffer(), commandBuffer->getUnderlyingBufferSize());
    stream.getSpace(0x20);

    memset(stream.getCpuBase(), 0, 0x20);

    batchBuffer.endCmdPtr = ptrOffset(stream.getCpuBase(), 0x20);
    batchBuffer.commandBufferAllocation = commandBuffer;
    batchBuffer.usedSize = 0x40;
    batchBuffer.taskStartAddress = 0x881112340000;
    batchBuffer.stream = &stream;
    batchBuffer.hasStallingCmds = true;

    EXPECT_TRUE(drmDirectSubmission.dispatchCommandBuffer(batchBuffer, flushStamp));

    // Parse the whole ring (offset 0) and collect every PIPE_CONTROL.
    HardwareParse hwParse;
    hwParse.parsePipeControl = true;
    hwParse.parseCommands<FamilyType>(drmDirectSubmission.ringCommandStream, 0);
    hwParse.findHardwareCommands<FamilyType>();

    bool foundFenceUpdate = false;
    for (auto &cmd : hwParse.pipeControlList) {
        auto *pipeControl = reinterpret_cast<PIPE_CONTROL *>(cmd);
        if (pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            foundFenceUpdate = true;
            break;
        }
    }
    EXPECT_FALSE(foundFenceUpdate);

    executionEnvironment.memoryManager->freeGraphicsMemory(commandBuffer);
    // NOTE(review): presumably marks the submission as completed so teardown does not wait on the tag - confirm.
    *drmDirectSubmission.tagAddress = 1;
}
|
||||
|
||||
// Sets DirectSubmissionMonitorFenceInputPolicy to 1, checks that the DRM direct submission then
// reports inputMonitorFenceDispatchRequirement == true, dispatches a batch buffer with
// dispatchMonitorFence set and verifies that no PIPE_CONTROL in the ring carries a
// WRITE_IMMEDIATE_DATA post-sync operation.
HWTEST_F(DrmDirectSubmissionTest,
         givenDrmDirectSubmissionWithExplicitFlagInputMonitorFencePolicyWhenDispatchingWorkloadWithDisabledMonitorFenceThenDrmIgnoresInputFlag) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION;

    // Restores debug flags when the test scope ends.
    DebugManagerStateRestore dbgRestorer;
    debugManager.flags.DirectSubmissionMonitorFenceInputPolicy.set(1);

    MockDrmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>> drmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);
    EXPECT_TRUE(drmDirectSubmission.inputMonitorFenceDispatchRequirement);

    FlushStampTracker flushStamp(true);

    EXPECT_TRUE(drmDirectSubmission.initialize(false));

    BatchBuffer batchBuffer = {};
    LinearStream stream;

    const AllocationProperties commandBufferProperties{device->getRootDeviceIndex(), 0x1000,
                                                       AllocationType::commandBuffer, device->getDeviceBitfield()};
    GraphicsAllocation *commandBuffer = executionEnvironment.memoryManager->allocateGraphicsMemoryWithProperties(commandBufferProperties);
    // Stop here on allocation failure instead of dereferencing a null pointer below
    // (same guard the Wddm variants of this test use).
    ASSERT_NE(nullptr, commandBuffer);

    stream.replaceGraphicsAllocation(commandBuffer);
    stream.replaceBuffer(commandBuffer->getUnderlyingBuffer(), commandBuffer->getUnderlyingBufferSize());
    stream.getSpace(0x20);

    memset(stream.getCpuBase(), 0, 0x20);

    batchBuffer.endCmdPtr = ptrOffset(stream.getCpuBase(), 0x20);
    batchBuffer.commandBufferAllocation = commandBuffer;
    batchBuffer.usedSize = 0x40;
    batchBuffer.taskStartAddress = 0x881112340000;
    batchBuffer.stream = &stream;
    batchBuffer.dispatchMonitorFence = true;

    EXPECT_TRUE(drmDirectSubmission.dispatchCommandBuffer(batchBuffer, flushStamp));

    // Parse the whole ring (offset 0) and collect every PIPE_CONTROL.
    HardwareParse hwParse;
    hwParse.parsePipeControl = true;
    hwParse.parseCommands<FamilyType>(drmDirectSubmission.ringCommandStream, 0);
    hwParse.findHardwareCommands<FamilyType>();

    bool foundFenceUpdate = false;
    for (auto &cmd : hwParse.pipeControlList) {
        auto *pipeControl = reinterpret_cast<PIPE_CONTROL *>(cmd);
        if (pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            foundFenceUpdate = true;
            break;
        }
    }
    EXPECT_FALSE(foundFenceUpdate);

    executionEnvironment.memoryManager->freeGraphicsMemory(commandBuffer);
    // NOTE(review): presumably marks the submission as completed so teardown does not wait on the tag - confirm.
    *drmDirectSubmission.tagAddress = 1;
}
|
||||
|
||||
HWTEST_F(DrmDirectSubmissionTest, givenGpuHangWhenWaitCalledThenGpuHangDetected) {
|
||||
using Dispatcher = RenderDispatcher<FamilyType>;
|
||||
|
||||
|
||||
@@ -799,6 +799,7 @@ HWTEST_F(WddmDirectSubmissionTest,
|
||||
FlushStampTracker flushStamp(true);
|
||||
|
||||
MockWddmDirectSubmission<FamilyType, Dispatcher> wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);
|
||||
EXPECT_TRUE(wddmDirectSubmission.inputMonitorFenceDispatchRequirement);
|
||||
|
||||
bool ret = wddmDirectSubmission.initialize(true);
|
||||
EXPECT_TRUE(ret);
|
||||
@@ -862,6 +863,7 @@ HWTEST_F(WddmDirectSubmissionTest,
|
||||
FlushStampTracker flushStamp(true);
|
||||
|
||||
MockWddmDirectSubmission<FamilyType, Dispatcher> wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);
|
||||
EXPECT_TRUE(wddmDirectSubmission.inputMonitorFenceDispatchRequirement);
|
||||
|
||||
bool ret = wddmDirectSubmission.initialize(true);
|
||||
EXPECT_TRUE(ret);
|
||||
@@ -903,6 +905,128 @@ HWTEST_F(WddmDirectSubmissionTest,
|
||||
EXPECT_TRUE(wddmDirectSubmission.dispatchMonitorFenceRequired(true));
|
||||
}
|
||||
|
||||
// Input policy 0 with a batch buffer that only sets dispatchMonitorFence: the Wddm direct
// submission reports inputMonitorFenceDispatchRequirement == false and the commands added to
// the ring by dispatchCommandBuffer contain no PIPE_CONTROL with a WRITE_IMMEDIATE_DATA
// post-sync operation.
HWTEST_F(WddmDirectSubmissionTest,
         givenWddmDirectSubmissionWithDisabledMonitorFenceWhenInputPolicyIsStallingCommandAndBatchBufferDispatchedWithExplicitMonitorFenceFlagThenDispatchNoPostSyncOperation) {
    using Dispatcher = RenderDispatcher<FamilyType>;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;

    DebugManagerStateRestore dbgRestorer;
    debugManager.flags.DirectSubmissionMonitorFenceInputPolicy.set(0);

    BatchBuffer batchBuffer = {};

    // Client command buffer backing the batch buffer handed to the direct submission.
    auto memoryManager = executionEnvironment->memoryManager.get();
    const AllocationProperties commandBufferProperties{device->getRootDeviceIndex(), 0x1000,
                                                       AllocationType::commandBuffer, device->getDeviceBitfield()};
    GraphicsAllocation *clientCommandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(commandBufferProperties);
    ASSERT_NE(nullptr, clientCommandBuffer);

    auto clientStream = std::make_unique<LinearStream>(clientCommandBuffer);
    clientStream->getSpace(0x40);
    memset(clientStream->getCpuBase(), 0, 0x20);

    batchBuffer.stream = clientStream.get();
    batchBuffer.commandBufferAllocation = clientCommandBuffer;
    batchBuffer.taskStartAddress = clientCommandBuffer->getGpuAddress();
    batchBuffer.endCmdPtr = ptrOffset(clientStream->getCpuBase(), 0x20);
    batchBuffer.usedSize = 0x40;
    batchBuffer.dispatchMonitorFence = true;

    FlushStampTracker flushStamp(true);

    MockWddmDirectSubmission<FamilyType, Dispatcher> wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);
    EXPECT_FALSE(wddmDirectSubmission.inputMonitorFenceDispatchRequirement);

    bool ret = wddmDirectSubmission.initialize(true);
    EXPECT_TRUE(ret);

    // Only the commands emitted by the dispatch below are parsed.
    const size_t ringOffsetBeforeDispatch = wddmDirectSubmission.ringCommandStream.getUsed();
    ret = wddmDirectSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
    EXPECT_TRUE(ret);

    HardwareParse hwParse;
    hwParse.parsePipeControl = true;
    hwParse.parseCommands<FamilyType>(wddmDirectSubmission.ringCommandStream, ringOffsetBeforeDispatch);
    hwParse.findHardwareCommands<FamilyType>();

    bool foundFenceUpdate = false;
    for (auto &parsedCmd : hwParse.pipeControlList) {
        auto pipeControl = genCmdCast<PIPE_CONTROL *>(parsedCmd);
        if (pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            foundFenceUpdate = true;
            break;
        }
    }
    EXPECT_FALSE(foundFenceUpdate);

    memoryManager->freeGraphicsMemory(clientCommandBuffer);
}
|
||||
|
||||
// Input policy 1 with a batch buffer that only sets hasStallingCmds: the Wddm direct
// submission reports inputMonitorFenceDispatchRequirement == true and the commands added to
// the ring by dispatchCommandBuffer contain no PIPE_CONTROL with a WRITE_IMMEDIATE_DATA
// post-sync operation.
HWTEST_F(WddmDirectSubmissionTest,
         givenWddmDirectSubmissionWithDisabledMonitorFenceWhenInputPolicyIsExplicitMonitorFenceAndBatchBufferDispatchedWithStallingCommandFlagThenDispatchNoPostSyncOperation) {
    using Dispatcher = RenderDispatcher<FamilyType>;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION;

    DebugManagerStateRestore dbgRestorer;
    debugManager.flags.DirectSubmissionMonitorFenceInputPolicy.set(1);

    BatchBuffer batchBuffer = {};

    // Client command buffer backing the batch buffer handed to the direct submission.
    auto memoryManager = executionEnvironment->memoryManager.get();
    const AllocationProperties commandBufferProperties{device->getRootDeviceIndex(), 0x1000,
                                                       AllocationType::commandBuffer, device->getDeviceBitfield()};
    GraphicsAllocation *clientCommandBuffer = memoryManager->allocateGraphicsMemoryWithProperties(commandBufferProperties);
    ASSERT_NE(nullptr, clientCommandBuffer);

    auto clientStream = std::make_unique<LinearStream>(clientCommandBuffer);
    clientStream->getSpace(0x40);
    memset(clientStream->getCpuBase(), 0, 0x20);

    batchBuffer.stream = clientStream.get();
    batchBuffer.commandBufferAllocation = clientCommandBuffer;
    batchBuffer.taskStartAddress = clientCommandBuffer->getGpuAddress();
    batchBuffer.endCmdPtr = ptrOffset(clientStream->getCpuBase(), 0x20);
    batchBuffer.usedSize = 0x40;
    batchBuffer.hasStallingCmds = true;

    FlushStampTracker flushStamp(true);

    MockWddmDirectSubmission<FamilyType, Dispatcher> wddmDirectSubmission(*device->getDefaultEngine().commandStreamReceiver);
    EXPECT_TRUE(wddmDirectSubmission.inputMonitorFenceDispatchRequirement);

    bool ret = wddmDirectSubmission.initialize(true);
    EXPECT_TRUE(ret);

    // Only the commands emitted by the dispatch below are parsed.
    const size_t ringOffsetBeforeDispatch = wddmDirectSubmission.ringCommandStream.getUsed();
    ret = wddmDirectSubmission.dispatchCommandBuffer(batchBuffer, flushStamp);
    EXPECT_TRUE(ret);

    HardwareParse hwParse;
    hwParse.parsePipeControl = true;
    hwParse.parseCommands<FamilyType>(wddmDirectSubmission.ringCommandStream, ringOffsetBeforeDispatch);
    hwParse.findHardwareCommands<FamilyType>();

    bool foundFenceUpdate = false;
    for (auto &parsedCmd : hwParse.pipeControlList) {
        auto pipeControl = genCmdCast<PIPE_CONTROL *>(parsedCmd);
        if (pipeControl->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) {
            foundFenceUpdate = true;
            break;
        }
    }
    EXPECT_FALSE(foundFenceUpdate);

    memoryManager->freeGraphicsMemory(clientCommandBuffer);
}
|
||||
|
||||
HWTEST_F(WddmDirectSubmissionTest,
|
||||
givenBatchBufferWithThrottleLowWhenCallDispatchCommandBufferThenStoreLastSubmitedThrottle) {
|
||||
|
||||
@@ -1025,6 +1149,34 @@ HWTEST_F(WddmDirectSubmissionTest, givenDirectSubmissionWhenUnblockPagingFenceSe
|
||||
EXPECT_GT(wddmDirectSubmission.semaphoreData->pagingFenceCounter, mockedPagingFence);
|
||||
}
|
||||
|
||||
// For each value of DirectSubmissionInsertSfenceInstructionPriorToSubmission in {-1, 0, 1, 2},
// calls unblockPagingFenceSemaphore(0) and compares the sfence/mfence counter deltas with the
// expectation: -1 is expected to behave like 2, and one sfence is expected to be replaced by an
// mfence when the device is not integrated, no PCI barrier pointer is set and the product helper
// reports that acquiring the global fence is not required.
HWTEST_F(WddmDirectSubmissionTest, givenDebugFlagSetWhenUnblockPagingFenceSemaphoreThenProgramSfenceInstruction) {
    using Dispatcher = RenderDispatcher<FamilyType>;
    DebugManagerStateRestore restorer{};
    FlushStampTracker flushStamp(true);

    for (int32_t flagValue : {-1, 0, 1, 2}) {
        debugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.set(flagValue);

        MockWddmDirectSubmission<FamilyType, Dispatcher> directSubmission(*device->getDefaultEngine().commandStreamReceiver);
        EXPECT_TRUE(directSubmission.initialize(true));

        // Snapshot the counters so that only fences issued by the call below are measured.
        const auto sfencesBefore = CpuIntrinsicsTests::sfenceCounter.load();
        const auto mfencesBefore = CpuIntrinsicsTests::mfenceCounter.load();

        directSubmission.unblockPagingFenceSemaphore(0u);

        uint32_t sfencesExpected = 2;
        if (flagValue != -1) {
            sfencesExpected = static_cast<uint32_t>(flagValue);
        }
        uint32_t mfencesExpected = 0u;

        // Nested checks keep the same left-to-right, short-circuit evaluation as a single && chain.
        if (!device->getHardwareInfo().capabilityTable.isIntegratedDevice && !directSubmission.pciBarrierPtr) {
            if (!device->getProductHelper().isAcquireGlobalFenceInDirectSubmissionRequired(device->getHardwareInfo()) && sfencesExpected > 0u) {
                --sfencesExpected;
                ++mfencesExpected;
            }
        }

        EXPECT_EQ(sfencesBefore + sfencesExpected, CpuIntrinsicsTests::sfenceCounter);
        EXPECT_EQ(mfencesBefore + mfencesExpected, CpuIntrinsicsTests::mfenceCounter);
    }
}
|
||||
|
||||
TEST(DirectSubmissionControllerWindowsTest, givenDirectSubmissionControllerWhenCallingSleepThenRequestHighResolutionTimers) {
|
||||
VariableBackup<size_t> timeBeginPeriodCalledBackup(&SysCalls::timeBeginPeriodCalled, 0u);
|
||||
VariableBackup<MMRESULT> timeBeginPeriodLastValueBackup(&SysCalls::timeBeginPeriodLastValue, 0u);
|
||||
|
||||
@@ -38,6 +38,7 @@ struct MockWddmDirectSubmission : public WddmDirectSubmission<GfxFamily, Dispatc
|
||||
using BaseClass::handleResidency;
|
||||
using BaseClass::handleStopRingBuffer;
|
||||
using BaseClass::handleSwitchRingBuffers;
|
||||
using BaseClass::inputMonitorFenceDispatchRequirement;
|
||||
using BaseClass::isCompleted;
|
||||
using BaseClass::isDisablePrefetcherRequired;
|
||||
using BaseClass::isNewResourceHandleNeeded;
|
||||
|
||||
Reference in New Issue
Block a user